24 25 26#include "config.h" 27#include "system.h" 28#include "coretypes.h" 29#include "tm.h" 30#include "rtl.h" 31#include "tree.h" 32#include "tm_p.h" 33#include "regs.h" 34#include "hard-reg-set.h" 35#include "real.h" 36#include "insn-config.h" 37#include "conditions.h" 38#include "output.h" 39#include "insn-attr.h" 40#include "flags.h" 41#include "except.h" 42#include "function.h" 43#include "recog.h" 44#include "expr.h" 45#include "optabs.h" 46#include "toplev.h" 47#include "basic-block.h" 48#include "ggc.h" 49#include "target.h" 50#include "target-def.h" 51#include "langhooks.h" 52#include "cgraph.h" 53 54#ifndef CHECK_STACK_LIMIT 55#define CHECK_STACK_LIMIT (-1) 56#endif 57 58/* Return index of given mode in mult and division cost tables. */ 59#define MODE_INDEX(mode) \ 60 ((mode) == QImode ? 0 \ 61 : (mode) == HImode ? 1 \ 62 : (mode) == SImode ? 2 \ 63 : (mode) == DImode ? 3 \ 64 : 4) 65 66/* Processor costs (relative to an add) */ 67static const 68struct processor_costs size_cost = { /* costs for tunning for size */ 69 2, /* cost of an add instruction */ 70 3, /* cost of a lea instruction */ 71 2, /* variable shift costs */ 72 3, /* constant shift costs */ 73 {3, 3, 3, 3, 5}, /* cost of starting a multiply */ 74 0, /* cost of multiply per each bit set */ 75 {3, 3, 3, 3, 5}, /* cost of a divide/mod */ 76 3, /* cost of movsx */ 77 3, /* cost of movzx */ 78 0, /* "large" insn */ 79 2, /* MOVE_RATIO */ 80 2, /* cost for loading QImode using movzbl */ 81 {2, 2, 2}, /* cost of loading integer registers 82 in QImode, HImode and SImode. 83 Relative to reg-reg move (2). 
*/ 84 {2, 2, 2}, /* cost of storing integer registers */ 85 2, /* cost of reg,reg fld/fst */ 86 {2, 2, 2}, /* cost of loading fp registers 87 in SFmode, DFmode and XFmode */ 88 {2, 2, 2}, /* cost of loading integer registers */ 89 3, /* cost of moving MMX register */ 90 {3, 3}, /* cost of loading MMX registers 91 in SImode and DImode */ 92 {3, 3}, /* cost of storing MMX registers 93 in SImode and DImode */ 94 3, /* cost of moving SSE register */ 95 {3, 3, 3}, /* cost of loading SSE registers 96 in SImode, DImode and TImode */ 97 {3, 3, 3}, /* cost of storing SSE registers 98 in SImode, DImode and TImode */ 99 3, /* MMX or SSE register to integer */ 100 0, /* size of prefetch block */ 101 0, /* number of parallel prefetches */ 102 1, /* Branch cost */ 103 2, /* cost of FADD and FSUB insns. */ 104 2, /* cost of FMUL instruction. */ 105 2, /* cost of FDIV instruction. */ 106 2, /* cost of FABS instruction. */ 107 2, /* cost of FCHS instruction. */ 108 2, /* cost of FSQRT instruction. */ 109}; 110 111/* Processor costs (relative to an add) */ 112static const 113struct processor_costs i386_cost = { /* 386 specific costs */ 114 1, /* cost of an add instruction */ 115 1, /* cost of a lea instruction */ 116 3, /* variable shift costs */ 117 2, /* constant shift costs */ 118 {6, 6, 6, 6, 6}, /* cost of starting a multiply */ 119 1, /* cost of multiply per each bit set */ 120 {23, 23, 23, 23, 23}, /* cost of a divide/mod */ 121 3, /* cost of movsx */ 122 2, /* cost of movzx */ 123 15, /* "large" insn */ 124 3, /* MOVE_RATIO */ 125 4, /* cost for loading QImode using movzbl */ 126 {2, 4, 2}, /* cost of loading integer registers 127 in QImode, HImode and SImode. 128 Relative to reg-reg move (2). 
*/ 129 {2, 4, 2}, /* cost of storing integer registers */ 130 2, /* cost of reg,reg fld/fst */ 131 {8, 8, 8}, /* cost of loading fp registers 132 in SFmode, DFmode and XFmode */ 133 {8, 8, 8}, /* cost of loading integer registers */ 134 2, /* cost of moving MMX register */ 135 {4, 8}, /* cost of loading MMX registers 136 in SImode and DImode */ 137 {4, 8}, /* cost of storing MMX registers 138 in SImode and DImode */ 139 2, /* cost of moving SSE register */ 140 {4, 8, 16}, /* cost of loading SSE registers 141 in SImode, DImode and TImode */ 142 {4, 8, 16}, /* cost of storing SSE registers 143 in SImode, DImode and TImode */ 144 3, /* MMX or SSE register to integer */ 145 0, /* size of prefetch block */ 146 0, /* number of parallel prefetches */ 147 1, /* Branch cost */ 148 23, /* cost of FADD and FSUB insns. */ 149 27, /* cost of FMUL instruction. */ 150 88, /* cost of FDIV instruction. */ 151 22, /* cost of FABS instruction. */ 152 24, /* cost of FCHS instruction. */ 153 122, /* cost of FSQRT instruction. */ 154}; 155 156static const 157struct processor_costs i486_cost = { /* 486 specific costs */ 158 1, /* cost of an add instruction */ 159 1, /* cost of a lea instruction */ 160 3, /* variable shift costs */ 161 2, /* constant shift costs */ 162 {12, 12, 12, 12, 12}, /* cost of starting a multiply */ 163 1, /* cost of multiply per each bit set */ 164 {40, 40, 40, 40, 40}, /* cost of a divide/mod */ 165 3, /* cost of movsx */ 166 2, /* cost of movzx */ 167 15, /* "large" insn */ 168 3, /* MOVE_RATIO */ 169 4, /* cost for loading QImode using movzbl */ 170 {2, 4, 2}, /* cost of loading integer registers 171 in QImode, HImode and SImode. 172 Relative to reg-reg move (2). 
*/ 173 {2, 4, 2}, /* cost of storing integer registers */ 174 2, /* cost of reg,reg fld/fst */ 175 {8, 8, 8}, /* cost of loading fp registers 176 in SFmode, DFmode and XFmode */ 177 {8, 8, 8}, /* cost of loading integer registers */ 178 2, /* cost of moving MMX register */ 179 {4, 8}, /* cost of loading MMX registers 180 in SImode and DImode */ 181 {4, 8}, /* cost of storing MMX registers 182 in SImode and DImode */ 183 2, /* cost of moving SSE register */ 184 {4, 8, 16}, /* cost of loading SSE registers 185 in SImode, DImode and TImode */ 186 {4, 8, 16}, /* cost of storing SSE registers 187 in SImode, DImode and TImode */ 188 3, /* MMX or SSE register to integer */ 189 0, /* size of prefetch block */ 190 0, /* number of parallel prefetches */ 191 1, /* Branch cost */ 192 8, /* cost of FADD and FSUB insns. */ 193 16, /* cost of FMUL instruction. */ 194 73, /* cost of FDIV instruction. */ 195 3, /* cost of FABS instruction. */ 196 3, /* cost of FCHS instruction. */ 197 83, /* cost of FSQRT instruction. */ 198}; 199 200static const 201struct processor_costs pentium_cost = { 202 1, /* cost of an add instruction */ 203 1, /* cost of a lea instruction */ 204 4, /* variable shift costs */ 205 1, /* constant shift costs */ 206 {11, 11, 11, 11, 11}, /* cost of starting a multiply */ 207 0, /* cost of multiply per each bit set */ 208 {25, 25, 25, 25, 25}, /* cost of a divide/mod */ 209 3, /* cost of movsx */ 210 2, /* cost of movzx */ 211 8, /* "large" insn */ 212 6, /* MOVE_RATIO */ 213 6, /* cost for loading QImode using movzbl */ 214 {2, 4, 2}, /* cost of loading integer registers 215 in QImode, HImode and SImode. 216 Relative to reg-reg move (2). 
*/ 217 {2, 4, 2}, /* cost of storing integer registers */ 218 2, /* cost of reg,reg fld/fst */ 219 {2, 2, 6}, /* cost of loading fp registers 220 in SFmode, DFmode and XFmode */ 221 {4, 4, 6}, /* cost of loading integer registers */ 222 8, /* cost of moving MMX register */ 223 {8, 8}, /* cost of loading MMX registers 224 in SImode and DImode */ 225 {8, 8}, /* cost of storing MMX registers 226 in SImode and DImode */ 227 2, /* cost of moving SSE register */ 228 {4, 8, 16}, /* cost of loading SSE registers 229 in SImode, DImode and TImode */ 230 {4, 8, 16}, /* cost of storing SSE registers 231 in SImode, DImode and TImode */ 232 3, /* MMX or SSE register to integer */ 233 0, /* size of prefetch block */ 234 0, /* number of parallel prefetches */ 235 2, /* Branch cost */ 236 3, /* cost of FADD and FSUB insns. */ 237 3, /* cost of FMUL instruction. */ 238 39, /* cost of FDIV instruction. */ 239 1, /* cost of FABS instruction. */ 240 1, /* cost of FCHS instruction. */ 241 70, /* cost of FSQRT instruction. */ 242}; 243 244static const 245struct processor_costs pentiumpro_cost = { 246 1, /* cost of an add instruction */ 247 1, /* cost of a lea instruction */ 248 1, /* variable shift costs */ 249 1, /* constant shift costs */ 250 {4, 4, 4, 4, 4}, /* cost of starting a multiply */ 251 0, /* cost of multiply per each bit set */ 252 {17, 17, 17, 17, 17}, /* cost of a divide/mod */ 253 1, /* cost of movsx */ 254 1, /* cost of movzx */ 255 8, /* "large" insn */ 256 6, /* MOVE_RATIO */ 257 2, /* cost for loading QImode using movzbl */ 258 {4, 4, 4}, /* cost of loading integer registers 259 in QImode, HImode and SImode. 260 Relative to reg-reg move (2). 
*/ 261 {2, 2, 2}, /* cost of storing integer registers */ 262 2, /* cost of reg,reg fld/fst */ 263 {2, 2, 6}, /* cost of loading fp registers 264 in SFmode, DFmode and XFmode */ 265 {4, 4, 6}, /* cost of loading integer registers */ 266 2, /* cost of moving MMX register */ 267 {2, 2}, /* cost of loading MMX registers 268 in SImode and DImode */ 269 {2, 2}, /* cost of storing MMX registers 270 in SImode and DImode */ 271 2, /* cost of moving SSE register */ 272 {2, 2, 8}, /* cost of loading SSE registers 273 in SImode, DImode and TImode */ 274 {2, 2, 8}, /* cost of storing SSE registers 275 in SImode, DImode and TImode */ 276 3, /* MMX or SSE register to integer */ 277 32, /* size of prefetch block */ 278 6, /* number of parallel prefetches */ 279 2, /* Branch cost */ 280 3, /* cost of FADD and FSUB insns. */ 281 5, /* cost of FMUL instruction. */ 282 56, /* cost of FDIV instruction. */ 283 2, /* cost of FABS instruction. */ 284 2, /* cost of FCHS instruction. */ 285 56, /* cost of FSQRT instruction. */ 286}; 287 288static const 289struct processor_costs k6_cost = { 290 1, /* cost of an add instruction */ 291 2, /* cost of a lea instruction */ 292 1, /* variable shift costs */ 293 1, /* constant shift costs */ 294 {3, 3, 3, 3, 3}, /* cost of starting a multiply */ 295 0, /* cost of multiply per each bit set */ 296 {18, 18, 18, 18, 18}, /* cost of a divide/mod */ 297 2, /* cost of movsx */ 298 2, /* cost of movzx */ 299 8, /* "large" insn */ 300 4, /* MOVE_RATIO */ 301 3, /* cost for loading QImode using movzbl */ 302 {4, 5, 4}, /* cost of loading integer registers 303 in QImode, HImode and SImode. 304 Relative to reg-reg move (2). 
*/ 305 {2, 3, 2}, /* cost of storing integer registers */ 306 4, /* cost of reg,reg fld/fst */ 307 {6, 6, 6}, /* cost of loading fp registers 308 in SFmode, DFmode and XFmode */ 309 {4, 4, 4}, /* cost of loading integer registers */ 310 2, /* cost of moving MMX register */ 311 {2, 2}, /* cost of loading MMX registers 312 in SImode and DImode */ 313 {2, 2}, /* cost of storing MMX registers 314 in SImode and DImode */ 315 2, /* cost of moving SSE register */ 316 {2, 2, 8}, /* cost of loading SSE registers 317 in SImode, DImode and TImode */ 318 {2, 2, 8}, /* cost of storing SSE registers 319 in SImode, DImode and TImode */ 320 6, /* MMX or SSE register to integer */ 321 32, /* size of prefetch block */ 322 1, /* number of parallel prefetches */ 323 1, /* Branch cost */ 324 2, /* cost of FADD and FSUB insns. */ 325 2, /* cost of FMUL instruction. */ 326 56, /* cost of FDIV instruction. */ 327 2, /* cost of FABS instruction. */ 328 2, /* cost of FCHS instruction. */ 329 56, /* cost of FSQRT instruction. */ 330}; 331 332static const 333struct processor_costs athlon_cost = { 334 1, /* cost of an add instruction */ 335 2, /* cost of a lea instruction */ 336 1, /* variable shift costs */ 337 1, /* constant shift costs */ 338 {5, 5, 5, 5, 5}, /* cost of starting a multiply */ 339 0, /* cost of multiply per each bit set */ 340 {18, 26, 42, 74, 74}, /* cost of a divide/mod */ 341 1, /* cost of movsx */ 342 1, /* cost of movzx */ 343 8, /* "large" insn */ 344 9, /* MOVE_RATIO */ 345 4, /* cost for loading QImode using movzbl */ 346 {3, 4, 3}, /* cost of loading integer registers 347 in QImode, HImode and SImode. 348 Relative to reg-reg move (2). 
*/ 349 {3, 4, 3}, /* cost of storing integer registers */ 350 4, /* cost of reg,reg fld/fst */ 351 {4, 4, 12}, /* cost of loading fp registers 352 in SFmode, DFmode and XFmode */ 353 {6, 6, 8}, /* cost of loading integer registers */ 354 2, /* cost of moving MMX register */ 355 {4, 4}, /* cost of loading MMX registers 356 in SImode and DImode */ 357 {4, 4}, /* cost of storing MMX registers 358 in SImode and DImode */ 359 2, /* cost of moving SSE register */ 360 {4, 4, 6}, /* cost of loading SSE registers 361 in SImode, DImode and TImode */ 362 {4, 4, 5}, /* cost of storing SSE registers 363 in SImode, DImode and TImode */ 364 5, /* MMX or SSE register to integer */ 365 64, /* size of prefetch block */ 366 6, /* number of parallel prefetches */ 367 2, /* Branch cost */ 368 4, /* cost of FADD and FSUB insns. */ 369 4, /* cost of FMUL instruction. */ 370 24, /* cost of FDIV instruction. */ 371 2, /* cost of FABS instruction. */ 372 2, /* cost of FCHS instruction. */ 373 35, /* cost of FSQRT instruction. */ 374}; 375 376static const 377struct processor_costs k8_cost = { 378 1, /* cost of an add instruction */ 379 2, /* cost of a lea instruction */ 380 1, /* variable shift costs */ 381 1, /* constant shift costs */ 382 {3, 4, 3, 4, 5}, /* cost of starting a multiply */ 383 0, /* cost of multiply per each bit set */ 384 {18, 26, 42, 74, 74}, /* cost of a divide/mod */ 385 1, /* cost of movsx */ 386 1, /* cost of movzx */ 387 8, /* "large" insn */ 388 9, /* MOVE_RATIO */ 389 4, /* cost for loading QImode using movzbl */ 390 {3, 4, 3}, /* cost of loading integer registers 391 in QImode, HImode and SImode. 392 Relative to reg-reg move (2). 
*/ 393 {3, 4, 3}, /* cost of storing integer registers */ 394 4, /* cost of reg,reg fld/fst */ 395 {4, 4, 12}, /* cost of loading fp registers 396 in SFmode, DFmode and XFmode */ 397 {6, 6, 8}, /* cost of loading integer registers */ 398 2, /* cost of moving MMX register */ 399 {3, 3}, /* cost of loading MMX registers 400 in SImode and DImode */ 401 {4, 4}, /* cost of storing MMX registers 402 in SImode and DImode */ 403 2, /* cost of moving SSE register */ 404 {4, 3, 6}, /* cost of loading SSE registers 405 in SImode, DImode and TImode */ 406 {4, 4, 5}, /* cost of storing SSE registers 407 in SImode, DImode and TImode */ 408 5, /* MMX or SSE register to integer */ 409 64, /* size of prefetch block */ 410 6, /* number of parallel prefetches */ 411 2, /* Branch cost */ 412 4, /* cost of FADD and FSUB insns. */ 413 4, /* cost of FMUL instruction. */ 414 19, /* cost of FDIV instruction. */ 415 2, /* cost of FABS instruction. */ 416 2, /* cost of FCHS instruction. */ 417 35, /* cost of FSQRT instruction. */ 418}; 419 420static const 421struct processor_costs pentium4_cost = { 422 1, /* cost of an add instruction */ 423 1, /* cost of a lea instruction */ 424 4, /* variable shift costs */ 425 4, /* constant shift costs */ 426 {15, 15, 15, 15, 15}, /* cost of starting a multiply */ 427 0, /* cost of multiply per each bit set */ 428 {56, 56, 56, 56, 56}, /* cost of a divide/mod */ 429 1, /* cost of movsx */ 430 1, /* cost of movzx */ 431 16, /* "large" insn */ 432 6, /* MOVE_RATIO */ 433 2, /* cost for loading QImode using movzbl */ 434 {4, 5, 4}, /* cost of loading integer registers 435 in QImode, HImode and SImode. 436 Relative to reg-reg move (2). 
*/ 437 {2, 3, 2}, /* cost of storing integer registers */ 438 2, /* cost of reg,reg fld/fst */ 439 {2, 2, 6}, /* cost of loading fp registers 440 in SFmode, DFmode and XFmode */ 441 {4, 4, 6}, /* cost of loading integer registers */ 442 2, /* cost of moving MMX register */ 443 {2, 2}, /* cost of loading MMX registers 444 in SImode and DImode */ 445 {2, 2}, /* cost of storing MMX registers 446 in SImode and DImode */ 447 12, /* cost of moving SSE register */ 448 {12, 12, 12}, /* cost of loading SSE registers 449 in SImode, DImode and TImode */ 450 {2, 2, 8}, /* cost of storing SSE registers 451 in SImode, DImode and TImode */ 452 10, /* MMX or SSE register to integer */ 453 64, /* size of prefetch block */ 454 6, /* number of parallel prefetches */ 455 2, /* Branch cost */ 456 5, /* cost of FADD and FSUB insns. */ 457 7, /* cost of FMUL instruction. */ 458 43, /* cost of FDIV instruction. */ 459 2, /* cost of FABS instruction. */ 460 2, /* cost of FCHS instruction. */ 461 43, /* cost of FSQRT instruction. */ 462}; 463 464const struct processor_costs *ix86_cost = &pentium_cost; 465 466/* Processor feature/optimization bitmasks. 
*/ 467#define m_386 (1<<PROCESSOR_I386) 468#define m_486 (1<<PROCESSOR_I486) 469#define m_PENT (1<<PROCESSOR_PENTIUM) 470#define m_PPRO (1<<PROCESSOR_PENTIUMPRO) 471#define m_K6 (1<<PROCESSOR_K6) 472#define m_ATHLON (1<<PROCESSOR_ATHLON) 473#define m_PENT4 (1<<PROCESSOR_PENTIUM4) 474#define m_K8 (1<<PROCESSOR_K8) 475#define m_ATHLON_K8 (m_K8 | m_ATHLON) 476 477const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8; 478const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4; 479const int x86_zero_extend_with_and = m_486 | m_PENT; 480const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */; 481const int x86_double_with_add = ~m_386; 482const int x86_use_bit_test = m_386; 483const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6; 484const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4; 485const int x86_3dnow_a = m_ATHLON_K8; 486const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4; 487const int x86_branch_hints = m_PENT4; 488const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4; 489const int x86_partial_reg_stall = m_PPRO; 490const int x86_use_loop = m_K6; 491const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT); 492const int x86_use_mov0 = m_K6; 493const int x86_use_cltd = ~(m_PENT | m_K6); 494const int x86_read_modify_write = ~m_PENT; 495const int x86_read_modify = ~(m_PENT | m_PPRO); 496const int x86_split_long_moves = m_PPRO; 497const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8; 498const int x86_fast_prefix = ~(m_PENT | m_486 | m_386); 499const int x86_single_stringop = m_386 | m_PENT4; 500const int x86_qimode_math = ~(0); 501const int x86_promote_qi_regs = 0; 502const int x86_himode_math = ~(m_PPRO); 503const int x86_promote_hi_regs = m_PPRO; 504const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4; 505const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4; 506const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4; 507const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | 
m_K6 | m_386 | m_486 | m_PENT4; 508const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO); 509const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4; 510const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4; 511const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO; 512const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO; 513const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO; 514const int x86_decompose_lea = m_PENT4; 515const int x86_shift1 = ~m_486; 516const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4; 517const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO; 518/* Set for machines where the type and dependencies are resolved on SSE register 519 parts instead of whole registers, so we may maintain just lower part of 520 scalar values in proper format leaving the upper part undefined. */ 521const int x86_sse_partial_regs = m_ATHLON_K8; 522/* Athlon optimizes partial-register FPS special case, thus avoiding the 523 need for extra instructions beforehand */ 524const int x86_sse_partial_regs_for_cvtsd2ss = 0; 525const int x86_sse_typeless_stores = m_ATHLON_K8; 526const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4; 527const int x86_use_ffreep = m_ATHLON_K8; 528const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6; 529 530/* ??? HACK! The following is a lie. SSE can hold e.g. SImode, and 531 indeed *must* be able to hold SImode so that SSE2 shifts are able 532 to work right. But this can result in some mighty surprising 533 register allocation when building kernels. Turning this off should 534 make us less likely to all-of-the-sudden select an SSE register. 
*/ 535const int x86_inter_unit_moves = 0; /* ~(m_ATHLON_K8) */ 536 537const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO; 538 539/* In case the average insn count for single function invocation is 540 lower than this constant, emit fast (but longer) prologue and 541 epilogue code. */ 542#define FAST_PROLOGUE_INSN_COUNT 20 543 544/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */ 545static const char *const qi_reg_name[] = QI_REGISTER_NAMES; 546static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; 547static const char *const hi_reg_name[] = HI_REGISTER_NAMES; 548 549/* Array of the smallest class containing reg number REGNO, indexed by 550 REGNO. Used by REGNO_REG_CLASS in i386.h. */ 551 552enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] = 553{ 554 /* ax, dx, cx, bx */ 555 AREG, DREG, CREG, BREG, 556 /* si, di, bp, sp */ 557 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS, 558 /* FP registers */ 559 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS, 560 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, 561 /* arg pointer */ 562 NON_Q_REGS, 563 /* flags, fpsr, dirflag, frame */ 564 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS, 565 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 566 SSE_REGS, SSE_REGS, 567 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, 568 MMX_REGS, MMX_REGS, 569 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 570 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 571 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 572 SSE_REGS, SSE_REGS, 573}; 574 575/* The "default" register map used in 32bit mode. 
*/ 576 577int const dbx_register_map[FIRST_PSEUDO_REGISTER] = 578{ 579 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */ 580 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */ 581 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 582 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */ 583 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */ 584 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ 585 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ 586}; 587 588static int const x86_64_int_parameter_registers[6] = 589{ 590 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/, 591 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */ 592}; 593 594static int const x86_64_int_return_registers[4] = 595{ 596 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/ 597}; 598 599/* The "default" register map used in 64bit mode. */ 600int const dbx64_register_map[FIRST_PSEUDO_REGISTER] = 601{ 602 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */ 603 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */ 604 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 605 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */ 606 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */ 607 8,9,10,11,12,13,14,15, /* extended integer registers */ 608 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */ 609}; 610 611/* Define the register numbers to be used in Dwarf debugging information. 612 The SVR4 reference port C compiler uses the following register numbers 613 in its Dwarf output code: 614 0 for %eax (gcc regno = 0) 615 1 for %ecx (gcc regno = 2) 616 2 for %edx (gcc regno = 1) 617 3 for %ebx (gcc regno = 3) 618 4 for %esp (gcc regno = 7) 619 5 for %ebp (gcc regno = 6) 620 6 for %esi (gcc regno = 4) 621 7 for %edi (gcc regno = 5) 622 The following three DWARF register numbers are never generated by 623 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4 624 believes these numbers have these meanings. 
625 8 for %eip (no gcc equivalent) 626 9 for %eflags (gcc regno = 17) 627 10 for %trapno (no gcc equivalent) 628 It is not at all clear how we should number the FP stack registers 629 for the x86 architecture. If the version of SDB on x86/svr4 were 630 a bit less brain dead with respect to floating-point then we would 631 have a precedent to follow with respect to DWARF register numbers 632 for x86 FP registers, but the SDB on x86/svr4 is so completely 633 broken with respect to FP registers that it is hardly worth thinking 634 of it as something to strive for compatibility with. 635 The version of x86/svr4 SDB I have at the moment does (partially) 636 seem to believe that DWARF register number 11 is associated with 637 the x86 register %st(0), but that's about all. Higher DWARF 638 register numbers don't seem to be associated with anything in 639 particular, and even for DWARF regno 11, SDB only seems to under- 640 stand that it should say that a variable lives in %st(0) (when 641 asked via an `=' command) if we said it was in DWARF regno 11, 642 but SDB still prints garbage when asked for the value of the 643 variable in question (via a `/' command). 644 (Also note that the labels SDB prints for various FP stack regs 645 when doing an `x' command are all wrong.) 646 Note that these problems generally don't affect the native SVR4 647 C compiler because it doesn't allow the use of -O with -g and 648 because when it is *not* optimizing, it allocates a memory 649 location for each floating-point variable, and the memory 650 location is what gets described in the DWARF AT_location 651 attribute for the variable in question. 652 Regardless of the severe mental illness of the x86/svr4 SDB, we 653 do something sensible here and we use the following DWARF 654 register numbers. Note that these are all stack-top-relative 655 numbers. 
656 11 for %st(0) (gcc regno = 8) 657 12 for %st(1) (gcc regno = 9) 658 13 for %st(2) (gcc regno = 10) 659 14 for %st(3) (gcc regno = 11) 660 15 for %st(4) (gcc regno = 12) 661 16 for %st(5) (gcc regno = 13) 662 17 for %st(6) (gcc regno = 14) 663 18 for %st(7) (gcc regno = 15) 664*/ 665int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = 666{ 667 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */ 668 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */ 669 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 670 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */ 671 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */ 672 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ 673 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ 674}; 675 676/* Test and compare insns in i386.md store the information needed to 677 generate branch and scc insns here. */ 678 679rtx ix86_compare_op0 = NULL_RTX; 680rtx ix86_compare_op1 = NULL_RTX; 681 682#define MAX_386_STACK_LOCALS 3 683/* Size of the register save area. */ 684#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16) 685 686/* Define the structure for the machine field in struct function. */ 687 688struct stack_local_entry GTY(()) 689{ 690 unsigned short mode; 691 unsigned short n; 692 rtx rtl; 693 struct stack_local_entry *next; 694}; 695 696/* Structure describing stack frame layout. 697 Stack grows downward: 698 699 [arguments] 700 <- ARG_POINTER 701 saved pc 702 703 saved frame pointer if frame_pointer_needed 704 <- HARD_FRAME_POINTER 705 [saved regs] 706 707 [padding1] \ 708 ) 709 [va_arg registers] ( 710 > to_allocate <- FRAME_POINTER 711 [frame] ( 712 ) 713 [padding2] / 714 */ 715struct ix86_frame 716{ 717 int nregs; 718 int padding1; 719 int va_arg_size; 720 HOST_WIDE_INT frame; 721 int padding2; 722 int outgoing_arguments_size; 723 int red_zone_size; 724 725 HOST_WIDE_INT to_allocate; 726 /* The offsets relative to ARG_POINTER. 
*/ 727 HOST_WIDE_INT frame_pointer_offset; 728 HOST_WIDE_INT hard_frame_pointer_offset; 729 HOST_WIDE_INT stack_pointer_offset; 730 731 /* When save_regs_using_mov is set, emit prologue using 732 move instead of push instructions. */ 733 bool save_regs_using_mov; 734}; 735 736/* Used to enable/disable debugging features. */ 737const char *ix86_debug_arg_string, *ix86_debug_addr_string; 738/* Code model option as passed by user. */ 739const char *ix86_cmodel_string; 740/* Parsed value. */ 741enum cmodel ix86_cmodel; 742/* Asm dialect. */ 743const char *ix86_asm_string; 744enum asm_dialect ix86_asm_dialect = ASM_ATT; 745/* TLS dialext. */ 746const char *ix86_tls_dialect_string; 747enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU; 748 749/* Which unit we are generating floating point math for. */ 750enum fpmath_unit ix86_fpmath; 751 752/* Which cpu are we scheduling for. */ 753enum processor_type ix86_tune; 754/* Which instruction set architecture to use. */ 755enum processor_type ix86_arch; 756 757/* Strings to hold which cpu and instruction set architecture to use. */ 758const char *ix86_tune_string; /* for -mtune=<xxx> */ 759const char *ix86_arch_string; /* for -march=<xxx> */ 760const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */ 761 762/* # of registers to use to pass arguments. */ 763const char *ix86_regparm_string; 764 765/* true if sse prefetch instruction is not NOOP. */ 766int x86_prefetch_sse; 767 768/* ix86_regparm_string as a number */ 769int ix86_regparm; 770 771/* Alignment to use for loops and jumps: */ 772 773/* Power of two alignment for loops. */ 774const char *ix86_align_loops_string; 775 776/* Power of two alignment for non-loop jumps. */ 777const char *ix86_align_jumps_string; 778 779/* Power of two alignment for stack boundary in bytes. */ 780const char *ix86_preferred_stack_boundary_string; 781 782/* Preferred alignment for stack boundary in bits. 
*/ 783int ix86_preferred_stack_boundary; 784 785/* Values 1-5: see jump.c */ 786int ix86_branch_cost; 787const char *ix86_branch_cost_string; 788 789/* Power of two alignment for functions. */ 790const char *ix86_align_funcs_string; 791 792/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */ 793static char internal_label_prefix[16]; 794static int internal_label_prefix_len; 795 796static int local_symbolic_operand (rtx, enum machine_mode); 797static int tls_symbolic_operand_1 (rtx, enum tls_model); 798static void output_pic_addr_const (FILE *, rtx, int); 799static void put_condition_code (enum rtx_code, enum machine_mode, 800 int, int, FILE *); 801static const char *get_some_local_dynamic_name (void); 802static int get_some_local_dynamic_name_1 (rtx *, void *); 803static rtx maybe_get_pool_constant (rtx); 804static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx); 805static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *, 806 rtx *); 807static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *); 808static enum machine_mode ix86_cc_modes_compatible (enum machine_mode, 809 enum machine_mode); 810static rtx get_thread_pointer (int); 811static rtx legitimize_tls_address (rtx, enum tls_model, int); 812static void get_pc_thunk_name (char [32], unsigned int); 813static rtx gen_push (rtx); 814static int memory_address_length (rtx addr); 815static int ix86_flags_dependant (rtx, rtx, enum attr_type); 816static int ix86_agi_dependant (rtx, rtx, enum attr_type); 817static enum attr_ppro_uops ix86_safe_ppro_uops (rtx); 818static void ix86_dump_ppro_packet (FILE *); 819static void ix86_reorder_insn (rtx *, rtx *); 820static struct machine_function * ix86_init_machine_status (void); 821static int ix86_split_to_parts (rtx, rtx *, enum machine_mode); 822static int ix86_nsaved_regs (void); 823static void ix86_emit_save_regs (void); 824static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT); 825static void ix86_emit_restore_regs_using_mov 
(rtx, HOST_WIDE_INT, int); 826static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT); 827static void ix86_sched_reorder_ppro (rtx *, rtx *); 828static HOST_WIDE_INT ix86_GOT_alias_set (void); 829static void ix86_adjust_counter (rtx, HOST_WIDE_INT); 830static rtx ix86_expand_aligntest (rtx, int); 831static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx); 832static int ix86_issue_rate (void); 833static int ix86_adjust_cost (rtx, rtx, rtx, int); 834static void ix86_sched_init (FILE *, int, int); 835static int ix86_sched_reorder (FILE *, int, rtx *, int *, int); 836static int ix86_variable_issue (FILE *, int, rtx, int); 837static int ia32_use_dfa_pipeline_interface (void); 838static int ia32_multipass_dfa_lookahead (void); 839static void ix86_init_mmx_sse_builtins (void); 840static rtx x86_this_parameter (tree); 841static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, 842 HOST_WIDE_INT, tree); 843static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree); 844static void x86_file_start (void); 845static void ix86_reorg (void); 846static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*); 847static tree ix86_build_builtin_va_list (void); 848 849struct ix86_address 850{ 851 rtx base, index, disp; 852 HOST_WIDE_INT scale; 853 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg; 854}; 855 856static int ix86_decompose_address (rtx, struct ix86_address *); 857static int ix86_address_cost (rtx); 858static bool ix86_cannot_force_const_mem (rtx); 859static rtx ix86_delegitimize_address (rtx); 860 861struct builtin_description; 862static rtx ix86_expand_sse_comi (const struct builtin_description *, 863 tree, rtx); 864static rtx ix86_expand_sse_compare (const struct builtin_description *, 865 tree, rtx); 866static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx); 867static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int); 868static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx); 
869static rtx ix86_expand_store_builtin (enum insn_code, tree); 870static rtx safe_vector_operand (rtx, enum machine_mode); 871static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code); 872static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *, 873 enum rtx_code *, enum rtx_code *); 874static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *); 875static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code); 876static int ix86_fp_comparison_fcomi_cost (enum rtx_code code); 877static int ix86_fp_comparison_sahf_cost (enum rtx_code code); 878static int ix86_fp_comparison_cost (enum rtx_code code); 879static unsigned int ix86_select_alt_pic_regnum (void); 880static int ix86_save_reg (unsigned int, int); 881static void ix86_compute_frame_layout (struct ix86_frame *); 882static int ix86_comp_type_attributes (tree, tree); 883static int ix86_function_regparm (tree, tree); 884const struct attribute_spec ix86_attribute_table[]; 885static bool ix86_function_ok_for_sibcall (tree, tree); 886static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *); 887static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *); 888static int ix86_value_regno (enum machine_mode); 889static bool contains_128bit_aligned_vector_p (tree); 890static bool ix86_ms_bitfield_layout_p (tree); 891static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *); 892static int extended_reg_mentioned_1 (rtx *, void *); 893static bool ix86_rtx_costs (rtx, int, int, int *); 894static int min_insn_size (rtx); 895static void k8_avoid_jump_misspredicts (void); 896 897#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION) 898static void ix86_svr3_asm_out_constructor (rtx, int); 899#endif 900 901/* Register class used for passing given 64bit part of the argument. 
902 These represent classes as documented by the PS ABI, with the exception 903 of SSESF, SSEDF classes, that are basically SSE class, just gcc will 904 use SF or DFmode move instead of DImode to avoid reformatting penalties. 905 906 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves 907 whenever possible (upper half does contain padding). 908 */ 909enum x86_64_reg_class 910 { 911 X86_64_NO_CLASS, 912 X86_64_INTEGER_CLASS, 913 X86_64_INTEGERSI_CLASS, 914 X86_64_SSE_CLASS, 915 X86_64_SSESF_CLASS, 916 X86_64_SSEDF_CLASS, 917 X86_64_SSEUP_CLASS, 918 X86_64_X87_CLASS, 919 X86_64_X87UP_CLASS, 920 X86_64_MEMORY_CLASS 921 }; 922static const char * const x86_64_reg_class_name[] = 923 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"}; 924 925#define MAX_CLASSES 4 926static int classify_argument (enum machine_mode, tree, 927 enum x86_64_reg_class [MAX_CLASSES], int); 928static int examine_argument (enum machine_mode, tree, int, int *, int *); 929static rtx construct_container (enum machine_mode, tree, int, int, int, 930 const int *, int); 931static enum x86_64_reg_class merge_classes (enum x86_64_reg_class, 932 enum x86_64_reg_class); 933 934/* Table of constants used by fldpi, fldln2, etc.... */ 935static REAL_VALUE_TYPE ext_80387_constants_table [5]; 936static bool ext_80387_constants_init = 0; 937static void init_ext_80387_constants (void); 938 939/* Initialize the GCC target structure. 
*/ 940#undef TARGET_ATTRIBUTE_TABLE 941#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table 942#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES 943# undef TARGET_MERGE_DECL_ATTRIBUTES 944# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes 945#endif 946 947#undef TARGET_COMP_TYPE_ATTRIBUTES 948#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes 949 950#undef TARGET_INIT_BUILTINS 951#define TARGET_INIT_BUILTINS ix86_init_builtins 952 953#undef TARGET_EXPAND_BUILTIN 954#define TARGET_EXPAND_BUILTIN ix86_expand_builtin 955 956#undef TARGET_ASM_FUNCTION_EPILOGUE 957#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue 958 959#undef TARGET_ASM_OPEN_PAREN 960#define TARGET_ASM_OPEN_PAREN "" 961#undef TARGET_ASM_CLOSE_PAREN 962#define TARGET_ASM_CLOSE_PAREN "" 963 964#undef TARGET_ASM_ALIGNED_HI_OP 965#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT 966#undef TARGET_ASM_ALIGNED_SI_OP 967#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG 968#ifdef ASM_QUAD 969#undef TARGET_ASM_ALIGNED_DI_OP 970#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD 971#endif 972 973#undef TARGET_ASM_UNALIGNED_HI_OP 974#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP 975#undef TARGET_ASM_UNALIGNED_SI_OP 976#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP 977#undef TARGET_ASM_UNALIGNED_DI_OP 978#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP 979 980#undef TARGET_SCHED_ADJUST_COST 981#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost 982#undef TARGET_SCHED_ISSUE_RATE 983#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate 984#undef TARGET_SCHED_VARIABLE_ISSUE 985#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue 986#undef TARGET_SCHED_INIT 987#define TARGET_SCHED_INIT ix86_sched_init 988#undef TARGET_SCHED_REORDER 989#define TARGET_SCHED_REORDER ix86_sched_reorder 990#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE 991#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \ 992 ia32_use_dfa_pipeline_interface 993#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD 
994#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ 995 ia32_multipass_dfa_lookahead 996 997#undef TARGET_FUNCTION_OK_FOR_SIBCALL 998#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall 999 1000#ifdef HAVE_AS_TLS 1001#undef TARGET_HAVE_TLS 1002#define TARGET_HAVE_TLS true 1003#endif 1004#undef TARGET_CANNOT_FORCE_CONST_MEM 1005#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem 1006 1007#undef TARGET_DELEGITIMIZE_ADDRESS 1008#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address 1009 1010#undef TARGET_MS_BITFIELD_LAYOUT_P 1011#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p 1012 1013#undef TARGET_ASM_OUTPUT_MI_THUNK 1014#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk 1015#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK 1016#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk 1017 1018#undef TARGET_ASM_FILE_START 1019#define TARGET_ASM_FILE_START x86_file_start 1020 1021#undef TARGET_RTX_COSTS 1022#define TARGET_RTX_COSTS ix86_rtx_costs 1023#undef TARGET_ADDRESS_COST 1024#define TARGET_ADDRESS_COST ix86_address_cost 1025 1026#undef TARGET_FIXED_CONDITION_CODE_REGS 1027#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs 1028#undef TARGET_CC_MODES_COMPATIBLE 1029#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible 1030 1031#undef TARGET_MACHINE_DEPENDENT_REORG 1032#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg 1033 1034#undef TARGET_BUILD_BUILTIN_VA_LIST 1035#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list 1036 1037struct gcc_target targetm = TARGET_INITIALIZER; 1038 1039/* The svr4 ABI for the i386 says that records and unions are returned 1040 in memory. */ 1041#ifndef DEFAULT_PCC_STRUCT_RETURN 1042#define DEFAULT_PCC_STRUCT_RETURN 1 1043#endif 1044 1045/* Sometimes certain combinations of command options do not make 1046 sense on a particular target machine. You can define a macro 1047 `OVERRIDE_OPTIONS' to take account of this. 
This macro, if 1048 defined, is executed once just after all the command options have 1049 been parsed. 1050 1051 Don't use this macro to turn on various extra optimizations for 1052 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ 1053 1054void 1055override_options (void) 1056{ 1057 int i; 1058 /* Comes from final.c -- no real reason to change it. */ 1059#define MAX_CODE_ALIGN 16 1060 1061 static struct ptt 1062 { 1063 const struct processor_costs *cost; /* Processor costs */ 1064 const int target_enable; /* Target flags to enable. */ 1065 const int target_disable; /* Target flags to disable. */ 1066 const int align_loop; /* Default alignments. */ 1067 const int align_loop_max_skip; 1068 const int align_jump; 1069 const int align_jump_max_skip; 1070 const int align_func; 1071 } 1072 const processor_target_table[PROCESSOR_max] = 1073 { 1074 {&i386_cost, 0, 0, 4, 3, 4, 3, 4}, 1075 {&i486_cost, 0, 0, 16, 15, 16, 15, 16}, 1076 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16}, 1077 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16}, 1078 {&k6_cost, 0, 0, 32, 7, 32, 7, 32}, 1079 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16}, 1080 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0}, 1081 {&k8_cost, 0, 0, 16, 7, 16, 7, 16} 1082 }; 1083 1084 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES; 1085 static struct pta 1086 { 1087 const char *const name; /* processor name or nickname. 
*/ 1088 const enum processor_type processor; 1089 const enum pta_flags 1090 { 1091 PTA_SSE = 1, 1092 PTA_SSE2 = 2, 1093 PTA_SSE3 = 4, 1094 PTA_MMX = 8, 1095 PTA_PREFETCH_SSE = 16, 1096 PTA_3DNOW = 32, 1097 PTA_3DNOW_A = 64, 1098 PTA_64BIT = 128 1099 } flags; 1100 } 1101 const processor_alias_table[] = 1102 { 1103 {"i386", PROCESSOR_I386, 0}, 1104 {"i486", PROCESSOR_I486, 0}, 1105 {"i586", PROCESSOR_PENTIUM, 0}, 1106 {"pentium", PROCESSOR_PENTIUM, 0}, 1107 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX}, 1108 {"winchip-c6", PROCESSOR_I486, PTA_MMX}, 1109 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, 1110 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, 1111 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE}, 1112 {"i686", PROCESSOR_PENTIUMPRO, 0}, 1113 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0}, 1114 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX}, 1115 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, 1116 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, 1117 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2}, 1118 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 1119 | PTA_MMX | PTA_PREFETCH_SSE}, 1120 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 1121 | PTA_MMX | PTA_PREFETCH_SSE}, 1122 {"prescott", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 1123 | PTA_MMX | PTA_PREFETCH_SSE}, 1124 {"nocona", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT 1125 | PTA_MMX | PTA_PREFETCH_SSE}, 1126 {"k6", PROCESSOR_K6, PTA_MMX}, 1127 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 1128 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 1129 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1130 | PTA_3DNOW_A}, 1131 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE 1132 | PTA_3DNOW | PTA_3DNOW_A}, 1133 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1134 | PTA_3DNOW_A | PTA_SSE}, 1135 {"athlon-xp", PROCESSOR_ATHLON, 
PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1136 | PTA_3DNOW_A | PTA_SSE}, 1137 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1138 | PTA_3DNOW_A | PTA_SSE}, 1139 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT 1140 | PTA_SSE | PTA_SSE2 }, 1141 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1142 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1143 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1144 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1145 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1146 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1147 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1148 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1149 }; 1150 1151 int const pta_size = ARRAY_SIZE (processor_alias_table); 1152 1153 /* Set the default values for switches whose default depends on TARGET_64BIT 1154 in case they weren't overwritten by command line options. */ 1155 if (TARGET_64BIT) 1156 { 1157 if (flag_omit_frame_pointer == 2) 1158 flag_omit_frame_pointer = 1; 1159 if (flag_asynchronous_unwind_tables == 2) 1160 flag_asynchronous_unwind_tables = 1; 1161 if (flag_pcc_struct_return == 2) 1162 flag_pcc_struct_return = 0; 1163 } 1164 else 1165 { 1166 if (flag_omit_frame_pointer == 2) 1167 flag_omit_frame_pointer = 0; 1168 if (flag_asynchronous_unwind_tables == 2) 1169 flag_asynchronous_unwind_tables = 0; 1170 if (flag_pcc_struct_return == 2) 1171 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; 1172 } 1173 1174#ifdef SUBTARGET_OVERRIDE_OPTIONS 1175 SUBTARGET_OVERRIDE_OPTIONS; 1176#endif 1177 1178 if (!ix86_tune_string && ix86_arch_string) 1179 ix86_tune_string = ix86_arch_string; 1180 if (!ix86_tune_string) 1181 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT]; 1182 if (!ix86_arch_string) 1183 ix86_arch_string = TARGET_64BIT ? 
"x86-64" : "i386"; 1184 1185 if (ix86_cmodel_string != 0) 1186 { 1187 if (!strcmp (ix86_cmodel_string, "small")) 1188 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 1189 else if (flag_pic) 1190 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string); 1191 else if (!strcmp (ix86_cmodel_string, "32")) 1192 ix86_cmodel = CM_32; 1193 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic) 1194 ix86_cmodel = CM_KERNEL; 1195 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic) 1196 ix86_cmodel = CM_MEDIUM; 1197 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic) 1198 ix86_cmodel = CM_LARGE; 1199 else 1200 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string); 1201 } 1202 else 1203 { 1204 ix86_cmodel = CM_32; 1205 if (TARGET_64BIT) 1206 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 1207 } 1208 if (ix86_asm_string != 0) 1209 { 1210 if (!strcmp (ix86_asm_string, "intel")) 1211 ix86_asm_dialect = ASM_INTEL; 1212 else if (!strcmp (ix86_asm_string, "att")) 1213 ix86_asm_dialect = ASM_ATT; 1214 else 1215 error ("bad value (%s) for -masm= switch", ix86_asm_string); 1216 } 1217 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32)) 1218 error ("code model `%s' not supported in the %s bit mode", 1219 ix86_cmodel_string, TARGET_64BIT ? "64" : "32"); 1220 if (ix86_cmodel == CM_LARGE) 1221 sorry ("code model `large' not supported yet"); 1222 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0)) 1223 sorry ("%i-bit mode not compiled in", 1224 (target_flags & MASK_64BIT) ? 64 : 32); 1225 1226 for (i = 0; i < pta_size; i++) 1227 if (! strcmp (ix86_arch_string, processor_alias_table[i].name)) 1228 { 1229 ix86_arch = processor_alias_table[i].processor; 1230 /* Default cpu tuning to the architecture. 
*/ 1231 ix86_tune = ix86_arch; 1232 if (processor_alias_table[i].flags & PTA_MMX 1233 && !(target_flags_explicit & MASK_MMX)) 1234 target_flags |= MASK_MMX; 1235 if (processor_alias_table[i].flags & PTA_3DNOW 1236 && !(target_flags_explicit & MASK_3DNOW)) 1237 target_flags |= MASK_3DNOW; 1238 if (processor_alias_table[i].flags & PTA_3DNOW_A 1239 && !(target_flags_explicit & MASK_3DNOW_A)) 1240 target_flags |= MASK_3DNOW_A; 1241 if (processor_alias_table[i].flags & PTA_SSE 1242 && !(target_flags_explicit & MASK_SSE)) 1243 target_flags |= MASK_SSE; 1244 if (processor_alias_table[i].flags & PTA_SSE2 1245 && !(target_flags_explicit & MASK_SSE2)) 1246 target_flags |= MASK_SSE2; 1247 if (processor_alias_table[i].flags & PTA_SSE3 1248 && !(target_flags_explicit & MASK_SSE3)) 1249 target_flags |= MASK_SSE3; 1250 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) 1251 x86_prefetch_sse = true; 1252 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) 1253 error ("CPU you selected does not support x86-64 instruction set"); 1254 break; 1255 } 1256 1257 if (i == pta_size) 1258 error ("bad value (%s) for -march= switch", ix86_arch_string); 1259 1260 for (i = 0; i < pta_size; i++) 1261 if (! strcmp (ix86_tune_string, processor_alias_table[i].name)) 1262 { 1263 ix86_tune = processor_alias_table[i].processor; 1264 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) 1265 error ("CPU you selected does not support x86-64 instruction set"); 1266 1267 /* Intel CPUs have always interpreted SSE prefetch instructions as 1268 NOPs; so, we can enable SSE prefetch instructions even when 1269 -mtune (rather than -march) points us to a processor that has them. 1270 However, the VIA C3 gives a SIGILL, so we only do that for i686 and 1271 higher processors. 
*/ 1272 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE)) 1273 x86_prefetch_sse = true; 1274 break; 1275 } 1276 if (i == pta_size) 1277 error ("bad value (%s) for -mtune= switch", ix86_tune_string); 1278 1279 if (optimize_size) 1280 ix86_cost = &size_cost; 1281 else 1282 ix86_cost = processor_target_table[ix86_tune].cost; 1283 target_flags |= processor_target_table[ix86_tune].target_enable; 1284 target_flags &= ~processor_target_table[ix86_tune].target_disable; 1285 1286 /* Arrange to set up i386_stack_locals for all functions. */ 1287 init_machine_status = ix86_init_machine_status; 1288 1289 /* Validate -mregparm= value. */ 1290 if (ix86_regparm_string) 1291 { 1292 i = atoi (ix86_regparm_string); 1293 if (i < 0 || i > REGPARM_MAX) 1294 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX); 1295 else 1296 ix86_regparm = i; 1297 } 1298 else 1299 if (TARGET_64BIT) 1300 ix86_regparm = REGPARM_MAX; 1301 1302 /* If the user has provided any of the -malign-* options, 1303 warn and use that value only if -falign-* is not set. 1304 Remove this code in GCC 3.2 or later. 
*/ 1305 if (ix86_align_loops_string) 1306 { 1307 warning ("-malign-loops is obsolete, use -falign-loops"); 1308 if (align_loops == 0) 1309 { 1310 i = atoi (ix86_align_loops_string); 1311 if (i < 0 || i > MAX_CODE_ALIGN) 1312 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1313 else 1314 align_loops = 1 << i; 1315 } 1316 } 1317 1318 if (ix86_align_jumps_string) 1319 { 1320 warning ("-malign-jumps is obsolete, use -falign-jumps"); 1321 if (align_jumps == 0) 1322 { 1323 i = atoi (ix86_align_jumps_string); 1324 if (i < 0 || i > MAX_CODE_ALIGN) 1325 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1326 else 1327 align_jumps = 1 << i; 1328 } 1329 } 1330 1331 if (ix86_align_funcs_string) 1332 { 1333 warning ("-malign-functions is obsolete, use -falign-functions"); 1334 if (align_functions == 0) 1335 { 1336 i = atoi (ix86_align_funcs_string); 1337 if (i < 0 || i > MAX_CODE_ALIGN) 1338 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1339 else 1340 align_functions = 1 << i; 1341 } 1342 } 1343 1344 /* Default align_* from the processor table. */ 1345 if (align_loops == 0) 1346 { 1347 align_loops = processor_target_table[ix86_tune].align_loop; 1348 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip; 1349 } 1350 if (align_jumps == 0) 1351 { 1352 align_jumps = processor_target_table[ix86_tune].align_jump; 1353 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip; 1354 } 1355 if (align_functions == 0) 1356 { 1357 align_functions = processor_target_table[ix86_tune].align_func; 1358 } 1359 1360 /* Validate -mpreferred-stack-boundary= value, or provide default. 1361 The default of 128 bits is for Pentium III's SSE __m128, but we 1362 don't want additional code to keep the stack aligned when 1363 optimizing for code size. */ 1364 ix86_preferred_stack_boundary = (optimize_size 1365 ? TARGET_64BIT ? 
128 : 32 1366 : 128); 1367 if (ix86_preferred_stack_boundary_string) 1368 { 1369 i = atoi (ix86_preferred_stack_boundary_string); 1370 if (i < (TARGET_64BIT ? 4 : 2) || i > 12) 1371 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i, 1372 TARGET_64BIT ? 4 : 2); 1373 else 1374 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT; 1375 } 1376 1377 /* Validate -mbranch-cost= value, or provide default. */ 1378 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost; 1379 if (ix86_branch_cost_string) 1380 { 1381 i = atoi (ix86_branch_cost_string); 1382 if (i < 0 || i > 5) 1383 error ("-mbranch-cost=%d is not between 0 and 5", i); 1384 else 1385 ix86_branch_cost = i; 1386 } 1387 1388 if (ix86_tls_dialect_string) 1389 { 1390 if (strcmp (ix86_tls_dialect_string, "gnu") == 0) 1391 ix86_tls_dialect = TLS_DIALECT_GNU; 1392 else if (strcmp (ix86_tls_dialect_string, "sun") == 0) 1393 ix86_tls_dialect = TLS_DIALECT_SUN; 1394 else 1395 error ("bad value (%s) for -mtls-dialect= switch", 1396 ix86_tls_dialect_string); 1397 } 1398 1399 /* Keep nonleaf frame pointers. */ 1400 if (TARGET_OMIT_LEAF_FRAME_POINTER) 1401 flag_omit_frame_pointer = 1; 1402 1403 /* If we're doing fast math, we don't care about comparison order 1404 wrt NaNs. This lets us use a shorter comparison sequence. */ 1405 if (flag_unsafe_math_optimizations) 1406 target_flags &= ~MASK_IEEE_FP; 1407 1408 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387, 1409 since the insns won't need emulation. */ 1410 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch)) 1411 target_flags &= ~MASK_NO_FANCY_MATH_387; 1412 1413 /* Turn on SSE2 builtins for -msse3. */ 1414 if (TARGET_SSE3) 1415 target_flags |= MASK_SSE2; 1416 1417 /* Turn on SSE builtins for -msse2. 
*/ 1418 if (TARGET_SSE2) 1419 target_flags |= MASK_SSE; 1420 1421 if (TARGET_64BIT) 1422 { 1423 if (TARGET_ALIGN_DOUBLE) 1424 error ("-malign-double makes no sense in the 64bit mode"); 1425 if (TARGET_RTD) 1426 error ("-mrtd calling convention not supported in the 64bit mode"); 1427 /* Enable by default the SSE and MMX builtins. */ 1428 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE); 1429 ix86_fpmath = FPMATH_SSE; 1430 } 1431 else 1432 { 1433 ix86_fpmath = FPMATH_387; 1434 /* i386 ABI does not specify red zone. It still makes sense to use it 1435 when programmer takes care to stack from being destroyed. */ 1436 if (!(target_flags_explicit & MASK_NO_RED_ZONE)) 1437 target_flags |= MASK_NO_RED_ZONE; 1438 } 1439 1440 if (ix86_fpmath_string != 0) 1441 { 1442 if (! strcmp (ix86_fpmath_string, "387")) 1443 ix86_fpmath = FPMATH_387; 1444 else if (! strcmp (ix86_fpmath_string, "sse")) 1445 { 1446 if (!TARGET_SSE) 1447 { 1448 warning ("SSE instruction set disabled, using 387 arithmetics"); 1449 ix86_fpmath = FPMATH_387; 1450 } 1451 else 1452 ix86_fpmath = FPMATH_SSE; 1453 } 1454 else if (! strcmp (ix86_fpmath_string, "387,sse") 1455 || ! strcmp (ix86_fpmath_string, "sse,387")) 1456 { 1457 if (!TARGET_SSE) 1458 { 1459 warning ("SSE instruction set disabled, using 387 arithmetics"); 1460 ix86_fpmath = FPMATH_387; 1461 } 1462 else if (!TARGET_80387) 1463 { 1464 warning ("387 instruction set disabled, using SSE arithmetics"); 1465 ix86_fpmath = FPMATH_SSE; 1466 } 1467 else 1468 ix86_fpmath = FPMATH_SSE | FPMATH_387; 1469 } 1470 else 1471 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string); 1472 } 1473 1474 /* It makes no sense to ask for just SSE builtins, so MMX is also turned 1475 on by -msse. */ 1476 if (TARGET_SSE) 1477 { 1478 target_flags |= MASK_MMX; 1479 x86_prefetch_sse = true; 1480 } 1481 1482 /* If it has 3DNow! 
it also has MMX so MMX is also turned on by -m3dnow */ 1483 if (TARGET_3DNOW) 1484 { 1485 target_flags |= MASK_MMX; 1486 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX 1487 extensions it adds. */ 1488 if (x86_3dnow_a & (1 << ix86_arch)) 1489 target_flags |= MASK_3DNOW_A; 1490 } 1491 if ((x86_accumulate_outgoing_args & TUNEMASK) 1492 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) 1493 && !optimize_size) 1494 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 1495 1496 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ 1497 { 1498 char *p; 1499 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); 1500 p = strchr (internal_label_prefix, 'X'); 1501 internal_label_prefix_len = p - internal_label_prefix; 1502 *p = '\0'; 1503 } 1504} 1505 1506void 1507optimization_options (int level, int size ATTRIBUTE_UNUSED) 1508{ 1509 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to 1510 make the problem with not enough registers even worse. */ 1511#ifdef INSN_SCHEDULING 1512 if (level > 1) 1513 flag_schedule_insns = 0; 1514#endif 1515 1516 /* The default values of these switches depend on the TARGET_64BIT 1517 that is not known at this moment. Mark these values with 2 and 1518 let user the to override these. In case there is no command line option 1519 specifying them, we will set the defaults in override_options. */ 1520 if (optimize >= 1) 1521 flag_omit_frame_pointer = 2; 1522 flag_pcc_struct_return = 2; 1523 flag_asynchronous_unwind_tables = 2; 1524} 1525 1526/* Table of valid machine attributes. */ 1527const struct attribute_spec ix86_attribute_table[] = 1528{ 1529 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ 1530 /* Stdcall attribute says callee is responsible for popping arguments 1531 if they are not variable. 
 */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { NULL,        0, 0, false, false, false, NULL }
};

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call (NULL for an
   indirect call) and EXP is the CALL_EXPR representing the call.
   Returns true when a sibcall is permissible.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  */
  if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
    return false;

  /* If we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  */
  if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
      != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
    return false;

  /* If this call is indirect, we'll need to be able to use a call-clobbered
     register for the address of the target function.  Make sure that all
     such registers are not used for passing parameters.  */
  if (!decl && !TARGET_64BIT)
    {
      tree type;

      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = TREE_OPERAND (exp, 0);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */

      /* With regparm >= 3 every call-clobbered integer register would
	 carry an argument, leaving none free for the call target.  */
      if (ix86_function_regparm (type, NULL) >= 3)
	{
	  /* ??? Need to count the actual number of registers to be used,
	     not the possible number of registers.  Fix later.  */
	  return false;
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}

/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
   arguments as in struct attribute_spec.handler.
*/ 1599static tree 1600ix86_handle_cdecl_attribute (tree *node, tree name, 1601 tree args ATTRIBUTE_UNUSED, 1602 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 1603{ 1604 if (TREE_CODE (*node) != FUNCTION_TYPE 1605 && TREE_CODE (*node) != METHOD_TYPE 1606 && TREE_CODE (*node) != FIELD_DECL 1607 && TREE_CODE (*node) != TYPE_DECL) 1608 { 1609 warning ("`%s' attribute only applies to functions", 1610 IDENTIFIER_POINTER (name)); 1611 *no_add_attrs = true; 1612 } 1613 else 1614 { 1615 if (is_attribute_p ("fastcall", name)) 1616 { 1617 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) 1618 { 1619 error ("fastcall and stdcall attributes are not compatible"); 1620 } 1621 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node))) 1622 { 1623 error ("fastcall and regparm attributes are not compatible"); 1624 } 1625 } 1626 else if (is_attribute_p ("stdcall", name)) 1627 { 1628 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) 1629 { 1630 error ("fastcall and stdcall attributes are not compatible"); 1631 } 1632 } 1633 } 1634 1635 if (TARGET_64BIT) 1636 { 1637 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name)); 1638 *no_add_attrs = true; 1639 } 1640 1641 return NULL_TREE; 1642} 1643 1644/* Handle a "regparm" attribute; 1645 arguments as in struct attribute_spec.handler. 
 */
static tree
ix86_handle_regparm_attribute (tree *node, tree name, tree args,
			       int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      tree cst;

      /* The single argument must be an integer constant no larger than
	 REGPARM_MAX; otherwise the attribute is dropped with a warning.  */
      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning ("`%s' attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning ("argument to `%s' attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}

      /* regparm conflicts with fastcall, which fixes the count at 2.  */
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and regparm attributes are not compatible");
	}
    }

  return NULL_TREE;
}

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (tree type1, tree type2)
{
  /* Check for mismatch of non-default calling convention.  With -mrtd the
     default is stdcall-like, so the odd one out is "cdecl"; without it,
     "stdcall" is the deviation to look for.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched fastcall types */
  if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
      != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
    return 0;

  /* Check for mismatched return types (cdecl vs stdcall).
 */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  /* Mismatched register-argument counts are also incompatible.  */
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;
  return 1;
}

/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (tree type, tree decl)
{
  tree attr;
  int regparm = ix86_regparm;	/* Global -mregparm default.  */
  bool user_convention = false;

  /* Only the 32-bit ABI honors regparm/fastcall; in 64-bit mode the
     global default is returned unchanged.  */
  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  user_convention = true;
	}

      /* fastcall implies exactly two register arguments.  */
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
	{
	  regparm = 2;
	  user_convention = true;
	}

      /* Use register calling convention for local functions when possible.  */
      /* NOTE(review): the !TARGET_64BIT test below is redundant inside
	 this enclosing !TARGET_64BIT block.  */
      if (!TARGET_64BIT && !user_convention && decl
	  && flag_unit_at_a_time && !profile_flag)
	{
	  struct cgraph_local_info *i = cgraph_local_info (decl);
	  if (i && i->local)
	    {
	      /* We can't use regparm(3) for nested functions as these use
		 static chain pointer in third argument.  */
	      if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
		regparm = 2;
	      else
		regparm = 3;
	    }
	}
    }
  return regparm;
}

/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.
This gives false positives for broken 1770 functions that might use uninitialized data that happens to be 1771 allocated in eax, but who cares? */ 1772 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0); 1773} 1774 1775/* Value is the number of bytes of arguments automatically 1776 popped when returning from a subroutine call. 1777 FUNDECL is the declaration node of the function (as a tree), 1778 FUNTYPE is the data type of the function (as a tree), 1779 or for a library call it is an identifier node for the subroutine name. 1780 SIZE is the number of bytes of arguments passed on the stack. 1781 1782 On the 80386, the RTD insn may be used to pop them if the number 1783 of args is fixed, but if the number is variable then the caller 1784 must pop them all. RTD can't be used for library calls now 1785 because the library is compiled with the Unix compiler. 1786 Use of RTD is a selectable option, since it is incompatible with 1787 standard Unix calling sequences. If the option is not selected, 1788 the caller must always pop the args. 1789 1790 The attribute stdcall is equivalent to RTD on a per module basis. */ 1791 1792int 1793ix86_return_pops_args (tree fundecl, tree funtype, int size) 1794{ 1795 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE); 1796 1797 /* Cdecl functions override -mrtd, and never pop the stack. */ 1798 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) { 1799 1800 /* Stdcall and fastcall functions will pop the stack if not 1801 variable args. */ 1802 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)) 1803 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))) 1804 rtd = 1; 1805 1806 if (rtd 1807 && (TYPE_ARG_TYPES (funtype) == NULL_TREE 1808 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype))) 1809 == void_type_node))) 1810 return size; 1811 } 1812 1813 /* Lose any fake structure return argument if it is passed on the stack. 
*/ 1814 if (aggregate_value_p (TREE_TYPE (funtype), fundecl) 1815 && !TARGET_64BIT) 1816 { 1817 int nregs = ix86_function_regparm (funtype, fundecl); 1818 1819 if (!nregs) 1820 return GET_MODE_SIZE (Pmode); 1821 } 1822 1823 return 0; 1824} 1825 1826/* Argument support functions. */ 1827 1828/* Return true when register may be used to pass function parameters. */ 1829bool 1830ix86_function_arg_regno_p (int regno) 1831{ 1832 int i; 1833 if (!TARGET_64BIT) 1834 return (regno < REGPARM_MAX 1835 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno])); 1836 if (SSE_REGNO_P (regno) && TARGET_SSE) 1837 return true; 1838 /* RAX is used as hidden argument to va_arg functions. */ 1839 if (!regno) 1840 return true; 1841 for (i = 0; i < REGPARM_MAX; i++) 1842 if (regno == x86_64_int_parameter_registers[i]) 1843 return true; 1844 return false; 1845} 1846 1847/* Initialize a variable CUM of type CUMULATIVE_ARGS 1848 for a call to a function whose data type is FNTYPE. 1849 For a library call, FNTYPE is 0. */ 1850 1851void 1852init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ 1853 tree fntype, /* tree ptr for function decl */ 1854 rtx libname, /* SYMBOL_REF of library name or 0 */ 1855 tree fndecl) 1856{ 1857 static CUMULATIVE_ARGS zero_cum; 1858 tree param, next_param; 1859 1860 if (TARGET_DEBUG_ARG) 1861 { 1862 fprintf (stderr, "\ninit_cumulative_args ("); 1863 if (fntype) 1864 fprintf (stderr, "fntype code = %s, ret code = %s", 1865 tree_code_name[(int) TREE_CODE (fntype)], 1866 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]); 1867 else 1868 fprintf (stderr, "no fntype"); 1869 1870 if (libname) 1871 fprintf (stderr, ", libname = %s", XSTR (libname, 0)); 1872 } 1873 1874 *cum = zero_cum; 1875 1876 /* Set up the number of registers to use for passing arguments. 
*/ 1877 if (fntype) 1878 cum->nregs = ix86_function_regparm (fntype, fndecl); 1879 else 1880 cum->nregs = ix86_regparm; 1881 cum->sse_nregs = SSE_REGPARM_MAX; 1882 cum->mmx_nregs = MMX_REGPARM_MAX; 1883 cum->warn_sse = true; 1884 cum->warn_mmx = true; 1885 cum->maybe_vaarg = false; 1886 1887 /* Use ecx and edx registers if function has fastcall attribute */ 1888 if (fntype && !TARGET_64BIT) 1889 { 1890 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype))) 1891 { 1892 cum->nregs = 2; 1893 cum->fastcall = 1; 1894 } 1895 } 1896 1897 1898 /* Determine if this function has variable arguments. This is 1899 indicated by the last argument being 'void_type_mode' if there 1900 are no variable arguments. If there are variable arguments, then 1901 we won't pass anything in registers */ 1902 1903 if (cum->nregs || !TARGET_MMX || !TARGET_SSE) 1904 { 1905 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0; 1906 param != 0; param = next_param) 1907 { 1908 next_param = TREE_CHAIN (param); 1909 if (next_param == 0 && TREE_VALUE (param) != void_type_node) 1910 { 1911 if (!TARGET_64BIT) 1912 { 1913 cum->nregs = 0; 1914 cum->sse_nregs = 0; 1915 cum->mmx_nregs = 0; 1916 cum->warn_sse = 0; 1917 cum->warn_mmx = 0; 1918 cum->fastcall = 0; 1919 } 1920 cum->maybe_vaarg = true; 1921 } 1922 } 1923 } 1924 if ((!fntype && !libname) 1925 || (fntype && !TYPE_ARG_TYPES (fntype))) 1926 cum->maybe_vaarg = 1; 1927 1928 if (TARGET_DEBUG_ARG) 1929 fprintf (stderr, ", nregs=%d )\n", cum->nregs); 1930 1931 return; 1932} 1933 1934/* x86-64 register passing implementation. See x86-64 ABI for details. Goal 1935 of this code is to classify each 8bytes of incoming argument by the register 1936 class and assign registers accordingly. */ 1937 1938/* Return the union class of CLASS1 and CLASS2. 1939 See the x86-64 PS ABI for details. 
*/ 1940 1941static enum x86_64_reg_class 1942merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) 1943{ 1944 /* Rule #1: If both classes are equal, this is the resulting class. */ 1945 if (class1 == class2) 1946 return class1; 1947 1948 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is 1949 the other class. */ 1950 if (class1 == X86_64_NO_CLASS) 1951 return class2; 1952 if (class2 == X86_64_NO_CLASS) 1953 return class1; 1954 1955 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ 1956 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) 1957 return X86_64_MEMORY_CLASS; 1958 1959 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ 1960 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) 1961 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) 1962 return X86_64_INTEGERSI_CLASS; 1963 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS 1964 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) 1965 return X86_64_INTEGER_CLASS; 1966 1967 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */ 1968 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS 1969 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS) 1970 return X86_64_MEMORY_CLASS; 1971 1972 /* Rule #6: Otherwise class SSE is used. */ 1973 return X86_64_SSE_CLASS; 1974} 1975 1976/* Classify the argument of type TYPE and mode MODE. 1977 CLASSES will be filled by the register class used to pass each word 1978 of the operand. The number of words is returned. In case the parameter 1979 should be passed in memory, 0 is returned. As a special case for zero 1980 sized containers, classes[0] will be NO_CLASS and 1 is returned. 1981 1982 BIT_OFFSET is used internally for handling records and specifies offset 1983 of the offset in bits modulo 256 to avoid overflow cases. 1984 1985 See the x86-64 PS ABI for details. 
1986*/ 1987 1988static int 1989classify_argument (enum machine_mode mode, tree type, 1990 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) 1991{ 1992 HOST_WIDE_INT bytes = 1993 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 1994 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 1995 1996 /* Variable sized entities are always passed/returned in memory. */ 1997 if (bytes < 0) 1998 return 0; 1999 2000 if (mode != VOIDmode 2001 && MUST_PASS_IN_STACK (mode, type)) 2002 return 0; 2003 2004 if (type && AGGREGATE_TYPE_P (type)) 2005 { 2006 int i; 2007 tree field; 2008 enum x86_64_reg_class subclasses[MAX_CLASSES]; 2009 2010 /* On x86-64 we pass structures larger than 16 bytes on the stack. */ 2011 if (bytes > 16) 2012 return 0; 2013 2014 for (i = 0; i < words; i++) 2015 classes[i] = X86_64_NO_CLASS; 2016 2017 /* Zero sized arrays or structures are NO_CLASS. We return 0 to 2018 signalize memory class, so handle it as special case. */ 2019 if (!words) 2020 { 2021 classes[0] = X86_64_NO_CLASS; 2022 return 1; 2023 } 2024 2025 /* Classify each field of record and merge classes. */ 2026 if (TREE_CODE (type) == RECORD_TYPE) 2027 { 2028 /* For classes first merge in the field of the subclasses. 
*/ 2029 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL) 2030 { 2031 tree bases = TYPE_BINFO_BASETYPES (type); 2032 int n_bases = TREE_VEC_LENGTH (bases); 2033 int i; 2034 2035 for (i = 0; i < n_bases; ++i) 2036 { 2037 tree binfo = TREE_VEC_ELT (bases, i); 2038 int num; 2039 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8; 2040 tree type = BINFO_TYPE (binfo); 2041 2042 num = classify_argument (TYPE_MODE (type), 2043 type, subclasses, 2044 (offset + bit_offset) % 256); 2045 if (!num) 2046 return 0; 2047 for (i = 0; i < num; i++) 2048 { 2049 int pos = (offset + (bit_offset % 64)) / 8 / 8; 2050 classes[i + pos] = 2051 merge_classes (subclasses[i], classes[i + pos]); 2052 } 2053 } 2054 } 2055 /* And now merge the fields of structure. */ 2056 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 2057 { 2058 if (TREE_CODE (field) == FIELD_DECL) 2059 { 2060 int num; 2061 2062 /* Bitfields are always classified as integer. Handle them 2063 early, since later code would consider them to be 2064 misaligned integers. */ 2065 if (DECL_BIT_FIELD (field)) 2066 { 2067 for (i = int_bit_position (field) / 8 / 8; 2068 i < (int_bit_position (field) 2069 + tree_low_cst (DECL_SIZE (field), 0) 2070 + 63) / 8 / 8; i++) 2071 classes[i] = 2072 merge_classes (X86_64_INTEGER_CLASS, 2073 classes[i]); 2074 } 2075 else 2076 { 2077 num = classify_argument (TYPE_MODE (TREE_TYPE (field)), 2078 TREE_TYPE (field), subclasses, 2079 (int_bit_position (field) 2080 + bit_offset) % 256); 2081 if (!num) 2082 return 0; 2083 for (i = 0; i < num; i++) 2084 { 2085 int pos = 2086 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; 2087 classes[i + pos] = 2088 merge_classes (subclasses[i], classes[i + pos]); 2089 } 2090 } 2091 } 2092 } 2093 } 2094 /* Arrays are handled as small records. 
*/ 2095 else if (TREE_CODE (type) == ARRAY_TYPE) 2096 { 2097 int num; 2098 num = classify_argument (TYPE_MODE (TREE_TYPE (type)), 2099 TREE_TYPE (type), subclasses, bit_offset); 2100 if (!num) 2101 return 0; 2102 2103 /* The partial classes are now full classes. */ 2104 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) 2105 subclasses[0] = X86_64_SSE_CLASS; 2106 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4) 2107 subclasses[0] = X86_64_INTEGER_CLASS; 2108 2109 for (i = 0; i < words; i++) 2110 classes[i] = subclasses[i % num]; 2111 } 2112 /* Unions are similar to RECORD_TYPE but offset is always 0. */ 2113 else if (TREE_CODE (type) == UNION_TYPE 2114 || TREE_CODE (type) == QUAL_UNION_TYPE) 2115 { 2116 /* For classes first merge in the field of the subclasses. */ 2117 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL) 2118 { 2119 tree bases = TYPE_BINFO_BASETYPES (type); 2120 int n_bases = TREE_VEC_LENGTH (bases); 2121 int i; 2122 2123 for (i = 0; i < n_bases; ++i) 2124 { 2125 tree binfo = TREE_VEC_ELT (bases, i); 2126 int num; 2127 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8; 2128 tree type = BINFO_TYPE (binfo); 2129 2130 num = classify_argument (TYPE_MODE (type), 2131 type, subclasses, 2132 (offset + (bit_offset % 64)) % 256); 2133 if (!num) 2134 return 0; 2135 for (i = 0; i < num; i++) 2136 { 2137 int pos = (offset + (bit_offset % 64)) / 8 / 8; 2138 classes[i + pos] = 2139 merge_classes (subclasses[i], classes[i + pos]); 2140 } 2141 } 2142 } 2143 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 2144 { 2145 if (TREE_CODE (field) == FIELD_DECL) 2146 { 2147 int num; 2148 num = classify_argument (TYPE_MODE (TREE_TYPE (field)), 2149 TREE_TYPE (field), subclasses, 2150 bit_offset); 2151 if (!num) 2152 return 0; 2153 for (i = 0; i < num; i++) 2154 classes[i] = merge_classes (subclasses[i], classes[i]); 2155 } 2156 } 2157 } 2158 else if (TREE_CODE (type) == SET_TYPE) 2159 { 2160 if (bytes <= 4) 2161 { 2162 
classes[0] = X86_64_INTEGERSI_CLASS; 2163 return 1; 2164 } 2165 else if (bytes <= 8) 2166 { 2167 classes[0] = X86_64_INTEGER_CLASS; 2168 return 1; 2169 } 2170 else if (bytes <= 12) 2171 { 2172 classes[0] = X86_64_INTEGER_CLASS; 2173 classes[1] = X86_64_INTEGERSI_CLASS; 2174 return 2; 2175 } 2176 else 2177 { 2178 classes[0] = X86_64_INTEGER_CLASS; 2179 classes[1] = X86_64_INTEGER_CLASS; 2180 return 2; 2181 } 2182 } 2183 else 2184 abort (); 2185 2186 /* Final merger cleanup. */ 2187 for (i = 0; i < words; i++) 2188 { 2189 /* If one class is MEMORY, everything should be passed in 2190 memory. */ 2191 if (classes[i] == X86_64_MEMORY_CLASS) 2192 return 0; 2193 2194 /* The X86_64_SSEUP_CLASS should be always preceded by 2195 X86_64_SSE_CLASS. */ 2196 if (classes[i] == X86_64_SSEUP_CLASS 2197 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS)) 2198 classes[i] = X86_64_SSE_CLASS; 2199 2200 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */ 2201 if (classes[i] == X86_64_X87UP_CLASS 2202 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS)) 2203 classes[i] = X86_64_SSE_CLASS; 2204 } 2205 return words; 2206 } 2207 2208 /* Compute alignment needed. We align all types to natural boundaries with 2209 exception of XFmode that is aligned to 64bits. */ 2210 if (mode != VOIDmode && mode != BLKmode) 2211 { 2212 int mode_alignment = GET_MODE_BITSIZE (mode); 2213 2214 if (mode == XFmode) 2215 mode_alignment = 128; 2216 else if (mode == XCmode) 2217 mode_alignment = 256; 2218 if (COMPLEX_MODE_P (mode)) 2219 mode_alignment /= 2; 2220 /* Misaligned fields are always returned in memory. */ 2221 if (bit_offset % mode_alignment) 2222 return 0; 2223 } 2224 2225 /* Classification of atomic types. 
*/ 2226 switch (mode) 2227 { 2228 case DImode: 2229 case SImode: 2230 case HImode: 2231 case QImode: 2232 case CSImode: 2233 case CHImode: 2234 case CQImode: 2235 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) 2236 classes[0] = X86_64_INTEGERSI_CLASS; 2237 else 2238 classes[0] = X86_64_INTEGER_CLASS; 2239 return 1; 2240 case CDImode: 2241 case TImode: 2242 classes[0] = classes[1] = X86_64_INTEGER_CLASS; 2243 return 2; 2244 case CTImode: 2245 classes[0] = classes[1] = X86_64_INTEGER_CLASS; 2246 classes[2] = classes[3] = X86_64_INTEGER_CLASS; 2247 return 4; 2248 case SFmode: 2249 if (!(bit_offset % 64)) 2250 classes[0] = X86_64_SSESF_CLASS; 2251 else 2252 classes[0] = X86_64_SSE_CLASS; 2253 return 1; 2254 case DFmode: 2255 classes[0] = X86_64_SSEDF_CLASS; 2256 return 1; 2257 case XFmode: 2258 classes[0] = X86_64_X87_CLASS; 2259 classes[1] = X86_64_X87UP_CLASS; 2260 return 2; 2261 case TFmode: 2262 case TCmode: 2263 return 0; 2264 case XCmode: 2265 classes[0] = X86_64_X87_CLASS; 2266 classes[1] = X86_64_X87UP_CLASS; 2267 classes[2] = X86_64_X87_CLASS; 2268 classes[3] = X86_64_X87UP_CLASS; 2269 return 4; 2270 case DCmode: 2271 classes[0] = X86_64_SSEDF_CLASS; 2272 classes[1] = X86_64_SSEDF_CLASS; 2273 return 2; 2274 case SCmode: 2275 classes[0] = X86_64_SSE_CLASS; 2276 return 1; 2277 case V4SFmode: 2278 case V4SImode: 2279 case V16QImode: 2280 case V8HImode: 2281 case V2DFmode: 2282 case V2DImode: 2283 classes[0] = X86_64_SSE_CLASS; 2284 classes[1] = X86_64_SSEUP_CLASS; 2285 return 2; 2286 case V2SFmode: 2287 case V2SImode: 2288 case V4HImode: 2289 case V8QImode: 2290 return 0; 2291 case BLKmode: 2292 case VOIDmode: 2293 return 0; 2294 default: 2295 abort (); 2296 } 2297} 2298 2299/* Examine the argument and return set number of register required in each 2300 class. Return 0 iff parameter should be passed in memory. 
*/
static int
examine_argument (enum machine_mode mode, tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class class[MAX_CLASSES];
  int n = classify_argument (mode, type, class, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  /* classify_argument returns 0 for "pass in memory".  */
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (class[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	/* SSEUP shares the register already counted for the preceding
	   SSE word; NO_CLASS needs no register at all.  */
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	/* x87 registers are valid only for return values.  */
	if (!in_return)
	  return 0;
	break;
      case X86_64_MEMORY_CLASS:
	/* classify_argument never emits MEMORY; it returns 0 instead.  */
	abort ();
      }
  return 1;
}

/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  Returns NULL when the
   argument must live in memory.  */
static rtx
construct_container (enum machine_mode mode, tree type, int in_return,
		     int nintregs, int nsseregs, const int * intreg,
		     int sse_regno)
{
  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class class[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, class, 0);
  if (TARGET_DEBUG_ARG)
    {
      if (!n)
	fprintf (stderr, "Memory class\n");
      else
	{
	  fprintf (stderr, "Classes:");
	  for (i = 0; i < n; i++)
	    {
	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
	    }
	   fprintf (stderr, "\n");
	}
    }
  if (!n)
    return NULL;
  /* Not enough free registers of the needed kinds means memory too.  */
  if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	abort ();
      }
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);
  if (n == 4
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
      && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
      && mode != BLKmode)
    return gen_rtx_REG (XCmode, FIRST_STACK_REG);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (class[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode, *intreg),
					     GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (SFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (DFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  /* An SSE word followed by SSEUP occupies a single 16-byte
	     register; consume both words with one TImode entry.  */
	  if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
	    tmpmode = TImode;
	  else
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  if (tmpmode == TImode)
	    i++;
	  sse_regno++;
	  break;
	default:
	  abort ();
	}
    }
  ret =	gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (CUMULATIVE_ARGS *cum,	/* current arg information */
		      enum machine_mode mode,	/* current arg mode */
		      tree type,	/* type of the argument or 0 if lib support */
		      int named)	/* whether or not the argument was named */
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
  if (TARGET_64BIT)
    {
      int int_nregs, sse_nregs;
      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
	cum->words += words;
      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
	{
	  cum->nregs -= int_nregs;
	  cum->sse_nregs -= sse_nregs;
	  cum->regno += int_nregs;
	  cum->sse_regno += sse_nregs;
	}
      else
	cum->words += words;
    }
  else
    {
      if (TARGET_SSE && SSE_REG_MODE_P (mode)
	  && (!type || !AGGREGATE_TYPE_P (type)))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      else if (TARGET_MMX && MMX_REG_MODE_P (mode)
	       && (!type || !AGGREGATE_TYPE_P (type)))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      else
	{
	  /* NOTE: nregs may go negative here; the reset below treats any
	     exhaustion (<= 0) as "no registers left".  */
	  cum->words += words;
	  cum->nregs -= words;
	  cum->regno += words;

	  if (cum->nregs <= 0)
	    {
	      cum->nregs = 0;
	      cum->regno = 0;
	    }
	}
    }
  return;
}

/* A subroutine of function_arg.  We want to pass a parameter whose nominal
   type is MODE in REGNO.  We try to minimize ABI variation, so MODE may not
   actually be valid for REGNO with the current ISA.  In this case, ALT_MODE
   is used instead.  It must be the same size as MODE, and must be known to
   be valid for REGNO.  Finally, ORIG_MODE is the original mode of the
   parameter, as seen by the type system.
This may be different from MODE 2556 when we're mucking with things minimizing ABI variations. 2557 2558 Returns a REG or a PARALLEL as appropriate. */ 2559 2560static rtx 2561gen_reg_or_parallel (enum machine_mode mode, enum machine_mode alt_mode, 2562 enum machine_mode orig_mode, unsigned int regno) 2563{ 2564 rtx tmp; 2565 2566 if (HARD_REGNO_MODE_OK (regno, mode)) 2567 tmp = gen_rtx_REG (mode, regno); 2568 else 2569 { 2570 tmp = gen_rtx_REG (alt_mode, regno); 2571 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx); 2572 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp)); 2573 } 2574 2575 return tmp; 2576} 2577 2578/* Define where to put the arguments to a function. 2579 Value is zero to push the argument on the stack, 2580 or a hard register in which to store the argument. 2581 2582 MODE is the argument's machine mode. 2583 TYPE is the data type of the argument (as a tree). 2584 This is null for libcalls where that information may 2585 not be available. 2586 CUM is a variable of type CUMULATIVE_ARGS which gives info about 2587 the preceding args and about the function being called. 2588 NAMED is nonzero if this argument is a named parameter 2589 (otherwise it is an extra parameter matching an ellipsis). */ 2590 2591rtx 2592function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode, 2593 tree type, int named) 2594{ 2595 enum machine_mode mode = orig_mode; 2596 rtx ret = NULL_RTX; 2597 int bytes = 2598 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 2599 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 2600 static bool warnedsse, warnedmmx; 2601 2602 /* Handle a hidden AL argument containing number of registers for varargs 2603 x86-64 functions. For i386 ABI just return constm1_rtx to avoid 2604 any AL settings. */ 2605 if (mode == VOIDmode) 2606 { 2607 if (TARGET_64BIT) 2608 return GEN_INT (cum->maybe_vaarg 2609 ? (cum->sse_nregs < 0 2610 ? 
SSE_REGPARM_MAX 2611 : cum->sse_regno) 2612 : -1); 2613 else 2614 return constm1_rtx; 2615 } 2616 if (TARGET_64BIT) 2617 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs, 2618 &x86_64_int_parameter_registers [cum->regno], 2619 cum->sse_regno); 2620 else 2621 switch (mode) 2622 { 2623 /* For now, pass fp/complex values on the stack. */ 2624 default: 2625 break; 2626 2627 case BLKmode: 2628 if (bytes < 0) 2629 break; 2630 /* FALLTHRU */ 2631 case DImode: 2632 case SImode: 2633 case HImode: 2634 case QImode: 2635 if (words <= cum->nregs) 2636 { 2637 int regno = cum->regno; 2638 2639 /* Fastcall allocates the first two DWORD (SImode) or 2640 smaller arguments to ECX and EDX. */ 2641 if (cum->fastcall) 2642 { 2643 if (mode == BLKmode || mode == DImode) 2644 break; 2645 2646 /* ECX not EAX is the first allocated register. */ 2647 if (regno == 0) 2648 regno = 2; 2649 } 2650 ret = gen_rtx_REG (mode, regno); 2651 } 2652 break; 2653 case TImode: 2654 case V16QImode: 2655 case V8HImode: 2656 case V4SImode: 2657 case V2DImode: 2658 case V4SFmode: 2659 case V2DFmode: 2660 if (!type || !AGGREGATE_TYPE_P (type)) 2661 { 2662 if (!TARGET_SSE && !warnedmmx && cum->warn_sse) 2663 { 2664 warnedsse = true; 2665 warning ("SSE vector argument without SSE enabled " 2666 "changes the ABI"); 2667 } 2668 if (cum->sse_nregs) 2669 ret = gen_reg_or_parallel (mode, TImode, orig_mode, 2670 cum->sse_regno + FIRST_SSE_REG); 2671 } 2672 break; 2673 case V8QImode: 2674 case V4HImode: 2675 case V2SImode: 2676 case V2SFmode: 2677 if (!type || !AGGREGATE_TYPE_P (type)) 2678 { 2679 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx) 2680 { 2681 warnedmmx = true; 2682 warning ("MMX vector argument without MMX enabled " 2683 "changes the ABI"); 2684 } 2685 if (cum->mmx_nregs) 2686 ret = gen_reg_or_parallel (mode, DImode, orig_mode, 2687 cum->mmx_regno + FIRST_MMX_REG); 2688 } 2689 break; 2690 } 2691 2692 if (TARGET_DEBUG_ARG) 2693 { 2694 fprintf (stderr, 2695 "function_arg (size=%d, 
wds=%2d, nregs=%d, mode=%4s, named=%d, ", 2696 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named); 2697 2698 if (ret) 2699 print_simple_rtl (stderr, ret); 2700 else 2701 fprintf (stderr, ", stack"); 2702 2703 fprintf (stderr, " )\n"); 2704 } 2705 2706 return ret; 2707} 2708 2709/* A C expression that indicates when an argument must be passed by 2710 reference. If nonzero for an argument, a copy of that argument is 2711 made in memory and a pointer to the argument is passed instead of 2712 the argument itself. The pointer is passed in whatever way is 2713 appropriate for passing a pointer to that type. */ 2714 2715int 2716function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED, 2717 enum machine_mode mode ATTRIBUTE_UNUSED, 2718 tree type, int named ATTRIBUTE_UNUSED) 2719{ 2720 if (!TARGET_64BIT) 2721 return 0; 2722 2723 if (type && int_size_in_bytes (type) == -1) 2724 { 2725 if (TARGET_DEBUG_ARG) 2726 fprintf (stderr, "function_arg_pass_by_reference\n"); 2727 return 1; 2728 } 2729 2730 return 0; 2731} 2732 2733/* Return true when TYPE should be 128bit aligned for 32bit argument passing 2734 ABI */ 2735static bool 2736contains_128bit_aligned_vector_p (tree type) 2737{ 2738 enum machine_mode mode = TYPE_MODE (type); 2739 if (SSE_REG_MODE_P (mode) 2740 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128)) 2741 return true; 2742 if (TYPE_ALIGN (type) < 128) 2743 return false; 2744 2745 if (AGGREGATE_TYPE_P (type)) 2746 { 2747 /* Walk the aggregates recursively. 
*/ 2748 if (TREE_CODE (type) == RECORD_TYPE 2749 || TREE_CODE (type) == UNION_TYPE 2750 || TREE_CODE (type) == QUAL_UNION_TYPE) 2751 { 2752 tree field; 2753 2754 if (TYPE_BINFO (type) != NULL 2755 && TYPE_BINFO_BASETYPES (type) != NULL) 2756 { 2757 tree bases = TYPE_BINFO_BASETYPES (type); 2758 int n_bases = TREE_VEC_LENGTH (bases); 2759 int i; 2760 2761 for (i = 0; i < n_bases; ++i) 2762 { 2763 tree binfo = TREE_VEC_ELT (bases, i); 2764 tree type = BINFO_TYPE (binfo); 2765 2766 if (contains_128bit_aligned_vector_p (type)) 2767 return true; 2768 } 2769 } 2770 /* And now merge the fields of structure. */ 2771 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 2772 { 2773 if (TREE_CODE (field) == FIELD_DECL 2774 && contains_128bit_aligned_vector_p (TREE_TYPE (field))) 2775 return true; 2776 } 2777 } 2778 /* Just for use if some languages passes arrays by value. */ 2779 else if (TREE_CODE (type) == ARRAY_TYPE) 2780 { 2781 if (contains_128bit_aligned_vector_p (TREE_TYPE (type))) 2782 return true; 2783 } 2784 else 2785 abort (); 2786 } 2787 return false; 2788} 2789 2790/* Gives the alignment boundary, in bits, of an argument with the 2791 specified mode and type. */ 2792 2793int 2794ix86_function_arg_boundary (enum machine_mode mode, tree type) 2795{ 2796 int align; 2797 if (type) 2798 align = TYPE_ALIGN (type); 2799 else 2800 align = GET_MODE_ALIGNMENT (mode); 2801 if (align < PARM_BOUNDARY) 2802 align = PARM_BOUNDARY; 2803 if (!TARGET_64BIT) 2804 { 2805 /* i386 ABI defines all arguments to be 4 byte aligned. We have to 2806 make an exception for SSE modes since these require 128bit 2807 alignment. 2808 2809 The handling here differs from field_alignment. ICC aligns MMX 2810 arguments to 4 byte boundaries, while structure fields are aligned 2811 to 8 byte boundaries. 
*/ 2812 if (!type) 2813 { 2814 if (!SSE_REG_MODE_P (mode)) 2815 align = PARM_BOUNDARY; 2816 } 2817 else 2818 { 2819 if (!contains_128bit_aligned_vector_p (type)) 2820 align = PARM_BOUNDARY; 2821 } 2822 } 2823 if (align > 128) 2824 align = 128; 2825 return align; 2826} 2827 2828/* Return true if N is a possible register number of function value. */ 2829bool 2830ix86_function_value_regno_p (int regno) 2831{ 2832 if (!TARGET_64BIT) 2833 { 2834 return ((regno) == 0 2835 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387) 2836 || ((regno) == FIRST_SSE_REG && TARGET_SSE)); 2837 } 2838 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG 2839 || ((regno) == FIRST_SSE_REG && TARGET_SSE) 2840 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)); 2841} 2842 2843/* Define how to find the value returned by a function. 2844 VALTYPE is the data type of the value (as a tree). 2845 If the precise function being called is known, FUNC is its FUNCTION_DECL; 2846 otherwise, FUNC is 0. */ 2847rtx 2848ix86_function_value (tree valtype) 2849{ 2850 if (TARGET_64BIT) 2851 { 2852 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1, 2853 REGPARM_MAX, SSE_REGPARM_MAX, 2854 x86_64_int_return_registers, 0); 2855 /* For zero sized structures, construct_container return NULL, but we need 2856 to keep rest of compiler happy by returning meaningful value. */ 2857 if (!ret) 2858 ret = gen_rtx_REG (TYPE_MODE (valtype), 0); 2859 return ret; 2860 } 2861 else 2862 return gen_rtx_REG (TYPE_MODE (valtype), 2863 ix86_value_regno (TYPE_MODE (valtype))); 2864} 2865 2866/* Return false iff type is returned in memory. 
*/ 2867int 2868ix86_return_in_memory (tree type) 2869{ 2870 int needed_intregs, needed_sseregs, size; 2871 enum machine_mode mode = TYPE_MODE (type); 2872 2873 if (TARGET_64BIT) 2874 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs); 2875 2876 if (mode == BLKmode) 2877 return 1; 2878 2879 size = int_size_in_bytes (type); 2880 2881 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8) 2882 return 0; 2883 2884 if (VECTOR_MODE_P (mode) || mode == TImode) 2885 { 2886 /* User-created vectors small enough to fit in EAX. */ 2887 if (size < 8) 2888 return 0; 2889 2890 /* MMX/3dNow values are returned on the stack, since we've 2891 got to EMMS/FEMMS before returning. */ 2892 if (size == 8) 2893 return 1; 2894 2895 /* SSE values are returned in XMM0. */ 2896 /* ??? Except when it doesn't exist? We have a choice of 2897 either (1) being abi incompatible with a -march switch, 2898 or (2) generating an error here. Given no good solution, 2899 I think the safest thing is one warning. The user won't 2900 be able to use -Werror, but.... */ 2901 if (size == 16) 2902 { 2903 static bool warned; 2904 2905 if (TARGET_SSE) 2906 return 0; 2907 2908 if (!warned) 2909 { 2910 warned = true; 2911 warning ("SSE vector return without SSE enabled " 2912 "changes the ABI"); 2913 } 2914 return 1; 2915 } 2916 } 2917 2918 if (mode == XFmode) 2919 return 0; 2920 2921 if (size > 12) 2922 return 1; 2923 return 0; 2924} 2925 2926/* Define how to find the value returned by a library function 2927 assuming the value has mode MODE. 
*/ 2928rtx 2929ix86_libcall_value (enum machine_mode mode) 2930{ 2931 if (TARGET_64BIT) 2932 { 2933 switch (mode) 2934 { 2935 case SFmode: 2936 case SCmode: 2937 case DFmode: 2938 case DCmode: 2939 return gen_rtx_REG (mode, FIRST_SSE_REG); 2940 case XFmode: 2941 case XCmode: 2942 return gen_rtx_REG (mode, FIRST_FLOAT_REG); 2943 case TFmode: 2944 case TCmode: 2945 return NULL; 2946 default: 2947 return gen_rtx_REG (mode, 0); 2948 } 2949 } 2950 else 2951 return gen_rtx_REG (mode, ix86_value_regno (mode)); 2952} 2953 2954/* Given a mode, return the register to use for a return value. */ 2955 2956static int 2957ix86_value_regno (enum machine_mode mode) 2958{ 2959 /* Floating point return values in %st(0). */ 2960 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387) 2961 return FIRST_FLOAT_REG; 2962 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where 2963 we prevent this case when sse is not available. */ 2964 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) 2965 return FIRST_SSE_REG; 2966 /* Everything else in %eax. */ 2967 return 0; 2968} 2969 2970/* Create the va_list data type. */ 2971 2972static tree 2973ix86_build_builtin_va_list (void) 2974{ 2975 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl; 2976 2977 /* For i386 we use plain pointer to argument area. 
*/ 2978 if (!TARGET_64BIT) 2979 return build_pointer_type (char_type_node); 2980 2981 record = (*lang_hooks.types.make_type) (RECORD_TYPE); 2982 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record); 2983 2984 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"), 2985 unsigned_type_node); 2986 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"), 2987 unsigned_type_node); 2988 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"), 2989 ptr_type_node); 2990 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"), 2991 ptr_type_node); 2992 2993 DECL_FIELD_CONTEXT (f_gpr) = record; 2994 DECL_FIELD_CONTEXT (f_fpr) = record; 2995 DECL_FIELD_CONTEXT (f_ovf) = record; 2996 DECL_FIELD_CONTEXT (f_sav) = record; 2997 2998 TREE_CHAIN (record) = type_decl; 2999 TYPE_NAME (record) = type_decl; 3000 TYPE_FIELDS (record) = f_gpr; 3001 TREE_CHAIN (f_gpr) = f_fpr; 3002 TREE_CHAIN (f_fpr) = f_ovf; 3003 TREE_CHAIN (f_ovf) = f_sav; 3004 3005 layout_type (record); 3006 3007 /* The correct type is an array type of one element. */ 3008 return build_array_type (record, build_index_type (size_zero_node)); 3009} 3010 3011/* Perform any needed actions needed for a function that is receiving a 3012 variable number of arguments. 3013 3014 CUM is as above. 3015 3016 MODE and TYPE are the mode and type of the current parameter. 3017 3018 PRETEND_SIZE is a variable that should be set to the amount of stack 3019 that must be pushed by the prolog to pretend that our caller pushed 3020 it. 3021 3022 Normally, this macro will push all remaining incoming registers on the 3023 stack and set PRETEND_SIZE to the length of the registers pushed. 
   */

void
ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
			     int no_rtl)
{
  CUMULATIVE_ARGS next_cum;
  rtx save_area = NULL_RTX, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  int set;
  tree fntype;
  int stdarg_p;
  int i;

  /* Only the 64-bit ABI keeps unnamed arguments in registers.  */
  if (!TARGET_64BIT)
    return;

  /* Indicate to allocate space on the stack for varargs save area.  */
  ix86_save_varrargs_registers = 1;

  /* The SSE part of the save area needs 16-byte alignment.  */
  cfun->stack_alignment_needed = 128;

  fntype = TREE_TYPE (current_function_decl);
  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		  != void_type_node));

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p)
    function_arg_advance (&next_cum, mode, type, 1);

  if (!no_rtl)
    save_area = frame_pointer_rtx;

  set = get_varargs_alias_set ();

  /* Spill the remaining unnamed integer argument registers into the
     save area, one word each.  */
  for (i = next_cum.regno; i < ix86_regparm; i++)
    {
      mem = gen_rtx_MEM (Pmode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
					x86_64_int_parameter_registers[i]));
    }

  if (next_cum.sse_nregs)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function.  We use
	 sse_prologue_save insn template that produces computed jump across
	 SSE saves.  We need some preparation work to get this working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute address to jump to :
	 label - 5*eax + nnamed_sse_arguments*5
	 NOTE(review): the code below scales by 4, not 5 as this comment
	 says -- presumably each save insn in the sse_prologue_save
	 template is 4 bytes long; confirm against that pattern.  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      gen_rtx_MULT (Pmode, nsse_reg,
					    GEN_INT (4))));
      if (next_cum.sse_regno)
	emit_move_insn
	  (nsse_reg,
	   gen_rtx_CONST (DImode,
			  gen_rtx_PLUS (DImode,
					label_ref,
					GEN_INT (next_cum.sse_regno * 4))));
      else
	emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute address of memory block we save into.  We always use pointer
	 pointing 127 bytes after first byte to store - this is needed to keep
	 instruction size limited by 4 bytes.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      plus_constant (save_area,
					     8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
					GEN_INT (next_cum.sse_regno), label));
    }

}

/* Implement va_start: initialize the gp_offset/fp_offset counters and
   the overflow and register-save-area pointers of VALIST.  */

void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
	     (int) words, (int) n_gpr, (int) n_fpr);

  /* gp_offset = bytes of integer registers already consumed (8 each).  */
  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
	     build_int_2 (n_gpr * 8, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* fp_offset starts past the integer save slots; 16 bytes per SSE reg.  */
  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
	     build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the overflow area.  */
  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
  if (words != 0)
    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
	       build_int_2 (words * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the register save area.
     Prologue of the function save it right above stack frame.  */
  t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
  t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}

/* Implement va_arg: emit RTL fetching the next argument of TYPE from
   VALIST and return an rtx holding its address.  */
rtx
ix86_va_arg (tree valist, tree type)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  rtx lab_false, lab_over = NULL_RTX;
  rtx addr_rtx, r;
  rtx container;
  int indirect_p = 0;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      return std_expand_builtin_va_arg (valist, type);
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  size = int_size_in_bytes (type);
  if (size == -1)
    {
      /* Passed by reference.  */
      indirect_p = 1;
      type = build_pointer_type (type);
      size = int_size_in_bytes (type);
    }
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  container = construct_container (TYPE_MODE (type), type, 0,
				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
  /*
   * Pull the value out of the saved registers ...
   */

  addr_rtx = gen_reg_rtx (Pmode);

  if (container)
    {
      rtx int_addr_rtx, sse_addr_rtx;
      int needed_intregs, needed_sseregs;
      int need_temp;

      lab_over = gen_label_rtx ();
      lab_false = gen_label_rtx ();

      examine_argument (TYPE_MODE (type), type, 0,
			&needed_intregs, &needed_sseregs);


      need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
		   || TYPE_ALIGN (type) > 128);

      /* In case we are passing structure, verify that it is consecutive block
	 on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr_rtx = addr_rtx;
	  sse_addr_rtx = addr_rtx;
	}
      else
	{
	  int_addr_rtx = gen_reg_rtx (Pmode);
	  sse_addr_rtx = gen_reg_rtx (Pmode);
	}
      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  emit_cmp_and_jump_insns (expand_expr
				   (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
				   GEN_INT ((REGPARM_MAX - needed_intregs +
					     1) * 8), GE, const1_rtx, SImode,
				   1, lab_false);
	}
      if (needed_sseregs)
	{
	  emit_cmp_and_jump_insns (expand_expr
				   (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
				   GEN_INT ((SSE_REGPARM_MAX -
					     needed_sseregs + 1) * 16 +
					    REGPARM_MAX * 8), GE, const1_rtx,
				   SImode, 1, lab_false);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
	  r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
	  if (r != int_addr_rtx)
	    emit_move_insn (int_addr_rtx, r);
	}
      if (needed_sseregs)
	{
	  t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
	  r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
	  if (r != sse_addr_rtx)
	    emit_move_insn (sse_addr_rtx, r);
	}
      if (need_temp)
	{
	  int i;
	  rtx mem;
	  rtx x;

	  /* Never use the memory itself, as it has the alias set.  */
	  x = XEXP (assign_temp (type, 0, 1, 0), 0);
	  mem = gen_rtx_MEM (BLKmode, x);
	  /* NOTE(review): the result of force_operand is discarded; if it
	     does not emit into addr_rtx the address is lost -- confirm
	     intent (compare the expand_expr/emit_move_insn pattern above).  */
	  force_operand (x, addr_rtx);
	  set_mem_alias_set (mem, get_varargs_alias_set ());
	  set_mem_align (mem, BITS_PER_UNIT);

	  /* Copy each piece from its register-save slot into the temp.  */
	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      rtx src_addr;
	      rtx src_mem;
	      int src_offset;
	      rtx dest_mem;

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr_rtx;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr_rtx;
		  src_offset = REGNO (reg) * 8;
		}
	      src_mem = gen_rtx_MEM (mode, src_addr);
	      set_mem_alias_set (src_mem, get_varargs_alias_set ());
	      src_mem = adjust_address (src_mem, mode, src_offset);
	      dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
	      emit_move_insn (dest_mem, src_mem);
	    }
	}

      /* Bump the offsets past the registers we consumed.  */
      if (needed_intregs)
	{
	  t =
	    build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		   build_int_2 (needed_intregs * 8, 0));
	  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
	}
      if (needed_sseregs)
	{
	  t =
	    build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		   build_int_2 (needed_sseregs * 16, 0));
	  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
	}

      emit_jump_insn (gen_jump (lab_over));
      emit_barrier ();
      emit_label (lab_false);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
    }
  t = save_expr (t);

  r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
  if (r != addr_rtx)
    emit_move_insn (addr_rtx, r);

  /* Advance the overflow pointer past this argument.  */
  t =
    build (PLUS_EXPR, TREE_TYPE (t), t,
	   build_int_2 (rsize * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (container)
    emit_label (lab_over);

  /* For by-reference arguments, dereference once more.  */
  if (indirect_p)
    {
      r = gen_rtx_MEM (Pmode, addr_rtx);
      set_mem_alias_set (r, get_varargs_alias_set ());
      emit_move_insn (addr_rtx, r);
    }

  return addr_rtx;
}

/* Return nonzero if OP is either a i387 or SSE fp register.  */
int
any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return ANY_FP_REG_P (op);
}

/* Return nonzero if OP is an i387 fp register.  */
int
fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return FP_REG_P (op);
}

/* Return nonzero if OP is a non-fp register_operand.  */
int
register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
{
  return register_operand (op, mode) && !ANY_FP_REG_P (op);
}

/* Return nonzero if OP is a register operand other than an
   i387 fp register.  */
int
register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
{
  return register_operand (op, mode) && !FP_REG_P (op);
}

/* Return nonzero if OP is general operand representable on x86_64.
*/ 3454 3455int 3456x86_64_general_operand (rtx op, enum machine_mode mode) 3457{ 3458 if (!TARGET_64BIT) 3459 return general_operand (op, mode); 3460 if (nonimmediate_operand (op, mode)) 3461 return 1; 3462 return x86_64_sign_extended_value (op); 3463} 3464 3465/* Return nonzero if OP is general operand representable on x86_64 3466 as either sign extended or zero extended constant. */ 3467 3468int 3469x86_64_szext_general_operand (rtx op, enum machine_mode mode) 3470{ 3471 if (!TARGET_64BIT) 3472 return general_operand (op, mode); 3473 if (nonimmediate_operand (op, mode)) 3474 return 1; 3475 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op); 3476} 3477 3478/* Return nonzero if OP is nonmemory operand representable on x86_64. */ 3479 3480int 3481x86_64_nonmemory_operand (rtx op, enum machine_mode mode) 3482{ 3483 if (!TARGET_64BIT) 3484 return nonmemory_operand (op, mode); 3485 if (register_operand (op, mode)) 3486 return 1; 3487 return x86_64_sign_extended_value (op); 3488} 3489 3490/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */ 3491 3492int 3493x86_64_movabs_operand (rtx op, enum machine_mode mode) 3494{ 3495 if (!TARGET_64BIT || !flag_pic) 3496 return nonmemory_operand (op, mode); 3497 if (register_operand (op, mode) || x86_64_sign_extended_value (op)) 3498 return 1; 3499 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op)) 3500 return 1; 3501 return 0; 3502} 3503 3504/* Return nonzero if OPNUM's MEM should be matched 3505 in movabs* patterns. 
*/ 3506 3507int 3508ix86_check_movabs (rtx insn, int opnum) 3509{ 3510 rtx set, mem; 3511 3512 set = PATTERN (insn); 3513 if (GET_CODE (set) == PARALLEL) 3514 set = XVECEXP (set, 0, 0); 3515 if (GET_CODE (set) != SET) 3516 abort (); 3517 mem = XEXP (set, opnum); 3518 while (GET_CODE (mem) == SUBREG) 3519 mem = SUBREG_REG (mem); 3520 if (GET_CODE (mem) != MEM) 3521 abort (); 3522 return (volatile_ok || !MEM_VOLATILE_P (mem)); 3523} 3524 3525/* Return nonzero if OP is nonmemory operand representable on x86_64. */ 3526 3527int 3528x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode) 3529{ 3530 if (!TARGET_64BIT) 3531 return nonmemory_operand (op, mode); 3532 if (register_operand (op, mode)) 3533 return 1; 3534 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op); 3535} 3536 3537/* Return nonzero if OP is immediate operand representable on x86_64. */ 3538 3539int 3540x86_64_immediate_operand (rtx op, enum machine_mode mode) 3541{ 3542 if (!TARGET_64BIT) 3543 return immediate_operand (op, mode); 3544 return x86_64_sign_extended_value (op); 3545} 3546 3547/* Return nonzero if OP is immediate operand representable on x86_64. */ 3548 3549int 3550x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 3551{ 3552 return x86_64_zero_extended_value (op); 3553} 3554 3555/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand 3556 for shift & compare patterns, as shifting by 0 does not change flags), 3557 else return zero. */ 3558 3559int 3560const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 3561{ 3562 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31); 3563} 3564 3565/* Returns 1 if OP is either a symbol reference or a sum of a symbol 3566 reference and a constant. 
*/ 3567 3568int 3569symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 3570{ 3571 switch (GET_CODE (op)) 3572 { 3573 case SYMBOL_REF: 3574 case LABEL_REF: 3575 return 1; 3576 3577 case CONST: 3578 op = XEXP (op, 0); 3579 if (GET_CODE (op) == SYMBOL_REF 3580 || GET_CODE (op) == LABEL_REF 3581 || (GET_CODE (op) == UNSPEC 3582 && (XINT (op, 1) == UNSPEC_GOT 3583 || XINT (op, 1) == UNSPEC_GOTOFF 3584 || XINT (op, 1) == UNSPEC_GOTPCREL))) 3585 return 1; 3586 if (GET_CODE (op) != PLUS 3587 || GET_CODE (XEXP (op, 1)) != CONST_INT) 3588 return 0; 3589 3590 op = XEXP (op, 0); 3591 if (GET_CODE (op) == SYMBOL_REF 3592 || GET_CODE (op) == LABEL_REF) 3593 return 1; 3594 /* Only @GOTOFF gets offsets. */ 3595 if (GET_CODE (op) != UNSPEC 3596 || XINT (op, 1) != UNSPEC_GOTOFF) 3597 return 0; 3598 3599 op = XVECEXP (op, 0, 0); 3600 if (GET_CODE (op) == SYMBOL_REF 3601 || GET_CODE (op) == LABEL_REF) 3602 return 1; 3603 return 0; 3604 3605 default: 3606 return 0; 3607 } 3608} 3609 3610/* Return true if the operand contains a @GOT or @GOTOFF reference. */ 3611 3612int 3613pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 3614{ 3615 if (GET_CODE (op) != CONST) 3616 return 0; 3617 op = XEXP (op, 0); 3618 if (TARGET_64BIT) 3619 { 3620 if (GET_CODE (op) == UNSPEC 3621 && XINT (op, 1) == UNSPEC_GOTPCREL) 3622 return 1; 3623 if (GET_CODE (op) == PLUS 3624 && GET_CODE (XEXP (op, 0)) == UNSPEC 3625 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL) 3626 return 1; 3627 } 3628 else 3629 { 3630 if (GET_CODE (op) == UNSPEC) 3631 return 1; 3632 if (GET_CODE (op) != PLUS 3633 || GET_CODE (XEXP (op, 1)) != CONST_INT) 3634 return 0; 3635 op = XEXP (op, 0); 3636 if (GET_CODE (op) == UNSPEC) 3637 return 1; 3638 } 3639 return 0; 3640} 3641 3642/* Return true if OP is a symbolic operand that resolves locally. 
   */

static int
local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Strip a constant offset: symbol + CONST_INT resolves like symbol.  */
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);

  /* Labels are always local.  */
  if (GET_CODE (op) == LABEL_REF)
    return 1;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  if (SYMBOL_REF_LOCAL_P (op))
    return 1;

  /* There is, however, a not insubstantial body of code in the rest of
     the compiler that assumes it can just stick the results of
     ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
  /* ??? This is a hack.  Should update the body of the compiler to
     always create a DECL an invoke targetm.encode_section_info.  */
  if (strncmp (XSTR (op, 0), internal_label_prefix,
	       internal_label_prefix_len) == 0)
    return 1;

  return 0;
}

/* Test for various thread-local symbols.  Returns the symbol's TLS
   model (nonzero) or 0 for non-TLS symbols.  */

int
tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  return SYMBOL_REF_TLS_MODEL (op);
}

/* Helper: nonzero when OP is a SYMBOL_REF with TLS model KIND.  */
static inline int
tls_symbolic_operand_1 (rtx op, enum tls_model kind)
{
  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  return SYMBOL_REF_TLS_MODEL (op) == kind;
}

int
global_dynamic_symbolic_operand (rtx op,
				 enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
}

int
local_dynamic_symbolic_operand (rtx op,
				enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
}

int
initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
}

int
local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
}

/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}

/* Test for a valid operand for a sibling call instruction.  Don't allow
   the arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Otherwise we can only allow register operands.  */
  return register_operand (op, Pmode);
}

/* Nonzero for a SYMBOL_REF, possibly offset by a constant, usable as a
   direct call target.  */
int
constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);
  return GET_CODE (op) == SYMBOL_REF;
}

/* Match exactly zero and one.  */

int
const0_operand (rtx op, enum machine_mode mode)
{
  return op == CONST0_RTX (mode);
}

int
const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return op == const1_rtx;
}

/* Match 2, 4, or 8.  Used for leal multiplicands.  */

int
const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
}

/* Match a CONST_INT in [0, 3].  */
int
const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
}

/* Match a CONST_INT in [0, 7].  */
int
const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
}

/* Match a CONST_INT in [0, 15].  */
int
const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
}

/* Match a CONST_INT in [0, 255].  */
int
const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
}


/* True if this is a constant appropriate for an increment or decrement.  */

int
incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* On Pentium4, the inc and dec operations causes extra dependency on flag
     registers, since carry flag is not set.  */
  if (TARGET_PENTIUM4 && !optimize_size)
    return 0;
  return op == const1_rtx || op == constm1_rtx;
}

/* Return nonzero if OP is acceptable as operand of DImode shift
   expander.  */

int
shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    return nonimmediate_operand (op, mode);
  else
    return register_operand (op, mode);
}

/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is
   a register operand.

   This is used to prevent esp from being used as an index reg.
   Which would only happen in pathological cases.  */

int
reg_no_sp_operand (rtx op, enum machine_mode mode)
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
    return 0;

  return register_operand (op, mode);
}

/* Nonzero when OP is an MMX register.  */
int
mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return MMX_REG_P (op);
}

/* Return false if this is any eliminable register.  Otherwise
   general_operand.  */

int
general_no_elim_operand (rtx op, enum machine_mode mode)
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;
  if (REG_P (t)
      && REGNO (t) >= FIRST_VIRTUAL_REGISTER
      && REGNO (t) <= LAST_VIRTUAL_REGISTER)
    return 0;

  return general_operand (op, mode);
}

/* Return false if this is any eliminable register.  Otherwise
   register_operand or const_int.
*/ 3900 3901int 3902nonmemory_no_elim_operand (rtx op, enum machine_mode mode) 3903{ 3904 rtx t = op; 3905 if (GET_CODE (t) == SUBREG) 3906 t = SUBREG_REG (t); 3907 if (t == arg_pointer_rtx || t == frame_pointer_rtx 3908 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx 3909 || t == virtual_stack_dynamic_rtx) 3910 return 0; 3911 3912 return GET_CODE (op) == CONST_INT || register_operand (op, mode); 3913} 3914 3915/* Return false if this is any eliminable register or stack register, 3916 otherwise work like register_operand. */ 3917 3918int 3919index_register_operand (rtx op, enum machine_mode mode) 3920{ 3921 rtx t = op; 3922 if (GET_CODE (t) == SUBREG) 3923 t = SUBREG_REG (t); 3924 if (!REG_P (t)) 3925 return 0; 3926 if (t == arg_pointer_rtx 3927 || t == frame_pointer_rtx 3928 || t == virtual_incoming_args_rtx 3929 || t == virtual_stack_vars_rtx 3930 || t == virtual_stack_dynamic_rtx 3931 || REGNO (t) == STACK_POINTER_REGNUM) 3932 return 0; 3933 3934 return general_operand (op, mode); 3935} 3936 3937/* Return true if op is a Q_REGS class register. */ 3938 3939int 3940q_regs_operand (rtx op, enum machine_mode mode) 3941{ 3942 if (mode != VOIDmode && GET_MODE (op) != mode) 3943 return 0; 3944 if (GET_CODE (op) == SUBREG) 3945 op = SUBREG_REG (op); 3946 return ANY_QI_REG_P (op); 3947} 3948 3949/* Return true if op is an flags register. */ 3950 3951int 3952flags_reg_operand (rtx op, enum machine_mode mode) 3953{ 3954 if (mode != VOIDmode && GET_MODE (op) != mode) 3955 return 0; 3956 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode; 3957} 3958 3959/* Return true if op is a NON_Q_REGS class register. 
 */

int
non_q_regs_operand (rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return NON_QI_REG_P (op);
}

/* Return true if OP is a constant-pool load of a vector constant whose
   elements, other than element 0, are all zero.  */

int
zero_extended_scalar_load_operand (rtx op,
				   enum machine_mode mode ATTRIBUTE_UNUSED)
{
  unsigned n_elts;
  if (GET_CODE (op) != MEM)
    return 0;
  op = maybe_get_pool_constant (op);
  if (!op)
    return 0;
  if (GET_CODE (op) != CONST_VECTOR)
    return 0;
  n_elts =
    (GET_MODE_SIZE (GET_MODE (op)) /
     GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
  /* Check every element except element 0 against zero.  */
  for (n_elts--; n_elts > 0; n_elts--)
    {
      rtx elt = CONST_VECTOR_ELT (op, n_elts);
      if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
	return 0;
    }
  return 1;
}

/* Return 1 when OP is operand acceptable for standard SSE move.  */
int
vector_move_operand (rtx op, enum machine_mode mode)
{
  if (nonimmediate_operand (op, mode))
    return 1;
  if (GET_MODE (op) != mode && mode != VOIDmode)
    return 0;
  /* All-zeros is the only immediate a standard SSE move can load.  */
  return (op == CONST0_RTX (GET_MODE (op)));
}

/* Return true if op is a valid address, and does not contain
   a segment override.  */

int
no_seg_address_operand (rtx op, enum machine_mode mode)
{
  struct ix86_address parts;

  if (! address_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (op, &parts))
    abort ();

  return parts.seg == SEG_DEFAULT;
}

/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
   insns.  */
int
sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  enum rtx_code code = GET_CODE (op);
  switch (code)
    {
    /* Operations supported directly.  */
    case EQ:
    case LT:
    case LE:
    case UNORDERED:
    case NE:
    case UNGE:
    case UNGT:
    case ORDERED:
      return 1;
    /* These are equivalent to ones above in non-IEEE comparisons.  */
    case UNEQ:
    case UNLT:
    case UNLE:
    case LTGT:
    case GE:
    case GT:
      return !TARGET_IEEE_FP;
    default:
      return 0;
    }
}
/* Return 1 if OP is a valid comparison operator in valid mode.  */
int
ix86_comparison_operator (rtx op, enum machine_mode mode)
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));

  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      /* An FP comparison is usable directly only when it needs no
	 auxiliary bypass or second jump.  */
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      return (bypass_code == NIL && second_code == NIL);
    }
  /* Integer comparisons: which codes are valid depends on which CC
     mode the comparison was computed in.  */
  switch (code)
    {
    case EQ: case NE:
      return 1;
    case LT: case GE:
      if (inmode == CCmode || inmode == CCGCmode
	  || inmode == CCGOCmode || inmode == CCNOmode)
	return 1;
      return 0;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      if (inmode == CCmode)
	return 1;
      return 0;
    case GT: case LE:
      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}

/* Return 1 if OP is a valid comparison operator testing carry flag
   to be set.
*/ 4095int 4096ix86_carry_flag_operator (rtx op, enum machine_mode mode) 4097{ 4098 enum machine_mode inmode; 4099 enum rtx_code code = GET_CODE (op); 4100 4101 if (mode != VOIDmode && GET_MODE (op) != mode) 4102 return 0; 4103 if (GET_RTX_CLASS (code) != '<') 4104 return 0; 4105 inmode = GET_MODE (XEXP (op, 0)); 4106 if (GET_CODE (XEXP (op, 0)) != REG 4107 || REGNO (XEXP (op, 0)) != 17 4108 || XEXP (op, 1) != const0_rtx) 4109 return 0; 4110 4111 if (inmode == CCFPmode || inmode == CCFPUmode) 4112 { 4113 enum rtx_code second_code, bypass_code; 4114 4115 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 4116 if (bypass_code != NIL || second_code != NIL) 4117 return 0; 4118 code = ix86_fp_compare_code_to_integer (code); 4119 } 4120 else if (inmode != CCmode) 4121 return 0; 4122 return code == LTU; 4123} 4124 4125/* Return 1 if OP is a comparison operator that can be issued by fcmov. */ 4126 4127int 4128fcmov_comparison_operator (rtx op, enum machine_mode mode) 4129{ 4130 enum machine_mode inmode; 4131 enum rtx_code code = GET_CODE (op); 4132 4133 if (mode != VOIDmode && GET_MODE (op) != mode) 4134 return 0; 4135 if (GET_RTX_CLASS (code) != '<') 4136 return 0; 4137 inmode = GET_MODE (XEXP (op, 0)); 4138 if (inmode == CCFPmode || inmode == CCFPUmode) 4139 { 4140 enum rtx_code second_code, bypass_code; 4141 4142 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 4143 if (bypass_code != NIL || second_code != NIL) 4144 return 0; 4145 code = ix86_fp_compare_code_to_integer (code); 4146 } 4147 /* i387 supports just limited amount of conditional codes. */ 4148 switch (code) 4149 { 4150 case LTU: case GTU: case LEU: case GEU: 4151 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode) 4152 return 1; 4153 return 0; 4154 case ORDERED: case UNORDERED: 4155 case EQ: case NE: 4156 return 1; 4157 default: 4158 return 0; 4159 } 4160} 4161 4162/* Return 1 if OP is a binary operator that can be promoted to wider mode. 
*/ 4163 4164int 4165promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 4166{ 4167 switch (GET_CODE (op)) 4168 { 4169 case MULT: 4170 /* Modern CPUs have same latency for HImode and SImode multiply, 4171 but 386 and 486 do HImode multiply faster. */ 4172 return ix86_tune > PROCESSOR_I486; 4173 case PLUS: 4174 case AND: 4175 case IOR: 4176 case XOR: 4177 case ASHIFT: 4178 return 1; 4179 default: 4180 return 0; 4181 } 4182} 4183 4184/* Nearly general operand, but accept any const_double, since we wish 4185 to be able to drop them into memory rather than have them get pulled 4186 into registers. */ 4187 4188int 4189cmp_fp_expander_operand (rtx op, enum machine_mode mode) 4190{ 4191 if (mode != VOIDmode && mode != GET_MODE (op)) 4192 return 0; 4193 if (GET_CODE (op) == CONST_DOUBLE) 4194 return 1; 4195 return general_operand (op, mode); 4196} 4197 4198/* Match an SI or HImode register for a zero_extract. */ 4199 4200int 4201ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 4202{ 4203 int regno; 4204 if ((!TARGET_64BIT || GET_MODE (op) != DImode) 4205 && GET_MODE (op) != SImode && GET_MODE (op) != HImode) 4206 return 0; 4207 4208 if (!register_operand (op, VOIDmode)) 4209 return 0; 4210 4211 /* Be careful to accept only registers having upper parts. */ 4212 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op)); 4213 return (regno > LAST_VIRTUAL_REGISTER || regno < 4); 4214} 4215 4216/* Return 1 if this is a valid binary floating-point operation. 4217 OP is the expression matched, and MODE is its mode. 
 */

int
binary_fp_operator (rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}

/* Return 1 if OP is a MULT rtx.  */

int
mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return GET_CODE (op) == MULT;
}

/* Return 1 if OP is a DIV rtx.  */

int
div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return GET_CODE (op) == DIV;
}

/* Return 1 if OP is a commutative ('c') or other two-operand ('2')
   arithmetic/logical operator in MODE.  */

int
arith_or_logical_operator (rtx op, enum machine_mode mode)
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
	      || GET_RTX_CLASS (GET_CODE (op)) == '2'));
}

/* Returns 1 if OP is memory operand with a displacement.  */

int
memory_displacement_operand (rtx op, enum machine_mode mode)
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}

/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (rtx op, enum machine_mode mode)
{
  if (nonimmediate_operand (op, mode))
    return 1;

  /* Also accept (and (zero_extract X 8 8) const_int), the shape
     produced by testqi_ext_ccno_0.  */
  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}

/* Returns 1 if OP is memory operand that can not be represented by the
   modRM array.  */

int
long_memory_operand (rtx op, enum machine_mode mode)
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}

/* Return nonzero if the rtx is known aligned.  */

int
aligned_operand (rtx op, enum machine_mode mode)
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      /* The displacement must be a multiple of 4.  */
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}

/* Return 1 if OP is a COMPARE rtx.  */

int
compare_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return GET_CODE (op) == COMPARE;
}

/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi  */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}

/* Return true if the constant is something that can be loaded with
   a special instruction.

   Returns -1 if X is not a floating-point CONST_DOUBLE, 0 if no special
   instruction exists, 1 for fldz, 2 for fld1, and 3..7 for the extended
   constants handled by standard_80387_constant_opcode/_rtx.  */

int
standard_80387_constant_p (rtx x)
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;

  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;

  /* For XFmode constants, try to find a special 80387 instruction on
     those CPUs that benefit from them.  */
  if (GET_MODE (x) == XFmode
      && x86_ext_80387_constants & TUNEMASK)
    {
      REAL_VALUE_TYPE r;
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    }
  abort ();
}

/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      abort ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
				       XFmode);
}

/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
 */
int
standard_sse_constant_p (rtx x)
{
  if (x == const0_rtx)
    return 1;
  /* All-zeros is the only constant loadable without memory.  */
  return (x == CONST0_RTX (GET_MODE (x)));
}

/* Returns 1 if OP contains a symbol reference.  */

int
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  /* Recursively walk every rtx ('e') and rtx vector ('E') operand.  */
  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}

/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* `ret N' takes a 16-bit immediate, so we can pop at most 32K-1
     bytes of arguments with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}

/* Return 1 if VALUE can be stored in the sign extended immediate field.  */
int
x86_64_sign_extended_value (rtx value)
{
  switch (GET_CODE (value))
    {
      /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
	 to be at least 32 and all acceptable constants are
	 represented as CONST_INT.  */
      case CONST_INT:
	if (HOST_BITS_PER_WIDE_INT == 32)
	  return 1;
	else
	  {
	    HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
	    return trunc_int_for_mode (val, SImode) == val;
	  }
	break;

      /* For certain code models, the symbolic references are known to fit.
	 in CM_SMALL_PIC model we know it fits if it is local to the shared
	 library.  Don't count TLS SYMBOL_REFs here, since they should fit
	 only if inside of UNSPEC handled below.  */
      case SYMBOL_REF:
	/* TLS symbols are not constant.  */
	if (tls_symbolic_operand (value, Pmode))
	  return false;
	return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);

      /* For certain code models, the code is near as well.  */
      case LABEL_REF:
	return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
		|| ix86_cmodel == CM_KERNEL);

      /* We also may accept the offsetted memory references in certain
	 special cases.  */
      case CONST:
	if (GET_CODE (XEXP (value, 0)) == UNSPEC)
	  switch (XINT (XEXP (value, 0), 1))
	    {
	    case UNSPEC_GOTPCREL:
	    case UNSPEC_DTPOFF:
	    case UNSPEC_GOTNTPOFF:
	    case UNSPEC_NTPOFF:
	      return 1;
	    default:
	      break;
	    }
	if (GET_CODE (XEXP (value, 0)) == PLUS)
	  {
	    rtx op1 = XEXP (XEXP (value, 0), 0);
	    rtx op2 = XEXP (XEXP (value, 0), 1);
	    HOST_WIDE_INT offset;

	    if (ix86_cmodel == CM_LARGE)
	      return 0;
	    if (GET_CODE (op2) != CONST_INT)
	      return 0;
	    offset = trunc_int_for_mode (INTVAL (op2), DImode);
	    switch (GET_CODE (op1))
	      {
	      case SYMBOL_REF:
		/* For CM_SMALL assume that latest object is 16MB before
		   end of 31bits boundary.  We may also accept pretty
		   large negative constants knowing that all objects are
		   in the positive half of address space.  */
		if (ix86_cmodel == CM_SMALL
		    && offset < 16*1024*1024
		    && trunc_int_for_mode (offset, SImode) == offset)
		  return 1;
		/* For CM_KERNEL we know that all objects reside in the
		   negative half of 32bits address space.  We may not
		   accept negative offsets, since they may be just off
		   and we may accept pretty large positive ones.  */
		if (ix86_cmodel == CM_KERNEL
		    && offset > 0
		    && trunc_int_for_mode (offset, SImode) == offset)
		  return 1;
		break;
	      case LABEL_REF:
		/* These conditions are similar to SYMBOL_REF ones, just the
		   constraints for code models differ.  */
		if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		    && offset < 16*1024*1024
		    && trunc_int_for_mode (offset, SImode) == offset)
		  return 1;
		if (ix86_cmodel == CM_KERNEL
		    && offset > 0
		    && trunc_int_for_mode (offset, SImode) == offset)
		  return 1;
		break;
	      case UNSPEC:
		switch (XINT (op1, 1))
		  {
		  case UNSPEC_DTPOFF:
		  case UNSPEC_NTPOFF:
		    if (offset > 0
			&& trunc_int_for_mode (offset, SImode) == offset)
		      return 1;
		  }
		break;
	      default:
		return 0;
	      }
	  }
	return 0;
      default:
	return 0;
    }
}

/* Return 1 if VALUE can be stored in the zero extended immediate field.  */
int
x86_64_zero_extended_value (rtx value)
{
  switch (GET_CODE (value))
    {
    case CONST_DOUBLE:
      if (HOST_BITS_PER_WIDE_INT == 32)
	return (GET_MODE (value) == VOIDmode
		&& !CONST_DOUBLE_HIGH (value));
      else
	return 0;
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
	return INTVAL (value) >= 0;
      else
	return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
      break;

    /* For certain code models, the symbolic references are known to fit.  */
    case SYMBOL_REF:
      /* TLS symbols are not constant.  */
      if (tls_symbolic_operand (value, Pmode))
	return false;
      return ix86_cmodel == CM_SMALL;

    /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;

    /* We also may accept the offsetted memory references in certain
       special cases.  */
    case CONST:
      if (GET_CODE (XEXP (value, 0)) == PLUS)
	{
	  rtx op1 = XEXP (XEXP (value, 0), 0);
	  rtx op2 = XEXP (XEXP (value, 0), 1);

	  if (ix86_cmodel == CM_LARGE)
	    return 0;
	  switch (GET_CODE (op1))
	    {
	    case SYMBOL_REF:
	      return 0;
	      /* NOTE(review): everything below this `return 0;' up to
		 `break;' is unreachable dead code — either the early
		 return or the retained checks look unintentional;
		 confirm against upstream history.  */
	      /* For small code model we may accept pretty large positive
		 offsets, since one bit is available for free.  Negative
		 offsets are limited by the size of NULL pointer area
		 specified by the ABI.  */
	      if (ix86_cmodel == CM_SMALL
		  && GET_CODE (op2) == CONST_INT
		  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
		  && (trunc_int_for_mode (INTVAL (op2), SImode)
		      == INTVAL (op2)))
		return 1;
	      /* ??? For the kernel, we may accept adjustment of
		 -0x10000000, since we know that it will just convert
		 negative address space to positive, but perhaps this
		 is not worthwhile.  */
	      break;
	    case LABEL_REF:
	      /* These conditions are similar to SYMBOL_REF ones, just the
		 constraints for code models differ.  */
	      if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		  && GET_CODE (op2) == CONST_INT
		  && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
		  && (trunc_int_for_mode (INTVAL (op2), SImode)
		      == INTVAL (op2)))
		return 1;
	      break;
	    default:
	      return 0;
	    }
	}
      return 0;
    default:
      return 0;
    }
}

/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

int
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return 1;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.
 */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return 1;

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf))
    return 1;

  if (current_function_profile)
    return 1;

  return 0;
}

/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Bit mask of hard registers for which a pc thunk must be emitted
   at the end of the file; set by output_set_got.  */
static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}


/* File-end hook: for each register recorded in pic_labels_used, emit
   the -fpic pc thunk, a tiny function that loads that register with
   the return address of the caller and then returns.  Also emits the
   executable-stack marker when required.  */

void
ix86_file_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      if (! ((pic_labels_used >> regno) & 1))
	continue;

      get_pc_thunk_name (name, regno);

      if (USE_HIDDEN_LINKONCE)
	{
	  tree decl;

	  /* Emit the thunk as a hidden, one-only function so multiple
	     objects can share a single copy.  */
	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
			     error_mark_node);
	  TREE_PUBLIC (decl) = 1;
	  TREE_STATIC (decl) = 1;
	  DECL_ONE_ONLY (decl) = 1;

	  (*targetm.asm_out.unique_section) (decl, 0);
	  named_section (decl, NULL, 0);

	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  text_section ();
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      /* The thunk body: load the caller's return address (at the top
	 of the stack) into the register, then return.  */
      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}

/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest)
{
  rtx xops[3];

  xops[0] = dest;
  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
    {
      /* Use a call/pop pair to read the pc instead of a thunk.  */
      xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());

      if (!flag_pic)
	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      else
	output_asm_insn ("call\t%a2", xops);

#if TARGET_MACHO
      /* Output the "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referred to by the Mach-O PIC subsystem.
*/ 4884 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ()); 4885#endif 4886 (*targetm.asm_out.internal_label) (asm_out_file, "L", 4887 CODE_LABEL_NUMBER (XEXP (xops[2], 0))); 4888 4889 if (flag_pic) 4890 output_asm_insn ("pop{l}\t%0", xops); 4891 } 4892 else 4893 { 4894 char name[32]; 4895 get_pc_thunk_name (name, REGNO (dest)); 4896 pic_labels_used |= 1 << REGNO (dest); 4897 4898 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); 4899 xops[2] = gen_rtx_MEM (QImode, xops[2]); 4900 output_asm_insn ("call\t%X2", xops); 4901 } 4902 4903 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION) 4904 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops); 4905 else if (!TARGET_MACHO)
|
4907 4908 return ""; 4909} 4910 4911/* Generate an "push" pattern for input ARG. */ 4912 4913static rtx 4914gen_push (rtx arg) 4915{ 4916 return gen_rtx_SET (VOIDmode, 4917 gen_rtx_MEM (Pmode, 4918 gen_rtx_PRE_DEC (Pmode, 4919 stack_pointer_rtx)), 4920 arg); 4921} 4922 4923/* Return >= 0 if there is an unused call-clobbered register available 4924 for the entire function. */ 4925 4926static unsigned int 4927ix86_select_alt_pic_regnum (void) 4928{ 4929 if (current_function_is_leaf && !current_function_profile) 4930 { 4931 int i; 4932 for (i = 2; i >= 0; --i) 4933 if (!regs_ever_live[i]) 4934 return i; 4935 } 4936 4937 return INVALID_REGNUM; 4938} 4939 4940/* Return 1 if we need to save REGNO. */ 4941static int 4942ix86_save_reg (unsigned int regno, int maybe_eh_return) 4943{ 4944 if (pic_offset_table_rtx 4945 && regno == REAL_PIC_OFFSET_TABLE_REGNUM 4946 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM] 4947 || current_function_profile 4948 || current_function_calls_eh_return 4949 || current_function_uses_const_pool)) 4950 { 4951 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM) 4952 return 0; 4953 return 1; 4954 } 4955 4956 if (current_function_calls_eh_return && maybe_eh_return) 4957 { 4958 unsigned i; 4959 for (i = 0; ; i++) 4960 { 4961 unsigned test = EH_RETURN_DATA_REGNO (i); 4962 if (test == INVALID_REGNUM) 4963 break; 4964 if (test == regno) 4965 return 1; 4966 } 4967 } 4968 4969 return (regs_ever_live[regno] 4970 && !call_used_regs[regno] 4971 && !fixed_regs[regno] 4972 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)); 4973} 4974 4975/* Return number of registers to be saved on the stack. 
 */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}

/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      if (to != STACK_POINTER_REGNUM)
	abort ();
      else if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;
      else if (from != FRAME_POINTER_REGNUM)
	abort ();
      else
	return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}

/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  HOST_WIDE_INT total_size;
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  HOST_WIDE_INT offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  total_size = size;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does multiple calls to the function and does not
     expect the decision to change within single iteration.  */
  if (!optimize_size
      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;

      cfun->machine->use_fast_prologue_epilogue_nregs = count;
      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
        cfun->machine->use_fast_prologue_epilogue = false;
      else
        cfun->machine->use_fast_prologue_epilogue
	   = !expensive_function_p (count);
    }
  if (TARGET_PROLOGUE_USING_MOVE
      && cfun->machine->use_fast_prologue_epilogue)
    frame->save_regs_using_mov = true;
  else
    frame->save_regs_using_mov = false;


  /* Skip return address and saved base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since i386 port is the only using those features
     that may break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  /* Va-arg area */
  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
		     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last current_function_outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || current_function_calls_alloca))
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || current_function_calls_alloca)
    frame->padding2 = ((offset + preferred_alignment - 1)
		       & -preferred_alignment) - offset;
  else
    frame->padding2 = 0;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  /* Moves are pointless when nothing (or a single register) needs
     saving, and unusable with 64-bit frames too large for a
     32-bit displacement.  */
  if ((!frame->to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  /* Debugging dump of the computed layout; normally compiled out.  */
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
	   frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
#endif
}

/*
   Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  int regno;
  rtx insn;

  /* Walk regnos downwards so the resulting pushes place the saved
     registers in the same ascending-address order that
     ix86_emit_save_regs_using_mov produces.  */
  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit code to save registers using MOV insns.  First register
   is stored at POINTER + OFFSET.  */
static void
ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
{
  int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
					       Pmode, offset),
			       gen_rtx_REG (Pmode, regno));
	RTX_FRAME_RELATED_P (insn) = 1;
	offset += UNITS_PER_WORD;
      }
}

/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
{
  rtx insn;

  if (! TARGET_64BIT)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
  else if (x86_64_immediate_operand (offset, DImode))
    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
  else
    {
      rtx r11;
      /* The offset does not fit in a sign-extended 32-bit immediate, so
	 it must be loaded into a scratch register first.

	 r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  ATM indirect sibcall
	 shouldn't be used together with huge frame sizes in one
	 function because of the frame_size check in sibcall.c.  */
      if (style == 0)
	abort ();
      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
      if (style < 0)
	RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
							       offset));
    }
  if (style < 0)
    RTX_FRAME_RELATED_P (insn) = 1;
}

/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;

  if (!frame.save_regs_using_mov)
    ix86_emit_save_regs ();
  else
    /* Register saves will be done with MOVs below, so the space for
       them must be part of the frame allocation.  */
    allocate += frame.nregs * UNITS_PER_WORD;

  /* When using red zone we may start register saving before allocating
     the stack frame saving one cycle of the prologue.  */
  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
				   : stack_pointer_rtx,
				   -frame.nregs * UNITS_PER_WORD);

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (-allocate), -1);
  else
    {
      /* Only valid for Win32: allocation large enough to need stack
	 probing, done via a helper that takes the size in %eax.  */
      rtx eax = gen_rtx_REG (SImode, 0);
      bool eax_live = ix86_eax_live_at_start_p ();

      if (TARGET_64BIT)
	abort ();

      if (eax_live)
	{
	  /* %eax holds an incoming argument; preserve it around the
	     probe call and account for the extra push.  */
	  emit_insn (gen_push (eax));
	  allocate -= 4;
	}

      insn = emit_move_insn (eax, GEN_INT (allocate));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_insn (gen_allocate_stack_worker (eax));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (eax_live)
	{
	  /* Reload the saved %eax from its slot just above the newly
	     allocated area.  */
	  rtx t = plus_constant (stack_pointer_rtx, allocate);
	  emit_move_insn (eax, gen_rtx_MEM (SImode, t));
	}
    }

  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
	 deleting all references to the pic register after reload.
	 Consider if cross-jumping unifies two sides of a branch
	 controlled by a comparison vs the only read from a global.
	 In which case, allow the set_got to be deleted, though we're
	 too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}

/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
				  int maybe_eh_return)
{
  int regno;
  rtx base_address = gen_rtx_MEM (Pmode, pointer);

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
	/* Ensure that adjust_address won't be forced to produce pointer
	   out of range allowed by x86-64 instruction set.  */
	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
	  {
	    rtx r11;

	    r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	    emit_move_insn (r11, GEN_INT (offset));
	    emit_insn (gen_adddi3 (r11, r11, pointer));
	    base_address = gen_rtx_MEM (Pmode, r11);
	    offset = 0;
	  }
	emit_move_insn (gen_rtx_REG (Pmode, regno),
			adjust_address (base_address, Pmode, offset));
	offset += UNITS_PER_WORD;
      }
}

/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.
 */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  /* OFFSET is now the (negative) displacement of the register save
     area from the hard frame pointer.  */
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register since it's
     less work than reloading sp and popping the register.

     The default code results in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style);
	    }
	  else
	    {
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate
					    + frame.nregs * UNITS_PER_WORD),
				   style);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size
	       || !cfun->machine->use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style);
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     GEN_INT (offset), style);
	}
      else if (frame.to_allocate)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate), style);

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in 64bit ABI.  */
	  if (TARGET_64BIT)
	    abort ();

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}

/* Reset from the function's potential modifications.
 */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  /* The prologue may have retargeted the PIC register to an alternate
     call-saved register; put it back for the next function.  */
  if (pic_offset_table_rtx)
    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
}

/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.  */

static int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      /* Flatten the (possibly nested) PLUS into at most four addends,
	 then classify each one.  */
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case UNSPEC:
	      /* A thread-pointer reference becomes a segment override.  */
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == SEG_DEFAULT)
		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
	      else
		return 0;
	      break;

	    case REG:
	    case SUBREG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is not scaling.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx
	  || index == frame_pointer_rtx
	  || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_tune == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}

/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x)
{
  struct ix86_address parts;
  int cost = 1;

  if (!ix86_decompose_address (x, &parts))
    abort ();

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;
  if (parts.seg != SEG_DEFAULT)
    cost--;

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  /* Two distinct registers in the address cost extra.  */
  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.
     Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}

/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      /* Look through (const (plus (unspec GOTPCREL sym) const_int))
	 for the underlying symbol or label.  */
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || XINT (term, 1) != UNSPEC_GOTPCREL)
	return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
	  && GET_CODE (term) != LABEL_REF)
	return x;

      return term;
    }

  term = ix86_delegitimize_address (x);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}

/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

bool
legitimate_constant_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
	{
	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
	    return false;
	  x = XEXP (x, 0);
	}

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
	  case UNSPEC_DTPOFF:
	    return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (!symbolic_operand (x, Pmode))
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (tls_symbolic_operand (x, Pmode))
	return false;
      break;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}

/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (rtx x)
{
  return !legitimate_constant_p (x);
}

/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
}

/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}

/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      /* TLS references should always be enclosed in UNSPEC.  */
      if (tls_symbolic_operand (disp, GET_MODE (disp)))
	return 0;
      if (GET_CODE (disp) == SYMBOL_REF
	  && ix86_cmodel == CM_SMALL_PIC
	  && SYMBOL_REF_LOCAL_P (disp))
	return 1;
      if (GET_CODE (disp) == LABEL_REF)
	return 1;
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS)
	{
	  rtx op0 = XEXP (XEXP (disp, 0), 0);
	  rtx op1 = XEXP (XEXP (disp, 0), 1);

	  /* TLS references should always be enclosed in UNSPEC.  */
	  if (tls_symbolic_operand (op0, GET_MODE (op0)))
	    return 0;
	  if (((GET_CODE (op0) == SYMBOL_REF
		&& ix86_cmodel == CM_SMALL_PIC
		&& SYMBOL_REF_LOCAL_P (op0))
	       || GET_CODE (op0) == LABEL_REF)
	      && GET_CODE (op1) == CONST_INT
	      && INTVAL (op1) < 16*1024*1024
	      && INTVAL (op1) >= -16*1024*1024)
	    return 1;
	}
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions; this limits the distance
	 of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
  if (TARGET_MACHO && GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
	  || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
	  {
	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
	    if (! strcmp (sym_name, "<pic base>"))
	      return 1;
	  }
    }

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	  || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_NTPOFF:
      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_DTPOFF:
      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}

/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.
   LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_GOTPCREL:
	    if (!flag_pic)
	      abort ();
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  default:
	    reason = "invalid address unspec";
	    goto report_error;
	  }

      else if (flag_pic && (SYMBOLIC_CONST (disp)
#if TARGET_MACHO
			    && !machopic_operand_p (disp)
#endif
			    ))
	{
	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		{
		  reason = "non-constant pic memory reference";
		  goto report_error;
		}
	    }
	  else if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && GET_CODE (disp) != CONST_INT
	       && (GET_CODE (disp) != CONST
		   || !legitimate_constant_p (disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !legitimate_constant_p (disp)))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}
      else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
	{
	  reason = "displacement is out of range";
	  goto report_error;
	}
    }

  /* Everything looks valid.
 */
  /* Address passed every check above: optionally trace, then accept.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  /* Shared failure exit: REASON/REASON_RTX were set at the failing test.  */
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}

/* Return a unique alias set for the GOT.  Allocated lazily on the
   first call and reused for the rest of the compilation.  */

static HOST_WIDE_INT
ix86_GOT_alias_set (void)
{
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (reg == 0)
    reg = gen_reg_rtx (Pmode);
  /* Use the generic Mach-O PIC machinery.  */
  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    /* Already a valid RIP-relative/PIC displacement; nothing to do.  */
    new = addr;
  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      /* Mark the PIC register live; reload will not do it for us.  */
      if (reload_in_progress)
	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  /* Wrap only the symbol part in the @GOTOFF unspec, keeping
	     the constant offset outside it.  */
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
	  new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
	}
      else
	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
	{
	  /* 64-bit: load the address through a @GOTPCREL memory slot.  */
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use directly gen_movsi, otherwise the address is loaded
	     into register for CSE.  We don't want to CSE this addresses,
	     instead we CSE addresses from the GOT table, so skip this.  */
	  emit_insn (gen_movsi (reg, new));
	  new = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (local_symbolic_operand (op0, Pmode)
	      && GET_CODE (op1) == CONST_INT)
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					UNSPEC_GOTOFF);
		  new = gen_rtx_PLUS (Pmode, new, op1);
		  new = gen_rtx_CONST (Pmode, new);
		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new);
		      new = reg;
		    }
		}
	      else
		{
		  /* 64-bit: offsets outside +/-16MB are forced into a
		     register (presumably to keep the displacement within
		     the signed-32-bit reach of RIP-relative addressing
		     with margin -- NOTE(review): confirm the rationale).  */
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
		}
	    }
	  else
	    {
	      /* General PLUS: legitimize both halves recursively and
		 re-associate any constant tail outward.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new = legitimize_pic_address (XEXP (addr, 1),
					    base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}

/* Load the thread pointer.  If TO_REG is true, force it into a register.
 */

/* Build an UNSPEC_TP rtx (the thread pointer).  With TO_REG nonzero the
   value is copied into a fresh pseudo and that register is returned;
   otherwise the bare unspec is returned for direct use in an address.  */

static rtx
get_thread_pointer (int to_reg)
{
  rtx tp, reg, insn;

  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  if (!to_reg)
    return tp;

  reg = gen_reg_rtx (Pmode);
  insn = gen_rtx_SET (VOIDmode, reg, tp);
  /* NOTE(review): the insn returned by emit_insn is stored but never
     used afterwards.  */
  insn = emit_insn (insn);

  return reg;
}

/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.

   X is the TLS symbol; MODEL selects the TLS access model.  Returns an
   rtx for the legitimized address, emitting any needed insns.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
{
  rtx dest, base, off, pic;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  /* The 64-bit sequence is a call whose result lands in %rax;
	     wrap it in a libcall block equivalent to X.  */
	  rtx rax = gen_rtx_REG (Pmode, 0), insns;

	  start_sequence ();
	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
	  insns = get_insns ();
	  end_sequence ();

	  emit_libcall_block (insns, dest, rax, x);
	}
      else
	emit_insn (gen_tls_global_dynamic_32 (dest, x));
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;

	  start_sequence ();
	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
	  insns = get_insns ();
	  end_sequence ();

	  /* Attach an equivalence note naming the tls_get_addr helper so
	     the block can be treated like an ordinary libcall.  */
	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
	  emit_libcall_block (insns, base, rax, note);
	}
      else
	emit_insn (gen_tls_local_dynamic_base_32 (base));

      /* Address is module base + @DTPOFF offset of the symbol.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      return gen_rtx_PLUS (Pmode, base, off);

    case TLS_MODEL_INITIAL_EXEC:
      /* Pick the GOT-relative unspec and, for 32-bit, the PIC base
	 register needed to reach the GOT.  */
      if (TARGET_64BIT)
	{
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  pic = pic_offset_table_rtx;
	  type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_GNU_TLS)
	{
	  /* Non-PIC, Sun-style TLS: materialize a GOT pointer locally.  */
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      /* Load the TP offset from the GOT slot.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
	off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_rtx_MEM (Pmode, off);
      RTX_UNCHANGING_P (off) = 1;
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (Pmode, off);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  /* Sun TLS: offset is subtracted from the thread pointer.  */
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    default:
      abort ();
    }

  return dest;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.
   (NOTE(review): "WIN" refers to the historical macro interface; this
   function takes no such parameter.)

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
{
  int changed = 0;
  /* LOG does double duty: first it holds the TLS model returned by
     tls_symbolic_operand, later the log2 of shift counts.  */
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  log = tls_symbolic_operand (x, mode);
  if (log)
    return legitimize_tls_address (x, log, false);

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.
     NOTE(review): the < 4 test presumably also rejects non-power-of-two
     shift amounts, since exact_log2 would return -1 which wraps to a
     huge unsigned value -- confirm exact_log2's contract.  */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  /* Exactly one of the two candidate constants must be a
	     CONST_INT; the other operand is carried along unchanged.  */
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Break remaining multiplies out into registers.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Last resort: force the non-register half into a register.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}

/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  /* Scratch for ASM_GENERATE_INTERNAL_LABEL label names.  */
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      /* Non-local symbols with the 'P' code go through the PLT.  */
      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      /* Print the relocation suffix matching the unspec kind.  */
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs ("@GOTPCREL(%rip)", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@GOTTPOFF", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@TPOFF", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@TPOFF", file);
	  else
	    fputs ("@NTPOFF", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@DTPOFF", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@GOTTPOFF(%rip)", file);
	  else
	    fputs ("@GOTNTPOFF", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@INDNTPOFF", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}

/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
   We need to handle our special PIC relocations.  */

void
i386_dwarf_output_addr_const (FILE *file, rtx x)
{
#ifdef ASM_QUAD
  fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
#else
  if (TARGET_64BIT)
    abort ();
  fprintf (file, "%s", ASM_LONG);
#endif
  if (flag_pic)
    output_pic_addr_const (file, x, '\0');
  else
    output_addr_const (file, x);
  fputc ('\n', file);
}

/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

void
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@DTPOFF", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      /* Pad to 8 bytes; presumably the high word of the 64-bit
	 DTP-relative value is zero -- NOTE(review): confirm.  */
      fputs (", 0", file);
      break;
    default:
      abort ();
    }
}

/* In the name of slightly smaller debug output, and to cater to
   general assembler losage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.  */

static rtx
ix86_delegitimize_address (rtx orig_x)
{
  rtx x = orig_x, y;

  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      /* Only a GOTPCREL load can be undone on 64-bit.  */
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	  || GET_CODE (orig_x) != MEM)
	return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    y = NULL;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      y = XEXP (x, 0);
      if (GET_CODE (XEXP (y, 0)) == REG
	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 1);
      else if (GET_CODE (XEXP (y, 1)) == REG
	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 0);
      else
	return orig_x;
      if (GET_CODE (y) != REG
	  && GET_CODE (y) != MULT
	  && GET_CODE (y) != ASHIFT)
	return orig_x;
    }
  else
    return orig_x;

  /* X is now the CONST's operand; Y (if any) the index part to re-add.  */
  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    {
      if (y)
	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
      return XVECEXP (x, 0, 0);
    }

  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
	      && GET_CODE (orig_x) != MEM)))
    {
      /* symbol + offset: rebuild the PLUS around the bare symbol.  */
      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
      if (y)
	return gen_rtx_PLUS (Pmode, y, x);
      return x;
    }

  return orig_x;
}

static void
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
		    int fp, FILE *file)
{
  /* Emit the instruction-suffix string (e.g. "e", "ne", "be") for
     comparison CODE under CC mode MODE.  REVERSE inverts the condition;
     FP selects the fcmov-friendly spellings.  Aborts on CODE/MODE
     combinations the hardware cannot test.  */
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      /* FP compares must first be mapped onto an integer condition;
	 conditions needing a second or bypass jump cannot be printed
	 as a single suffix.  */
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	abort ();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
	 Those same assemblers have the same but opposite losage on cmov.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case LTU:
      if (mode != CCmode)
	abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
	abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}

/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'h', pretend the reg is the `high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */

void
print_reg (rtx x, int code, FILE *file)
{
  /* These registers have no assembler name; they must never reach
     the output pass.  */
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  /* Translate the print code into a byte size (0 = high byte,
     3 = st(0) special case); default to the operand's mode size.  */
  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      if (!TARGET_64BIT)
	abort ();
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      /* 'r'/'e' prefix turns ax into rax/eax etc. for integer regs.  */
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}

/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

static const char *
get_some_local_dynamic_name (void)
{
  rtx insn;

  /* Cached from a previous call or from the walk below.  */
  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  /* Walk every insn; the callback stores the first match in
     cfun->machine->some_ld_name and returns nonzero.  */
  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  /* Caller must only ask when such a symbol is known to exist.  */
  abort ();
}

/* for_each_rtx callback for get_some_local_dynamic_name: record the
   first local-dynamic SYMBOL_REF seen and stop the walk.  */

static int
get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && local_dynamic_symbolic_operand (x, Pmode))
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}

/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
	otherwise nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
7258 X -- don't print any sort of PIC '@' suffix for a symbol. 7259 & -- print some in-use local-dynamic symbol name. 7260 */ 7261 7262void 7263print_operand (FILE *file, rtx x, int code) 7264{ 7265 if (code) 7266 { 7267 switch (code) 7268 { 7269 case '*': 7270 if (ASSEMBLER_DIALECT == ASM_ATT) 7271 putc ('*', file); 7272 return; 7273 7274 case '&': 7275 assemble_name (file, get_some_local_dynamic_name ()); 7276 return; 7277 7278 case 'A': 7279 if (ASSEMBLER_DIALECT == ASM_ATT) 7280 putc ('*', file); 7281 else if (ASSEMBLER_DIALECT == ASM_INTEL) 7282 { 7283 /* Intel syntax. For absolute addresses, registers should not 7284 be surrounded by braces. */ 7285 if (GET_CODE (x) != REG) 7286 { 7287 putc ('[', file); 7288 PRINT_OPERAND (file, x, 0); 7289 putc (']', file); 7290 return; 7291 } 7292 } 7293 else 7294 abort (); 7295 7296 PRINT_OPERAND (file, x, 0); 7297 return; 7298 7299 7300 case 'L': 7301 if (ASSEMBLER_DIALECT == ASM_ATT) 7302 putc ('l', file); 7303 return; 7304 7305 case 'W': 7306 if (ASSEMBLER_DIALECT == ASM_ATT) 7307 putc ('w', file); 7308 return; 7309 7310 case 'B': 7311 if (ASSEMBLER_DIALECT == ASM_ATT) 7312 putc ('b', file); 7313 return; 7314 7315 case 'Q': 7316 if (ASSEMBLER_DIALECT == ASM_ATT) 7317 putc ('l', file); 7318 return; 7319 7320 case 'S': 7321 if (ASSEMBLER_DIALECT == ASM_ATT) 7322 putc ('s', file); 7323 return; 7324 7325 case 'T': 7326 if (ASSEMBLER_DIALECT == ASM_ATT) 7327 putc ('t', file); 7328 return; 7329 7330 case 'z': 7331 /* 387 opcodes don't get size suffixes if the operands are 7332 registers. */ 7333 if (STACK_REG_P (x)) 7334 return; 7335 7336 /* Likewise if using Intel opcodes. */ 7337 if (ASSEMBLER_DIALECT == ASM_INTEL) 7338 return; 7339 7340 /* This is the size of op from size of operand. 
*/ 7341 switch (GET_MODE_SIZE (GET_MODE (x))) 7342 { 7343 case 2: 7344#ifdef HAVE_GAS_FILDS_FISTS 7345 putc ('s', file); 7346#endif 7347 return; 7348 7349 case 4: 7350 if (GET_MODE (x) == SFmode) 7351 { 7352 putc ('s', file); 7353 return; 7354 } 7355 else 7356 putc ('l', file); 7357 return; 7358 7359 case 12: 7360 case 16: 7361 putc ('t', file); 7362 return; 7363 7364 case 8: 7365 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) 7366 { 7367#ifdef GAS_MNEMONICS 7368 putc ('q', file); 7369#else 7370 putc ('l', file); 7371 putc ('l', file); 7372#endif 7373 } 7374 else 7375 putc ('l', file); 7376 return; 7377 7378 default: 7379 abort (); 7380 } 7381 7382 case 'b': 7383 case 'w': 7384 case 'k': 7385 case 'q': 7386 case 'h': 7387 case 'y': 7388 case 'X': 7389 case 'P': 7390 break; 7391 7392 case 's': 7393 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT) 7394 { 7395 PRINT_OPERAND (file, x, 0); 7396 putc (',', file); 7397 } 7398 return; 7399 7400 case 'D': 7401 /* Little bit of braindamage here. The SSE compare instructions 7402 does use completely different names for the comparisons that the 7403 fp conditional moves. 
*/ 7404 switch (GET_CODE (x)) 7405 { 7406 case EQ: 7407 case UNEQ: 7408 fputs ("eq", file); 7409 break; 7410 case LT: 7411 case UNLT: 7412 fputs ("lt", file); 7413 break; 7414 case LE: 7415 case UNLE: 7416 fputs ("le", file); 7417 break; 7418 case UNORDERED: 7419 fputs ("unord", file); 7420 break; 7421 case NE: 7422 case LTGT: 7423 fputs ("neq", file); 7424 break; 7425 case UNGE: 7426 case GE: 7427 fputs ("nlt", file); 7428 break; 7429 case UNGT: 7430 case GT: 7431 fputs ("nle", file); 7432 break; 7433 case ORDERED: 7434 fputs ("ord", file); 7435 break; 7436 default: 7437 abort (); 7438 break; 7439 } 7440 return; 7441 case 'O': 7442#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 7443 if (ASSEMBLER_DIALECT == ASM_ATT) 7444 { 7445 switch (GET_MODE (x)) 7446 { 7447 case HImode: putc ('w', file); break; 7448 case SImode: 7449 case SFmode: putc ('l', file); break; 7450 case DImode: 7451 case DFmode: putc ('q', file); break; 7452 default: abort (); 7453 } 7454 putc ('.', file); 7455 } 7456#endif 7457 return; 7458 case 'C': 7459 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file); 7460 return; 7461 case 'F': 7462#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 7463 if (ASSEMBLER_DIALECT == ASM_ATT) 7464 putc ('.', file); 7465#endif 7466 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file); 7467 return; 7468 7469 /* Like above, but reverse condition */ 7470 case 'c': 7471 /* Check to see if argument to %c is really a constant 7472 and not a condition code which needs to be reversed. 
*/ 7473 if (GET_RTX_CLASS (GET_CODE (x)) != '<') 7474 { 7475 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'"); 7476 return; 7477 } 7478 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file); 7479 return; 7480 case 'f': 7481#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 7482 if (ASSEMBLER_DIALECT == ASM_ATT) 7483 putc ('.', file); 7484#endif 7485 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file); 7486 return; 7487 case '+': 7488 { 7489 rtx x; 7490 7491 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS) 7492 return; 7493 7494 x = find_reg_note (current_output_insn, REG_BR_PROB, 0); 7495 if (x) 7496 { 7497 int pred_val = INTVAL (XEXP (x, 0)); 7498 7499 if (pred_val < REG_BR_PROB_BASE * 45 / 100 7500 || pred_val > REG_BR_PROB_BASE * 55 / 100) 7501 { 7502 int taken = pred_val > REG_BR_PROB_BASE / 2; 7503 int cputaken = final_forward_branch_p (current_output_insn) == 0; 7504 7505 /* Emit hints only in the case default branch prediction 7506 heuristics would fail. */ 7507 if (taken != cputaken) 7508 { 7509 /* We use 3e (DS) prefix for taken branches and 7510 2e (CS) prefix for not taken branches. */ 7511 if (taken) 7512 fputs ("ds ; ", file); 7513 else 7514 fputs ("cs ; ", file); 7515 } 7516 } 7517 } 7518 return; 7519 } 7520 default: 7521 output_operand_lossage ("invalid operand code `%c'", code); 7522 } 7523 } 7524 7525 if (GET_CODE (x) == REG) 7526 print_reg (x, code, file); 7527 7528 else if (GET_CODE (x) == MEM) 7529 { 7530 /* No `byte ptr' prefix for call instructions. 
*/ 7531 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P') 7532 { 7533 const char * size; 7534 switch (GET_MODE_SIZE (GET_MODE (x))) 7535 { 7536 case 1: size = "BYTE"; break; 7537 case 2: size = "WORD"; break; 7538 case 4: size = "DWORD"; break; 7539 case 8: size = "QWORD"; break; 7540 case 12: size = "XWORD"; break; 7541 case 16: size = "XMMWORD"; break; 7542 default: 7543 abort (); 7544 } 7545 7546 /* Check for explicit size override (codes 'b', 'w' and 'k') */ 7547 if (code == 'b') 7548 size = "BYTE"; 7549 else if (code == 'w') 7550 size = "WORD"; 7551 else if (code == 'k') 7552 size = "DWORD"; 7553 7554 fputs (size, file); 7555 fputs (" PTR ", file); 7556 } 7557 7558 x = XEXP (x, 0); 7559 /* Avoid (%rip) for call operands. */ 7560 if (CONSTANT_ADDRESS_P (x) && code == 'P' 7561 && GET_CODE (x) != CONST_INT) 7562 output_addr_const (file, x); 7563 else if (this_is_asm_operands && ! address_operand (x, VOIDmode)) 7564 output_operand_lossage ("invalid constraints for operand"); 7565 else 7566 output_address (x); 7567 } 7568 7569 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode) 7570 { 7571 REAL_VALUE_TYPE r; 7572 long l; 7573 7574 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 7575 REAL_VALUE_TO_TARGET_SINGLE (r, l); 7576 7577 if (ASSEMBLER_DIALECT == ASM_ATT) 7578 putc ('$', file); 7579 fprintf (file, "0x%08lx", l); 7580 } 7581 7582 /* These float cases don't actually occur as immediate operands. 
*/
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
	   && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else
    {
      /* Immediate/address constant.  In AT&T syntax immediates get a '$'
	 prefix; Intel syntax uses "OFFSET FLAT:" for symbolic operands.
	 Code 'P' suppresses the prefix entirely.  */
      if (code != 'P')
	{
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}

/* Print a memory operand whose address is ADDR.  Emits either the AT&T
   "disp(base,index,scale)" form or the Intel "[base+index*scale+disp]"
   form, with an optional %fs:/%gs: segment override first.  */

void
print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (USER_LABEL_PREFIX[0] == 0)
	putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      abort ();
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  
*/

      if (GET_CODE (disp) == CONST_INT)
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
	    {
	      /* Intel syntax needs an explicit segment for a bare
		 constant address.  */
	      if (USER_LABEL_PREFIX[0] == 0)
		putc ('%', file);
	      fputs ("ds:", file);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);

      /* Use one byte shorter RIP relative addressing for 64bit mode.  */
      if (TARGET_64BIT
	  && ((GET_CODE (disp) == SYMBOL_REF
	       && ! tls_symbolic_operand (disp, GET_MODE (disp)))
	      || GET_CODE (disp) == LABEL_REF
	      || (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
		      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
	fputs ("(%rip)", file);
    }
  else
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  /* AT&T syntax: disp(base,index,scale).  */
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  /* Intel syntax: [base+index*scale+disp].  */
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  
*/
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, 0, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}

/* Output the TLS relocation suffix for the UNSPEC X to FILE, e.g.
   "sym@GOTTPOFF".  Returns false if X is not an UNSPEC this target
   handles, so the caller can fall back to default handling.  */

bool
output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  
*/
      fputs ("@GOTTPOFF", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@TPOFF", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@TPOFF", file);
      else
	fputs ("@NTPOFF", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@DTPOFF", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@GOTTPOFF(%rip)", file);
      else
	fputs ("@GOTNTPOFF", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@INDNTPOFF", file);
      break;

    default:
      return false;
    }

  return true;
}

/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
         but we still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  lo_half[num] = adjust_address (op, SImode, 0);
	  hi_half[num] = adjust_address (op, SImode, 4);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 4);
	}
    }
}
/* Split one or more TImode RTL references into pairs of DImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  
"operands" is a pointer to an array of DImode RTL to 7853 split and "num" is its length. lo_half and hi_half are output arrays 7854 that parallel "operands". */ 7855 7856void 7857split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) 7858{ 7859 while (num--) 7860 { 7861 rtx op = operands[num]; 7862 7863 /* simplify_subreg refuse to split volatile memory addresses, but we 7864 still have to handle it. */ 7865 if (GET_CODE (op) == MEM) 7866 { 7867 lo_half[num] = adjust_address (op, DImode, 0); 7868 hi_half[num] = adjust_address (op, DImode, 8); 7869 } 7870 else 7871 { 7872 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0); 7873 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8); 7874 } 7875 } 7876} 7877 7878/* Output code to perform a 387 binary operation in INSN, one of PLUS, 7879 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] 7880 is the expression of the binary operation. The output may either be 7881 emitted here, or returned to the caller, like all output_* functions. 7882 7883 There is no guarantee that the operands are the same mode, as they 7884 might be within FLOAT or FLOAT_EXTEND expressions. */ 7885 7886#ifndef SYSV386_COMPAT 7887/* Set to 1 for compatibility with brain-damaged assemblers. No-one 7888 wants to fix the assemblers because that causes incompatibility 7889 with gcc. No-one wants to fix gcc because that causes 7890 incompatibility with assemblers... You can use the option of 7891 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */ 7892#define SYSV386_COMPAT 1 7893#endif 7894 7895const char * 7896output_387_binary_op (rtx insn, rtx *operands) 7897{ 7898 static char buf[30]; 7899 const char *p; 7900 const char *ssep; 7901 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]); 7902 7903#ifdef ENABLE_CHECKING 7904 /* Even if we do not want to check the inputs, this documents input 7905 constraints. Which helps in understanding the following code. 
*/ 7906 if (STACK_REG_P (operands[0]) 7907 && ((REG_P (operands[1]) 7908 && REGNO (operands[0]) == REGNO (operands[1]) 7909 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM)) 7910 || (REG_P (operands[2]) 7911 && REGNO (operands[0]) == REGNO (operands[2]) 7912 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM))) 7913 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2]))) 7914 ; /* ok */ 7915 else if (!is_sse) 7916 abort (); 7917#endif 7918 7919 switch (GET_CODE (operands[3])) 7920 { 7921 case PLUS: 7922 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 7923 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 7924 p = "fiadd"; 7925 else 7926 p = "fadd"; 7927 ssep = "add"; 7928 break; 7929 7930 case MINUS: 7931 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 7932 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 7933 p = "fisub"; 7934 else 7935 p = "fsub"; 7936 ssep = "sub"; 7937 break; 7938 7939 case MULT: 7940 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 7941 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 7942 p = "fimul"; 7943 else 7944 p = "fmul"; 7945 ssep = "mul"; 7946 break; 7947 7948 case DIV: 7949 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 7950 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 7951 p = "fidiv"; 7952 else 7953 p = "fdiv"; 7954 ssep = "div"; 7955 break; 7956 7957 default: 7958 abort (); 7959 } 7960 7961 if (is_sse) 7962 { 7963 strcpy (buf, ssep); 7964 if (GET_MODE (operands[0]) == SFmode) 7965 strcat (buf, "ss\t{%2, %0|%0, %2}"); 7966 else 7967 strcat (buf, "sd\t{%2, %0|%0, %2}"); 7968 return buf; 7969 } 7970 strcpy (buf, p); 7971 7972 switch (GET_CODE (operands[3])) 7973 { 7974 case MULT: 7975 case PLUS: 7976 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2])) 7977 { 7978 rtx temp = operands[2]; 7979 operands[2] = operands[1]; 7980 operands[1] = temp; 7981 } 7982 7983 /* know operands[0] == operands[1]. 
*/ 7984 7985 if (GET_CODE (operands[2]) == MEM) 7986 { 7987 p = "%z2\t%2"; 7988 break; 7989 } 7990 7991 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) 7992 { 7993 if (STACK_TOP_P (operands[0])) 7994 /* How is it that we are storing to a dead operand[2]? 7995 Well, presumably operands[1] is dead too. We can't 7996 store the result to st(0) as st(0) gets popped on this 7997 instruction. Instead store to operands[2] (which I 7998 think has to be st(1)). st(1) will be popped later. 7999 gcc <= 2.8.1 didn't have this check and generated 8000 assembly code that the Unixware assembler rejected. */ 8001 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ 8002 else 8003 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ 8004 break; 8005 } 8006 8007 if (STACK_TOP_P (operands[0])) 8008 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ 8009 else 8010 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ 8011 break; 8012 8013 case MINUS: 8014 case DIV: 8015 if (GET_CODE (operands[1]) == MEM) 8016 { 8017 p = "r%z1\t%1"; 8018 break; 8019 } 8020 8021 if (GET_CODE (operands[2]) == MEM) 8022 { 8023 p = "%z2\t%2"; 8024 break; 8025 } 8026 8027 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) 8028 { 8029#if SYSV386_COMPAT 8030 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T 8031 derived assemblers, confusingly reverse the direction of 8032 the operation for fsub{r} and fdiv{r} when the 8033 destination register is not st(0). The Intel assembler 8034 doesn't have this brain damage. Read !SYSV386_COMPAT to 8035 figure out what the hardware really does. */ 8036 if (STACK_TOP_P (operands[0])) 8037 p = "{p\t%0, %2|rp\t%2, %0}"; 8038 else 8039 p = "{rp\t%2, %0|p\t%0, %2}"; 8040#else 8041 if (STACK_TOP_P (operands[0])) 8042 /* As above for fmul/fadd, we can't store to st(0). 
*/ 8043 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ 8044 else 8045 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ 8046#endif 8047 break; 8048 } 8049 8050 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) 8051 { 8052#if SYSV386_COMPAT 8053 if (STACK_TOP_P (operands[0])) 8054 p = "{rp\t%0, %1|p\t%1, %0}"; 8055 else 8056 p = "{p\t%1, %0|rp\t%0, %1}"; 8057#else 8058 if (STACK_TOP_P (operands[0])) 8059 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */ 8060 else 8061 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */ 8062#endif 8063 break; 8064 } 8065 8066 if (STACK_TOP_P (operands[0])) 8067 { 8068 if (STACK_TOP_P (operands[1])) 8069 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ 8070 else 8071 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */ 8072 break; 8073 } 8074 else if (STACK_TOP_P (operands[1])) 8075 { 8076#if SYSV386_COMPAT 8077 p = "{\t%1, %0|r\t%0, %1}"; 8078#else 8079 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */ 8080#endif 8081 } 8082 else 8083 { 8084#if SYSV386_COMPAT 8085 p = "{r\t%2, %0|\t%0, %2}"; 8086#else 8087 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ 8088#endif 8089 } 8090 break; 8091 8092 default: 8093 abort (); 8094 } 8095 8096 strcat (buf, p); 8097 return buf; 8098} 8099 8100/* Output code to initialize control word copies used by 8101 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN 8102 is set to control word rounding downwards. 
*/ 8103void 8104emit_i387_cw_initialization (rtx normal, rtx round_down) 8105{ 8106 rtx reg = gen_reg_rtx (HImode); 8107 8108 emit_insn (gen_x86_fnstcw_1 (normal)); 8109 emit_move_insn (reg, normal); 8110 if (!TARGET_PARTIAL_REG_STALL && !optimize_size 8111 && !TARGET_64BIT) 8112 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc))); 8113 else 8114 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00))); 8115 emit_move_insn (round_down, reg); 8116} 8117 8118/* Output code for INSN to convert a float to a signed int. OPERANDS 8119 are the insn operands. The output may be [HSD]Imode and the input 8120 operand may be [SDX]Fmode. */ 8121 8122const char * 8123output_fix_trunc (rtx insn, rtx *operands) 8124{ 8125 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; 8126 int dimode_p = GET_MODE (operands[0]) == DImode; 8127 8128 /* Jump through a hoop or two for DImode, since the hardware has no 8129 non-popping instruction. We used to do this a different way, but 8130 that was somewhat fragile and broke with post-reload splitters. */ 8131 if (dimode_p && !stack_top_dies) 8132 output_asm_insn ("fld\t%y1", operands); 8133 8134 if (!STACK_TOP_P (operands[1])) 8135 abort (); 8136 8137 if (GET_CODE (operands[0]) != MEM) 8138 abort (); 8139 8140 output_asm_insn ("fldcw\t%3", operands); 8141 if (stack_top_dies || dimode_p) 8142 output_asm_insn ("fistp%z0\t%0", operands); 8143 else 8144 output_asm_insn ("fist%z0\t%0", operands); 8145 output_asm_insn ("fldcw\t%2", operands); 8146 8147 return ""; 8148} 8149 8150/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi 8151 should be used and 2 when fnstsw should be used. UNORDERED_P is true 8152 when fucom should be used. 
*/

const char *
output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);

  /* For the fnstsw form the real comparison operands are shifted by
     one, operand 0 being the status-word destination.  */
  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }
  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "ucomiss\t{%1, %0|%0, %1}";
	else
	  return "comiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "ucomisd\t{%1, %0|%0, %1}";
	else
	  return "comisd\t{%1, %0|%0, %1}";
    }

  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p == 1)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return "fstp\t%y0";
	}
      else
	{
	  if (eflags_p == 2)
	    {
	      if (unordered_p)
		return "fucompp\n\tfnstsw\t%0";
	      else
		return "fcompp\n\tfnstsw\t%0";
	    }
	  else
	    {
	      if (unordered_p)
		return "fucompp";
	      else
		return "fcompp";
	    }
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  
*/

      static const char * const alt[24] =
      {
	"fcom%z1\t%y1",
	"fcomp%z1\t%y1",
	"fucom%z1\t%y1",
	"fucomp%z1\t%y1",

	"ficom%z1\t%y1",
	"ficomp%z1\t%y1",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL,

	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      /* Build the table index per the encoding comment above; the NULL
	 entries are combinations that cannot occur.  */
      mask = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 24)
	abort ();
      ret = alt[mask];
      if (ret == NULL)
	abort ();

      return ret;
    }
}

/* Output one address-vector (jump table) element: the local label
   LPREFIX VALUE, as ASM_LONG, or as ASM_QUAD for 64-bit targets.  */

void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

  if (TARGET_64BIT)
    {
#ifdef ASM_QUAD
      directive = ASM_QUAD;
#else
      abort ();
#endif
    }

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

/* Output one address-difference vector element for local label VALUE:
   relative to label REL on 64-bit targets, as @GOTOFF where the
   assembler supports it, otherwise relative to the pic base.  */

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  if (TARGET_64BIT)
    fprintf (file, "%s%s%d-%s%d\n",
	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
      machopic_output_function_base_name (file);
      fprintf(file, "\n");
    }
#endif
  else
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}

/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  if (!reload_completed)
    abort ();

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
    {
      /* NOTE(review): hard-coded register 17 here appears to be the
	 same register spelled FLAGS_REG elsewhere in this file --
	 verify and prefer the macro.  */
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}

/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

static rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}

/* Expand a MODE move of OPERANDS[1] into OPERANDS[0]: legitimize TLS
   and PIC symbol references, force awkward operands (mem-to-mem moves,
   large 64-bit immediates, FP constants) through registers or the
   constant pool, then emit the final SET.  */

void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  int strict = (reload_in_progress || reload_completed);
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  model = tls_symbolic_operand (op1, Pmode);
  if (model)
    {
      op1 = legitimize_tls_address (op1, model, true);
      op1 = force_operand (op1, op0);
      /* legitimize_tls_address may have stored directly into op0.  */
      if (op1 == op0)
	return;
    }

  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
#if TARGET_MACHO
      if (MACHOPIC_PURE)
	{
	  rtx temp = ((reload_in_progress
		       || ((op0 && GET_CODE (op0) == REG)
			   && mode == Pmode))
		      ? op0 : gen_reg_rtx (Pmode));
	  op1 = machopic_indirect_data_reference (op1, temp);
	  op1 = machopic_legitimize_pic_address (op1, mode,
						 temp == op1 ? 0 : temp);
	}
      else if (MACHOPIC_INDIRECT)
	op1 = machopic_indirect_data_reference (op1, 0);
      if (op0 == op1)
	return;
#else
      if (GET_CODE (op0) == MEM)
	op1 = force_reg (Pmode, op1);
      else
	{
	  rtx temp = op0;
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (op1, temp);
	  if (temp == op0)
	    return;
	  op1 = temp;
	}
#endif /* TARGET_MACHO */
    }
  else
    {
      /* Mem-to-mem moves (other than pushes) need an intermediate
	 register.  */
      if (GET_CODE (op0) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && GET_CODE (op1) == MEM)
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (op1, mode)
	  && !x86_64_zero_extended_value (op1)
	  && !register_operand (op0, mode)
	  && optimize && !reload_completed && !reload_in_progress)
	op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (op1) == CONST_DOUBLE)
	    {
	      op1 = validize_mem (force_const_mem (mode, op1));
	      if (!register_operand (op0, mode))
		{
		  rtx temp = gen_reg_rtx (mode);
		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
		  emit_move_insn (op0, temp);
		  return;
		}
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}

/* Expand a vector-mode move of OPERANDS[1] into OPERANDS[0], forcing
   non-zero constants into the constant pool first.  */

void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (operands[0], mode)
      && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
    operands[1] = validize_mem (force_const_mem (mode, operands[1]));

  /* Make operand1 a register if it isn't already.  */
  if (!no_new_pseudos
      && !register_operand (operands[0], mode)
      && !register_operand (operands[1], mode))
    {
      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
      emit_move_insn (operands[0], temp);
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
}

/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  
*/
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  
*/

/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  CODE is the rtx code of the operation; OPERANDS
   are the destination and the two sources.  Mirrors the fixups performed by
   ix86_expand_binary_operator above.  */

int
ix86_binary_operator_ok (enum rtx_code code,
			 enum machine_mode mode ATTRIBUTE_UNUSED,
			 rtx operands[3])
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutable, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
    return 0;
  /* If the destination is memory, we must have a matching source operand.
     For a commutative operation either source may match.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
	    || (GET_RTX_CLASS (code) == 'c'
		&& rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutable and the source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != 'c'
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;
  return 1;
}

/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  NOT is the only unary operation here
	 that does not clobber flags, so it needs no CLOBBER parallel.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed: if we substituted a pseudo for a
     memory destination above, store the result back now.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}

/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.
*/

int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      /* CCNOmode only satisfies a CCNOmode request, or a plain CCmode
	 request when comparing against zero.  */
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Generate insn patterns to do an integer compare of OPERANDS.  Returns
   the comparison rtx (CODE flags 0) to put into the flags user.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}

/* Return the condition-code mode needed to implement comparison CODE of
   OP0 against OP1, i.e. the least constrained CC mode that still carries
   the flags CODE needs.  */

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
	 mode.  */
    case USE:
      return CCmode;
    default:
      abort ();
    }
}

/* Return the fixed registers used for condition codes.
*/

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  switch (m1)
    {
    default:
      abort ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCZmode:
      /* Any two integer CC modes (other than the GC/GOC pair handled
	 above) fall back to plain CCmode, which is the most constrained.  */
      switch (m2)
	{
	default:
	  return VOIDmode;

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCZmode:
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}

/* Return true if we should use an FCOMI instruction for this fp comparison.
   True when FCOMI is no more expensive than the cheapest alternative, for
   either the comparison itself or its swapped form.  */

int
ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
{
  enum rtx_code swapped_code = swap_condition (code);
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
	  || (ix86_fp_comparison_cost (swapped_code)
	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
}

/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || op_mode == XFmode
	  || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  /* Constants the 80387 can load directly stay as registers;
	     anything else goes through the constant pool.  */
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}

/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */
static enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
      break;
    case UNEQ:
      return EQ;
      break;
    case UNLT:
      return LTU;
      break;
    case UNLE:
      return LEU;
      break;
    case LTGT:
      return NE;
      break;
    default:
      return UNKNOWN;
    }
}

/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is comparison code for branch that will
   branch around FIRST_CODE and SECOND_CODE.  If some of branches
   is not required, set value to NIL.
   We never require more than two branches.
*/
static void
ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
			  enum rtx_code *first_code,
			  enum rtx_code *second_code)
{
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF PF CF
     >      0  0  0
     <      0  0  1
     =      1  0  0
     un     1  1  1 */

  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  /* Without IEEE conformance we do not care about NaNs, so a single
     branch always suffices.  */
  if (!TARGET_IEEE_FP)
    {
      *second_code = NIL;
      *bypass_code = NIL;
    }
}

/* Return cost of comparison done fcom + arithmetics operations on AX.
   All following functions do use number of instructions as a cost metrics.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */
static int
ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
{
  if (!TARGET_IEEE_FP)
    return 4;
  /* The cost of code output by ix86_expand_fp_compare.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      return 4;
      break;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      return 5;
      break;
    case LE:
    case UNGT:
      return 6;
      break;
    default:
      abort ();
    }
}

/* Return cost of comparison done using fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_fcomi_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not supported - this
     prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != NIL || second_code != NIL) + 2;
}

/* Return cost of comparison done using sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_sahf_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not preferred - this
     prevents gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != NIL || second_code != NIL) + 3;
}

/* Compute cost of the comparison done using any method.
   See ix86_fp_comparison_arithmetics_cost for the metrics.
*/ 9107static int 9108ix86_fp_comparison_cost (enum rtx_code code) 9109{ 9110 int fcomi_cost, sahf_cost, arithmetics_cost = 1024; 9111 int min; 9112 9113 fcomi_cost = ix86_fp_comparison_fcomi_cost (code); 9114 sahf_cost = ix86_fp_comparison_sahf_cost (code); 9115 9116 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code); 9117 if (min > sahf_cost) 9118 min = sahf_cost; 9119 if (min > fcomi_cost) 9120 min = fcomi_cost; 9121 return min; 9122} 9123 9124/* Generate insn patterns to do a floating point compare of OPERANDS. */ 9125 9126static rtx 9127ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch, 9128 rtx *second_test, rtx *bypass_test) 9129{ 9130 enum machine_mode fpcmp_mode, intcmp_mode; 9131 rtx tmp, tmp2; 9132 int cost = ix86_fp_comparison_cost (code); 9133 enum rtx_code bypass_code, first_code, second_code; 9134 9135 fpcmp_mode = ix86_fp_compare_mode (code); 9136 code = ix86_prepare_fp_compare_args (code, &op0, &op1); 9137 9138 if (second_test) 9139 *second_test = NULL_RTX; 9140 if (bypass_test) 9141 *bypass_test = NULL_RTX; 9142 9143 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 9144 9145 /* Do fcomi/sahf based test when profitable. */ 9146 if ((bypass_code == NIL || bypass_test) 9147 && (second_code == NIL || second_test) 9148 && ix86_fp_comparison_arithmetics_cost (code) > cost) 9149 { 9150 if (TARGET_CMOVE) 9151 { 9152 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); 9153 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), 9154 tmp); 9155 emit_insn (tmp); 9156 } 9157 else 9158 { 9159 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); 9160 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); 9161 if (!scratch) 9162 scratch = gen_reg_rtx (HImode); 9163 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2)); 9164 emit_insn (gen_x86_sahf_1 (scratch)); 9165 } 9166 9167 /* The FP codes work out to act like unsigned. 
*/ 9168 intcmp_mode = fpcmp_mode; 9169 code = first_code; 9170 if (bypass_code != NIL) 9171 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode, 9172 gen_rtx_REG (intcmp_mode, FLAGS_REG), 9173 const0_rtx); 9174 if (second_code != NIL) 9175 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode, 9176 gen_rtx_REG (intcmp_mode, FLAGS_REG), 9177 const0_rtx); 9178 } 9179 else 9180 { 9181 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */ 9182 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); 9183 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); 9184 if (!scratch) 9185 scratch = gen_reg_rtx (HImode); 9186 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2)); 9187 9188 /* In the unordered case, we have to check C2 for NaN's, which 9189 doesn't happen to work out to anything nice combination-wise. 9190 So do some bit twiddling on the value we've got in AH to come 9191 up with an appropriate set of condition codes. */ 9192 9193 intcmp_mode = CCNOmode; 9194 switch (code) 9195 { 9196 case GT: 9197 case UNGT: 9198 if (code == GT || !TARGET_IEEE_FP) 9199 { 9200 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); 9201 code = EQ; 9202 } 9203 else 9204 { 9205 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 9206 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); 9207 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44))); 9208 intcmp_mode = CCmode; 9209 code = GEU; 9210 } 9211 break; 9212 case LT: 9213 case UNLT: 9214 if (code == LT && TARGET_IEEE_FP) 9215 { 9216 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 9217 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01))); 9218 intcmp_mode = CCmode; 9219 code = EQ; 9220 } 9221 else 9222 { 9223 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01))); 9224 code = NE; 9225 } 9226 break; 9227 case GE: 9228 case UNGE: 9229 if (code == GE || !TARGET_IEEE_FP) 9230 { 9231 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05))); 9232 code = EQ; 9233 } 9234 
else 9235 { 9236 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 9237 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, 9238 GEN_INT (0x01))); 9239 code = NE; 9240 } 9241 break; 9242 case LE: 9243 case UNLE: 9244 if (code == LE && TARGET_IEEE_FP) 9245 { 9246 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 9247 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); 9248 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); 9249 intcmp_mode = CCmode; 9250 code = LTU; 9251 } 9252 else 9253 { 9254 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); 9255 code = NE; 9256 } 9257 break; 9258 case EQ: 9259 case UNEQ: 9260 if (code == EQ && TARGET_IEEE_FP) 9261 { 9262 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 9263 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); 9264 intcmp_mode = CCmode; 9265 code = EQ; 9266 } 9267 else 9268 { 9269 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); 9270 code = NE; 9271 break; 9272 } 9273 break; 9274 case NE: 9275 case LTGT: 9276 if (code == NE && TARGET_IEEE_FP) 9277 { 9278 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 9279 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, 9280 GEN_INT (0x40))); 9281 code = NE; 9282 } 9283 else 9284 { 9285 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); 9286 code = EQ; 9287 } 9288 break; 9289 9290 case UNORDERED: 9291 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); 9292 code = NE; 9293 break; 9294 case ORDERED: 9295 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); 9296 code = EQ; 9297 break; 9298 9299 default: 9300 abort (); 9301 } 9302 } 9303 9304 /* Return the test that should be put into the flags user, i.e. 9305 the bcc, scc, or cmov instruction. 
*/ 9306 return gen_rtx_fmt_ee (code, VOIDmode, 9307 gen_rtx_REG (intcmp_mode, FLAGS_REG), 9308 const0_rtx); 9309} 9310 9311rtx 9312ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test) 9313{ 9314 rtx op0, op1, ret; 9315 op0 = ix86_compare_op0; 9316 op1 = ix86_compare_op1; 9317 9318 if (second_test) 9319 *second_test = NULL_RTX; 9320 if (bypass_test) 9321 *bypass_test = NULL_RTX; 9322 9323 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT) 9324 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX, 9325 second_test, bypass_test); 9326 else 9327 ret = ix86_expand_int_compare (code, op0, op1); 9328 9329 return ret; 9330} 9331 9332/* Return true if the CODE will result in nontrivial jump sequence. */ 9333bool 9334ix86_fp_jump_nontrivial_p (enum rtx_code code) 9335{ 9336 enum rtx_code bypass_code, first_code, second_code; 9337 if (!TARGET_CMOVE) 9338 return true; 9339 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 9340 return bypass_code != NIL || second_code != NIL; 9341} 9342 9343void 9344ix86_expand_branch (enum rtx_code code, rtx label) 9345{ 9346 rtx tmp; 9347 9348 switch (GET_MODE (ix86_compare_op0)) 9349 { 9350 case QImode: 9351 case HImode: 9352 case SImode: 9353 simple: 9354 tmp = ix86_expand_compare (code, NULL, NULL); 9355 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 9356 gen_rtx_LABEL_REF (VOIDmode, label), 9357 pc_rtx); 9358 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); 9359 return; 9360 9361 case SFmode: 9362 case DFmode: 9363 case XFmode: 9364 { 9365 rtvec vec; 9366 int use_fcomi; 9367 enum rtx_code bypass_code, first_code, second_code; 9368 9369 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0, 9370 &ix86_compare_op1); 9371 9372 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 9373 9374 /* Check whether we will use the natural sequence with one jump. If 9375 so, we can expand jump early. 
Otherwise delay expansion by 9376 creating compound insn to not confuse optimizers. */ 9377 if (bypass_code == NIL && second_code == NIL 9378 && TARGET_CMOVE) 9379 { 9380 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1, 9381 gen_rtx_LABEL_REF (VOIDmode, label), 9382 pc_rtx, NULL_RTX); 9383 } 9384 else 9385 { 9386 tmp = gen_rtx_fmt_ee (code, VOIDmode, 9387 ix86_compare_op0, ix86_compare_op1); 9388 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 9389 gen_rtx_LABEL_REF (VOIDmode, label), 9390 pc_rtx); 9391 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp); 9392 9393 use_fcomi = ix86_use_fcomi_compare (code); 9394 vec = rtvec_alloc (3 + !use_fcomi); 9395 RTVEC_ELT (vec, 0) = tmp; 9396 RTVEC_ELT (vec, 1) 9397 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18)); 9398 RTVEC_ELT (vec, 2) 9399 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17)); 9400 if (! use_fcomi) 9401 RTVEC_ELT (vec, 3) 9402 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode)); 9403 9404 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec)); 9405 } 9406 return; 9407 } 9408 9409 case DImode: 9410 if (TARGET_64BIT) 9411 goto simple; 9412 /* Expand DImode branch into multiple compare+branch. */ 9413 { 9414 rtx lo[2], hi[2], label2; 9415 enum rtx_code code1, code2, code3; 9416 9417 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1)) 9418 { 9419 tmp = ix86_compare_op0; 9420 ix86_compare_op0 = ix86_compare_op1; 9421 ix86_compare_op1 = tmp; 9422 code = swap_condition (code); 9423 } 9424 split_di (&ix86_compare_op0, 1, lo+0, hi+0); 9425 split_di (&ix86_compare_op1, 1, lo+1, hi+1); 9426 9427 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to 9428 avoid two branches. This costs one extra insn, so disable when 9429 optimizing for size. 
*/ 9430 9431 if ((code == EQ || code == NE) 9432 && (!optimize_size 9433 || hi[1] == const0_rtx || lo[1] == const0_rtx)) 9434 { 9435 rtx xor0, xor1; 9436 9437 xor1 = hi[0]; 9438 if (hi[1] != const0_rtx) 9439 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1], 9440 NULL_RTX, 0, OPTAB_WIDEN); 9441 9442 xor0 = lo[0]; 9443 if (lo[1] != const0_rtx) 9444 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1], 9445 NULL_RTX, 0, OPTAB_WIDEN); 9446 9447 tmp = expand_binop (SImode, ior_optab, xor1, xor0, 9448 NULL_RTX, 0, OPTAB_WIDEN); 9449 9450 ix86_compare_op0 = tmp; 9451 ix86_compare_op1 = const0_rtx; 9452 ix86_expand_branch (code, label); 9453 return; 9454 } 9455 9456 /* Otherwise, if we are doing less-than or greater-or-equal-than, 9457 op1 is a constant and the low word is zero, then we can just 9458 examine the high word. */ 9459 9460 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx) 9461 switch (code) 9462 { 9463 case LT: case LTU: case GE: case GEU: 9464 ix86_compare_op0 = hi[0]; 9465 ix86_compare_op1 = hi[1]; 9466 ix86_expand_branch (code, label); 9467 return; 9468 default: 9469 break; 9470 } 9471 9472 /* Otherwise, we need two or three jumps. 
*/ 9473 9474 label2 = gen_label_rtx (); 9475 9476 code1 = code; 9477 code2 = swap_condition (code); 9478 code3 = unsigned_condition (code); 9479 9480 switch (code) 9481 { 9482 case LT: case GT: case LTU: case GTU: 9483 break; 9484 9485 case LE: code1 = LT; code2 = GT; break; 9486 case GE: code1 = GT; code2 = LT; break; 9487 case LEU: code1 = LTU; code2 = GTU; break; 9488 case GEU: code1 = GTU; code2 = LTU; break; 9489 9490 case EQ: code1 = NIL; code2 = NE; break; 9491 case NE: code2 = NIL; break; 9492 9493 default: 9494 abort (); 9495 } 9496 9497 /* 9498 * a < b => 9499 * if (hi(a) < hi(b)) goto true; 9500 * if (hi(a) > hi(b)) goto false; 9501 * if (lo(a) < lo(b)) goto true; 9502 * false: 9503 */ 9504 9505 ix86_compare_op0 = hi[0]; 9506 ix86_compare_op1 = hi[1]; 9507 9508 if (code1 != NIL) 9509 ix86_expand_branch (code1, label); 9510 if (code2 != NIL) 9511 ix86_expand_branch (code2, label2); 9512 9513 ix86_compare_op0 = lo[0]; 9514 ix86_compare_op1 = lo[1]; 9515 ix86_expand_branch (code3, label); 9516 9517 if (code2 != NIL) 9518 emit_label (label2); 9519 return; 9520 } 9521 9522 default: 9523 abort (); 9524 } 9525} 9526 9527/* Split branch based on floating point condition. */ 9528void 9529ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2, 9530 rtx target1, rtx target2, rtx tmp) 9531{ 9532 rtx second, bypass; 9533 rtx label = NULL_RTX; 9534 rtx condition; 9535 int bypass_probability = -1, second_probability = -1, probability = -1; 9536 rtx i; 9537 9538 if (target2 != pc_rtx) 9539 { 9540 rtx tmp = target2; 9541 code = reverse_condition_maybe_unordered (code); 9542 target2 = target1; 9543 target1 = tmp; 9544 } 9545 9546 condition = ix86_expand_fp_compare (code, op1, op2, 9547 tmp, &second, &bypass); 9548 9549 if (split_branch_probability >= 0) 9550 { 9551 /* Distribute the probabilities across the jumps. 9552 Assume the BYPASS and SECOND to be always test 9553 for UNORDERED. 
*/ 9554 probability = split_branch_probability; 9555 9556 /* Value of 1 is low enough to make no need for probability 9557 to be updated. Later we may run some experiments and see 9558 if unordered values are more frequent in practice. */ 9559 if (bypass) 9560 bypass_probability = 1; 9561 if (second) 9562 second_probability = 1; 9563 } 9564 if (bypass != NULL_RTX) 9565 { 9566 label = gen_label_rtx (); 9567 i = emit_jump_insn (gen_rtx_SET 9568 (VOIDmode, pc_rtx, 9569 gen_rtx_IF_THEN_ELSE (VOIDmode, 9570 bypass, 9571 gen_rtx_LABEL_REF (VOIDmode, 9572 label), 9573 pc_rtx))); 9574 if (bypass_probability >= 0) 9575 REG_NOTES (i) 9576 = gen_rtx_EXPR_LIST (REG_BR_PROB, 9577 GEN_INT (bypass_probability), 9578 REG_NOTES (i)); 9579 } 9580 i = emit_jump_insn (gen_rtx_SET 9581 (VOIDmode, pc_rtx, 9582 gen_rtx_IF_THEN_ELSE (VOIDmode, 9583 condition, target1, target2))); 9584 if (probability >= 0) 9585 REG_NOTES (i) 9586 = gen_rtx_EXPR_LIST (REG_BR_PROB, 9587 GEN_INT (probability), 9588 REG_NOTES (i)); 9589 if (second != NULL_RTX) 9590 { 9591 i = emit_jump_insn (gen_rtx_SET 9592 (VOIDmode, pc_rtx, 9593 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, 9594 target2))); 9595 if (second_probability >= 0) 9596 REG_NOTES (i) 9597 = gen_rtx_EXPR_LIST (REG_BR_PROB, 9598 GEN_INT (second_probability), 9599 REG_NOTES (i)); 9600 } 9601 if (label != NULL_RTX) 9602 emit_label (label); 9603} 9604 9605int 9606ix86_expand_setcc (enum rtx_code code, rtx dest) 9607{ 9608 rtx ret, tmp, tmpreg, equiv; 9609 rtx second_test, bypass_test; 9610 9611 if (GET_MODE (ix86_compare_op0) == DImode 9612 && !TARGET_64BIT) 9613 return 0; /* FAIL */ 9614 9615 if (GET_MODE (dest) != QImode) 9616 abort (); 9617 9618 ret = ix86_expand_compare (code, &second_test, &bypass_test); 9619 PUT_MODE (ret, QImode); 9620 9621 tmp = dest; 9622 tmpreg = dest; 9623 9624 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret)); 9625 if (bypass_test || second_test) 9626 { 9627 rtx test = second_test; 9628 int bypass = 0; 9629 rtx tmp2 = 
gen_reg_rtx (QImode); 9630 if (bypass_test) 9631 { 9632 if (second_test) 9633 abort (); 9634 test = bypass_test; 9635 bypass = 1; 9636 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test))); 9637 } 9638 PUT_MODE (test, QImode); 9639 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test)); 9640 9641 if (bypass) 9642 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2)); 9643 else 9644 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2)); 9645 } 9646 9647 /* Attach a REG_EQUAL note describing the comparison result. */ 9648 equiv = simplify_gen_relational (code, QImode, 9649 GET_MODE (ix86_compare_op0), 9650 ix86_compare_op0, ix86_compare_op1); 9651 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv); 9652 9653 return 1; /* DONE */ 9654} 9655 9656/* Expand comparison setting or clearing carry flag. Return true when 9657 successful and set pop for the operation. */ 9658static bool 9659ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) 9660{ 9661 enum machine_mode mode = 9662 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); 9663 9664 /* Do not handle DImode compares that go trought special path. Also we can't 9665 deal with FP compares yet. This is possible to add. */ 9666 if ((mode == DImode && !TARGET_64BIT)) 9667 return false; 9668 if (FLOAT_MODE_P (mode)) 9669 { 9670 rtx second_test = NULL, bypass_test = NULL; 9671 rtx compare_op, compare_seq; 9672 9673 /* Shortcut: following common codes never translate into carry flag compares. */ 9674 if (code == EQ || code == NE || code == UNEQ || code == LTGT 9675 || code == ORDERED || code == UNORDERED) 9676 return false; 9677 9678 /* These comparisons require zero flag; swap operands so they won't. 
*/ 9679 if ((code == GT || code == UNLE || code == LE || code == UNGT) 9680 && !TARGET_IEEE_FP) 9681 { 9682 rtx tmp = op0; 9683 op0 = op1; 9684 op1 = tmp; 9685 code = swap_condition (code); 9686 } 9687 9688 /* Try to expand the comparison and verify that we end up with carry flag 9689 based comparison. This is fails to be true only when we decide to expand 9690 comparison using arithmetic that is not too common scenario. */ 9691 start_sequence (); 9692 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX, 9693 &second_test, &bypass_test); 9694 compare_seq = get_insns (); 9695 end_sequence (); 9696 9697 if (second_test || bypass_test) 9698 return false; 9699 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode 9700 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) 9701 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op)); 9702 else 9703 code = GET_CODE (compare_op); 9704 if (code != LTU && code != GEU) 9705 return false; 9706 emit_insn (compare_seq); 9707 *pop = compare_op; 9708 return true; 9709 } 9710 if (!INTEGRAL_MODE_P (mode)) 9711 return false; 9712 switch (code) 9713 { 9714 case LTU: 9715 case GEU: 9716 break; 9717 9718 /* Convert a==0 into (unsigned)a<1. */ 9719 case EQ: 9720 case NE: 9721 if (op1 != const0_rtx) 9722 return false; 9723 op1 = const1_rtx; 9724 code = (code == EQ ? LTU : GEU); 9725 break; 9726 9727 /* Convert a>b into b<a or a>=b-1. */ 9728 case GTU: 9729 case LEU: 9730 if (GET_CODE (op1) == CONST_INT) 9731 { 9732 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0)); 9733 /* Bail out on overflow. We still can swap operands but that 9734 would force loading of the constant into register. */ 9735 if (op1 == const0_rtx 9736 || !x86_64_immediate_operand (op1, GET_MODE (op1))) 9737 return false; 9738 code = (code == GTU ? GEU : LTU); 9739 } 9740 else 9741 { 9742 rtx tmp = op1; 9743 op1 = op0; 9744 op0 = tmp; 9745 code = (code == GTU ? LTU : GEU); 9746 } 9747 break; 9748 9749 /* Convert a>=0 into (unsigned)a<0x80000000. 
*/ 9750 case LT: 9751 case GE: 9752 if (mode == DImode || op1 != const0_rtx) 9753 return false; 9754 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); 9755 code = (code == LT ? GEU : LTU); 9756 break; 9757 case LE: 9758 case GT: 9759 if (mode == DImode || op1 != constm1_rtx) 9760 return false; 9761 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); 9762 code = (code == LE ? GEU : LTU); 9763 break; 9764 9765 default: 9766 return false; 9767 } 9768 /* Swapping operands may cause constant to appear as first operand. */ 9769 if (!nonimmediate_operand (op0, VOIDmode)) 9770 { 9771 if (no_new_pseudos) 9772 return false; 9773 op0 = force_reg (mode, op0); 9774 } 9775 ix86_compare_op0 = op0; 9776 ix86_compare_op1 = op1; 9777 *pop = ix86_expand_compare (code, NULL, NULL); 9778 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU) 9779 abort (); 9780 return true; 9781} 9782 9783int 9784ix86_expand_int_movcc (rtx operands[]) 9785{ 9786 enum rtx_code code = GET_CODE (operands[1]), compare_code; 9787 rtx compare_seq, compare_op; 9788 rtx second_test, bypass_test; 9789 enum machine_mode mode = GET_MODE (operands[0]); 9790 bool sign_bit_compare_p = false;; 9791 9792 start_sequence (); 9793 compare_op = ix86_expand_compare (code, &second_test, &bypass_test); 9794 compare_seq = get_insns (); 9795 end_sequence (); 9796 9797 compare_code = GET_CODE (compare_op); 9798 9799 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT)) 9800 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE))) 9801 sign_bit_compare_p = true; 9802 9803 /* Don't attempt mode expansion here -- if we had to expand 5 or 6 9804 HImode insns, we'd be swallowed in word prefix ops. 
*/ 9805 9806 if ((mode != HImode || TARGET_FAST_PREFIX) 9807 && (mode != DImode || TARGET_64BIT) 9808 && GET_CODE (operands[2]) == CONST_INT 9809 && GET_CODE (operands[3]) == CONST_INT) 9810 { 9811 rtx out = operands[0]; 9812 HOST_WIDE_INT ct = INTVAL (operands[2]); 9813 HOST_WIDE_INT cf = INTVAL (operands[3]); 9814 HOST_WIDE_INT diff; 9815 9816 diff = ct - cf; 9817 /* Sign bit compares are better done using shifts than we do by using 9818 sbb. */ 9819 if (sign_bit_compare_p 9820 || ix86_expand_carry_flag_compare (code, ix86_compare_op0, 9821 ix86_compare_op1, &compare_op)) 9822 { 9823 /* Detect overlap between destination and compare sources. */ 9824 rtx tmp = out; 9825 9826 if (!sign_bit_compare_p) 9827 { 9828 bool fpcmp = false; 9829 9830 compare_code = GET_CODE (compare_op); 9831 9832 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode 9833 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) 9834 { 9835 fpcmp = true; 9836 compare_code = ix86_fp_compare_code_to_integer (compare_code); 9837 } 9838 9839 /* To simplify rest of code, restrict to the GEU case. 
*/ 9840 if (compare_code == LTU) 9841 { 9842 HOST_WIDE_INT tmp = ct; 9843 ct = cf; 9844 cf = tmp; 9845 compare_code = reverse_condition (compare_code); 9846 code = reverse_condition (code); 9847 } 9848 else 9849 { 9850 if (fpcmp) 9851 PUT_CODE (compare_op, 9852 reverse_condition_maybe_unordered 9853 (GET_CODE (compare_op))); 9854 else 9855 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); 9856 } 9857 diff = ct - cf; 9858 9859 if (reg_overlap_mentioned_p (out, ix86_compare_op0) 9860 || reg_overlap_mentioned_p (out, ix86_compare_op1)) 9861 tmp = gen_reg_rtx (mode); 9862 9863 if (mode == DImode) 9864 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op)); 9865 else 9866 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op)); 9867 } 9868 else 9869 { 9870 if (code == GT || code == GE) 9871 code = reverse_condition (code); 9872 else 9873 { 9874 HOST_WIDE_INT tmp = ct; 9875 ct = cf; 9876 cf = tmp; 9877 diff = ct - cf; 9878 } 9879 tmp = emit_store_flag (tmp, code, ix86_compare_op0, 9880 ix86_compare_op1, VOIDmode, 0, -1); 9881 } 9882 9883 if (diff == 1) 9884 { 9885 /* 9886 * cmpl op0,op1 9887 * sbbl dest,dest 9888 * [addl dest, ct] 9889 * 9890 * Size 5 - 8. 9891 */ 9892 if (ct) 9893 tmp = expand_simple_binop (mode, PLUS, 9894 tmp, GEN_INT (ct), 9895 copy_rtx (tmp), 1, OPTAB_DIRECT); 9896 } 9897 else if (cf == -1) 9898 { 9899 /* 9900 * cmpl op0,op1 9901 * sbbl dest,dest 9902 * orl $ct, dest 9903 * 9904 * Size 8. 9905 */ 9906 tmp = expand_simple_binop (mode, IOR, 9907 tmp, GEN_INT (ct), 9908 copy_rtx (tmp), 1, OPTAB_DIRECT); 9909 } 9910 else if (diff == -1 && ct) 9911 { 9912 /* 9913 * cmpl op0,op1 9914 * sbbl dest,dest 9915 * notl dest 9916 * [addl dest, cf] 9917 * 9918 * Size 8 - 11. 
9919 */ 9920 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); 9921 if (cf) 9922 tmp = expand_simple_binop (mode, PLUS, 9923 copy_rtx (tmp), GEN_INT (cf), 9924 copy_rtx (tmp), 1, OPTAB_DIRECT); 9925 } 9926 else 9927 { 9928 /* 9929 * cmpl op0,op1 9930 * sbbl dest,dest 9931 * [notl dest] 9932 * andl cf - ct, dest 9933 * [addl dest, ct] 9934 * 9935 * Size 8 - 11. 9936 */ 9937 9938 if (cf == 0) 9939 { 9940 cf = ct; 9941 ct = 0; 9942 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); 9943 } 9944 9945 tmp = expand_simple_binop (mode, AND, 9946 copy_rtx (tmp), 9947 gen_int_mode (cf - ct, mode), 9948 copy_rtx (tmp), 1, OPTAB_DIRECT); 9949 if (ct) 9950 tmp = expand_simple_binop (mode, PLUS, 9951 copy_rtx (tmp), GEN_INT (ct), 9952 copy_rtx (tmp), 1, OPTAB_DIRECT); 9953 } 9954 9955 if (!rtx_equal_p (tmp, out)) 9956 emit_move_insn (copy_rtx (out), copy_rtx (tmp)); 9957 9958 return 1; /* DONE */ 9959 } 9960 9961 if (diff < 0) 9962 { 9963 HOST_WIDE_INT tmp; 9964 tmp = ct, ct = cf, cf = tmp; 9965 diff = -diff; 9966 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0))) 9967 { 9968 /* We may be reversing unordered compare to normal compare, that 9969 is not valid in general (we may convert non-trapping condition 9970 to trapping one), however on i386 we currently emit all 9971 comparisons unordered. 
*/ 9972 compare_code = reverse_condition_maybe_unordered (compare_code); 9973 code = reverse_condition_maybe_unordered (code); 9974 } 9975 else 9976 { 9977 compare_code = reverse_condition (compare_code); 9978 code = reverse_condition (code); 9979 } 9980 } 9981 9982 compare_code = NIL; 9983 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT 9984 && GET_CODE (ix86_compare_op1) == CONST_INT) 9985 { 9986 if (ix86_compare_op1 == const0_rtx 9987 && (code == LT || code == GE)) 9988 compare_code = code; 9989 else if (ix86_compare_op1 == constm1_rtx) 9990 { 9991 if (code == LE) 9992 compare_code = LT; 9993 else if (code == GT) 9994 compare_code = GE; 9995 } 9996 } 9997 9998 /* Optimize dest = (op0 < 0) ? -1 : cf. */ 9999 if (compare_code != NIL 10000 && GET_MODE (ix86_compare_op0) == GET_MODE (out) 10001 && (cf == -1 || ct == -1)) 10002 { 10003 /* If lea code below could be used, only optimize 10004 if it results in a 2 insn sequence. */ 10005 10006 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8 10007 || diff == 3 || diff == 5 || diff == 9) 10008 || (compare_code == LT && ct == -1) 10009 || (compare_code == GE && cf == -1)) 10010 { 10011 /* 10012 * notl op1 (if necessary) 10013 * sarl $31, op1 10014 * orl cf, op1 10015 */ 10016 if (ct != -1) 10017 { 10018 cf = ct; 10019 ct = -1; 10020 code = reverse_condition (code); 10021 } 10022 10023 out = emit_store_flag (out, code, ix86_compare_op0, 10024 ix86_compare_op1, VOIDmode, 0, -1); 10025 10026 out = expand_simple_binop (mode, IOR, 10027 out, GEN_INT (cf), 10028 out, 1, OPTAB_DIRECT); 10029 if (out != operands[0]) 10030 emit_move_insn (operands[0], out); 10031 10032 return 1; /* DONE */ 10033 } 10034 } 10035 10036 10037 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8 10038 || diff == 3 || diff == 5 || diff == 9) 10039 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL) 10040 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf)))) 10041 { 10042 /* 10043 * xorl dest,dest 10044 
* cmpl op1,op2 10045 * setcc dest 10046 * lea cf(dest*(ct-cf)),dest 10047 * 10048 * Size 14. 10049 * 10050 * This also catches the degenerate setcc-only case. 10051 */ 10052 10053 rtx tmp; 10054 int nops; 10055 10056 out = emit_store_flag (out, code, ix86_compare_op0, 10057 ix86_compare_op1, VOIDmode, 0, 1); 10058 10059 nops = 0; 10060 /* On x86_64 the lea instruction operates on Pmode, so we need 10061 to get arithmetics done in proper mode to match. */ 10062 if (diff == 1) 10063 tmp = copy_rtx (out); 10064 else 10065 { 10066 rtx out1; 10067 out1 = copy_rtx (out); 10068 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1)); 10069 nops++; 10070 if (diff & 1) 10071 { 10072 tmp = gen_rtx_PLUS (mode, tmp, out1); 10073 nops++; 10074 } 10075 } 10076 if (cf != 0) 10077 { 10078 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf)); 10079 nops++; 10080 } 10081 if (!rtx_equal_p (tmp, out)) 10082 { 10083 if (nops == 1) 10084 out = force_operand (tmp, copy_rtx (out)); 10085 else 10086 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp))); 10087 } 10088 if (!rtx_equal_p (out, operands[0])) 10089 emit_move_insn (operands[0], copy_rtx (out)); 10090 10091 return 1; /* DONE */ 10092 } 10093 10094 /* 10095 * General case: Jumpful: 10096 * xorl dest,dest cmpl op1, op2 10097 * cmpl op1, op2 movl ct, dest 10098 * setcc dest jcc 1f 10099 * decl dest movl cf, dest 10100 * andl (cf-ct),dest 1: 10101 * addl ct,dest 10102 * 10103 * Size 20. Size 14. 10104 * 10105 * This is reasonably steep, but branch mispredict costs are 10106 * high on modern cpus, so consider failing only if optimizing 10107 * for space. 
10108 */ 10109 10110 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) 10111 && BRANCH_COST >= 2) 10112 { 10113 if (cf == 0) 10114 { 10115 cf = ct; 10116 ct = 0; 10117 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0))) 10118 /* We may be reversing unordered compare to normal compare, 10119 that is not valid in general (we may convert non-trapping 10120 condition to trapping one), however on i386 we currently 10121 emit all comparisons unordered. */ 10122 code = reverse_condition_maybe_unordered (code); 10123 else 10124 { 10125 code = reverse_condition (code); 10126 if (compare_code != NIL) 10127 compare_code = reverse_condition (compare_code); 10128 } 10129 } 10130 10131 if (compare_code != NIL) 10132 { 10133 /* notl op1 (if needed) 10134 sarl $31, op1 10135 andl (cf-ct), op1 10136 addl ct, op1 10137 10138 For x < 0 (resp. x <= -1) there will be no notl, 10139 so if possible swap the constants to get rid of the 10140 complement. 10141 True/false will be -1/0 while code below (store flag 10142 followed by decrement) is 0/-1, so the constants need 10143 to be exchanged once more. 
*/ 10144 10145 if (compare_code == GE || !cf) 10146 { 10147 code = reverse_condition (code); 10148 compare_code = LT; 10149 } 10150 else 10151 { 10152 HOST_WIDE_INT tmp = cf; 10153 cf = ct; 10154 ct = tmp; 10155 } 10156 10157 out = emit_store_flag (out, code, ix86_compare_op0, 10158 ix86_compare_op1, VOIDmode, 0, -1); 10159 } 10160 else 10161 { 10162 out = emit_store_flag (out, code, ix86_compare_op0, 10163 ix86_compare_op1, VOIDmode, 0, 1); 10164 10165 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx, 10166 copy_rtx (out), 1, OPTAB_DIRECT); 10167 } 10168 10169 out = expand_simple_binop (mode, AND, copy_rtx (out), 10170 gen_int_mode (cf - ct, mode), 10171 copy_rtx (out), 1, OPTAB_DIRECT); 10172 if (ct) 10173 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct), 10174 copy_rtx (out), 1, OPTAB_DIRECT); 10175 if (!rtx_equal_p (out, operands[0])) 10176 emit_move_insn (operands[0], copy_rtx (out)); 10177 10178 return 1; /* DONE */ 10179 } 10180 } 10181 10182 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) 10183 { 10184 /* Try a few things more with specific constants and a variable. */ 10185 10186 optab op; 10187 rtx var, orig_out, out, tmp; 10188 10189 if (BRANCH_COST <= 2) 10190 return 0; /* FAIL */ 10191 10192 /* If one of the two operands is an interesting constant, load a 10193 constant with the above and mask it in with a logical operation. 
*/ 10194 10195 if (GET_CODE (operands[2]) == CONST_INT) 10196 { 10197 var = operands[3]; 10198 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx) 10199 operands[3] = constm1_rtx, op = and_optab; 10200 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx) 10201 operands[3] = const0_rtx, op = ior_optab; 10202 else 10203 return 0; /* FAIL */ 10204 } 10205 else if (GET_CODE (operands[3]) == CONST_INT) 10206 { 10207 var = operands[2]; 10208 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx) 10209 operands[2] = constm1_rtx, op = and_optab; 10210 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx) 10211 operands[2] = const0_rtx, op = ior_optab; 10212 else 10213 return 0; /* FAIL */ 10214 } 10215 else 10216 return 0; /* FAIL */ 10217 10218 orig_out = operands[0]; 10219 tmp = gen_reg_rtx (mode); 10220 operands[0] = tmp; 10221 10222 /* Recurse to get the constant loaded. */ 10223 if (ix86_expand_int_movcc (operands) == 0) 10224 return 0; /* FAIL */ 10225 10226 /* Mask in the interesting variable. */ 10227 out = expand_binop (mode, op, var, tmp, orig_out, 0, 10228 OPTAB_WIDEN); 10229 if (!rtx_equal_p (out, orig_out)) 10230 emit_move_insn (copy_rtx (orig_out), copy_rtx (out)); 10231 10232 return 1; /* DONE */ 10233 } 10234 10235 /* 10236 * For comparison with above, 10237 * 10238 * movl cf,dest 10239 * movl ct,tmp 10240 * cmpl op1,op2 10241 * cmovcc tmp,dest 10242 * 10243 * Size 15. 10244 */ 10245 10246 if (! nonimmediate_operand (operands[2], mode)) 10247 operands[2] = force_reg (mode, operands[2]); 10248 if (! 
nonimmediate_operand (operands[3], mode)) 10249 operands[3] = force_reg (mode, operands[3]); 10250 10251 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3])) 10252 { 10253 rtx tmp = gen_reg_rtx (mode); 10254 emit_move_insn (tmp, operands[3]); 10255 operands[3] = tmp; 10256 } 10257 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2])) 10258 { 10259 rtx tmp = gen_reg_rtx (mode); 10260 emit_move_insn (tmp, operands[2]); 10261 operands[2] = tmp; 10262 } 10263 10264 if (! register_operand (operands[2], VOIDmode) 10265 && (mode == QImode 10266 || ! register_operand (operands[3], VOIDmode))) 10267 operands[2] = force_reg (mode, operands[2]); 10268 10269 if (mode == QImode 10270 && ! register_operand (operands[3], VOIDmode)) 10271 operands[3] = force_reg (mode, operands[3]); 10272 10273 emit_insn (compare_seq); 10274 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 10275 gen_rtx_IF_THEN_ELSE (mode, 10276 compare_op, operands[2], 10277 operands[3]))); 10278 if (bypass_test) 10279 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), 10280 gen_rtx_IF_THEN_ELSE (mode, 10281 bypass_test, 10282 copy_rtx (operands[3]), 10283 copy_rtx (operands[0])))); 10284 if (second_test) 10285 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), 10286 gen_rtx_IF_THEN_ELSE (mode, 10287 second_test, 10288 copy_rtx (operands[2]), 10289 copy_rtx (operands[0])))); 10290 10291 return 1; /* DONE */ 10292} 10293 10294int 10295ix86_expand_fp_movcc (rtx operands[]) 10296{ 10297 enum rtx_code code; 10298 rtx tmp; 10299 rtx compare_op, second_test, bypass_test; 10300 10301 /* For SF/DFmode conditional moves based on comparisons 10302 in same mode, we may want to use SSE min/max instructions. */ 10303 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode) 10304 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode)) 10305 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0]) 10306 /* The SSE comparisons does not support the LTGT/UNEQ pair. 
	 */
      && (!TARGET_IEEE_FP
	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
      /* We may be called from the post-reload splitter.  */
      && (!REG_P (operands[0])
	  || SSE_REG_P (operands[0])
	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
    {
      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
      code = GET_CODE (operands[1]);

      /* See if we have (cross) match between comparison operands and
	 conditional move operands.  */
      if (rtx_equal_p (operands[2], op1))
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = reverse_condition_maybe_unordered (code);
	}
      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
	{
	  /* Check for min operation.  */
	  if (code == LT || code == UNLE)
	    {
	      if (code == UNLE)
		{
		  rtx tmp = op0;
		  op0 = op1;
		  op1 = tmp;
		}
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_minsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_mindf3 (operands[0], op0, op1));
	      return 1;
	    }
	  /* Check for max operation.  */
	  if (code == GT || code == UNGE)
	    {
	      if (code == UNGE)
		{
		  rtx tmp = op0;
		  op0 = op1;
		  op1 = tmp;
		}
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_maxsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_maxdf3 (operands[0], op0, op1));
	      return 1;
	    }
	}
      /* Manage condition to be sse_comparison_operator.  In case we are
	 in non-ieee mode, try to canonicalize the destination operand
	 to be first in the comparison - this helps reload to avoid extra
	 moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
	{
	  rtx tmp = ix86_compare_op0;
	  ix86_compare_op0 = ix86_compare_op1;
	  ix86_compare_op1 = tmp;
	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      /* Similarly try to manage result to be first operand of conditional
	 move.  We also don't support the NE comparison on SSE, so try to
	 avoid it.  */
      if ((rtx_equal_p (operands[0], operands[3])
	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
	{
	  rtx tmp = operands[2];
	  operands[2] = operands[3];
	  operands[3] = tmp;
	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
					  (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      if (GET_MODE (operands[0]) == SFmode)
	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      else
	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      return 1;
    }

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* The floating point conditional move instructions don't directly
     support signed integer comparisons, so fall back to a setcc
     followed by an NE test of its result.  */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      if (second_test != NULL || bypass_test != NULL)
	abort ();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  /* Avoid clobbering the destination before its value is still needed
     by the secondary tests.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						compare_op,
						operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  second_test,
						  operands[2],
						  operands[0])));

  return 1;
}

/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.
   */
int
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* Only a +1 or -1 adjustment can be folded into adc/sbb.  */
  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return 0;
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
				       ix86_compare_op1, &compare_op))
     return 0;
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  /* Canonicalize on LTU (carry set); for GEU flip the condition and
     use -1 as the conditional addend instead.  */
  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }
  PUT_MODE (compare_op, mode);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
	{
	  case QImode:
	    emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case HImode:
	    emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case SImode:
	    emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case DImode:
	    emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	    break;
	  default:
	    abort ();
	}
    }
  else
    {
      switch (GET_MODE (operands[0]))
	{
	  case QImode:
	    emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case HImode:
	    emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case SImode:
	    emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case DImode:
	    emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	    break;
	  default:
	    abort ();
	}
    }
  return 1; /* DONE */
}


/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  /* Number of word-sized parts: 32-bit words on ia32 (XFmode takes 3),
     64-bit words on x86-64.  */
  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.
     This is used by fp moves, that force all constants to memory to allow
     combining.  */
  if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Hard registers only; consecutive regs hold the parts.  */
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    abort ();
	}
    }
  else
    {
      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);

	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);

	      if (upper_mode == SImode)
		parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[1]
		  = gen_int_mode
		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		       DImode);
	      else
		parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    abort ();
	}
    }

  return size;
}

/* Emit insns to perform a move or push of DI, DF, and XF values.
   Operands 2-4 are set up to contain the input values in the correct
   order; operands 5-7 contain the output values.
   */
void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  part[1][1] = replace_equiv_address (part[1][1],
					      plus_constant (base, UNITS_PER_WORD));
	  if (nparts == 3)
	    part[1][2] = replace_equiv_address (part[1][2],
						plus_constant (base, 8));
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these comes from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      if (GET_CODE (part[1][1]) == MEM)
		part[1][1] = adjust_address (part[1][1], DImode, 0);
	      else if (REG_P (part[1][1]))
		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
	      else
		abort ();
	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      /* Pushes go high part first so the values land in memory in order.  */
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}

/* Split a DImode shift left by OPERANDS[2] into SImode operations;
   SCRATCH, when given, may be used as a temporary.  */
void
ix86_split_ashldi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
	  emit_insn
(gen_ashlsi3 (low[0], low[0], GEN_INT (count))); 10904 } 10905 } 10906 else 10907 { 10908 if (!rtx_equal_p (operands[0], operands[1])) 10909 emit_move_insn (operands[0], operands[1]); 10910 10911 split_di (operands, 1, low, high); 10912 10913 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2])); 10914 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2])); 10915 10916 if (TARGET_CMOVE && (! no_new_pseudos || scratch)) 10917 { 10918 if (! no_new_pseudos) 10919 scratch = force_reg (SImode, const0_rtx); 10920 else 10921 emit_move_insn (scratch, const0_rtx); 10922 10923 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], 10924 scratch)); 10925 } 10926 else 10927 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2])); 10928 } 10929} 10930 10931void 10932ix86_split_ashrdi (rtx *operands, rtx scratch) 10933{ 10934 rtx low[2], high[2]; 10935 int count; 10936 10937 if (GET_CODE (operands[2]) == CONST_INT) 10938 { 10939 split_di (operands, 2, low, high); 10940 count = INTVAL (operands[2]) & 63; 10941 10942 if (count >= 32) 10943 { 10944 emit_move_insn (low[0], high[1]); 10945 10946 if (! 
reload_completed) 10947 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31))); 10948 else 10949 { 10950 emit_move_insn (high[0], low[0]); 10951 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31))); 10952 } 10953 10954 if (count > 32) 10955 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32))); 10956 } 10957 else 10958 { 10959 if (!rtx_equal_p (operands[0], operands[1])) 10960 emit_move_insn (operands[0], operands[1]); 10961 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count))); 10962 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count))); 10963 } 10964 } 10965 else 10966 { 10967 if (!rtx_equal_p (operands[0], operands[1])) 10968 emit_move_insn (operands[0], operands[1]); 10969 10970 split_di (operands, 1, low, high); 10971 10972 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2])); 10973 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2])); 10974 10975 if (TARGET_CMOVE && (! no_new_pseudos || scratch)) 10976 { 10977 if (! no_new_pseudos) 10978 scratch = gen_reg_rtx (SImode); 10979 emit_move_insn (scratch, high[0]); 10980 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31))); 10981 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], 10982 scratch)); 10983 } 10984 else 10985 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2])); 10986 } 10987} 10988 10989void 10990ix86_split_lshrdi (rtx *operands, rtx scratch) 10991{ 10992 rtx low[2], high[2]; 10993 int count; 10994 10995 if (GET_CODE (operands[2]) == CONST_INT) 10996 { 10997 split_di (operands, 2, low, high); 10998 count = INTVAL (operands[2]) & 63; 10999 11000 if (count >= 32) 11001 { 11002 emit_move_insn (low[0], high[1]); 11003 emit_move_insn (high[0], const0_rtx); 11004 11005 if (count > 32) 11006 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32))); 11007 } 11008 else 11009 { 11010 if (!rtx_equal_p (operands[0], operands[1])) 11011 emit_move_insn (operands[0], operands[1]); 11012 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count))); 
11013 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count))); 11014 } 11015 } 11016 else 11017 { 11018 if (!rtx_equal_p (operands[0], operands[1])) 11019 emit_move_insn (operands[0], operands[1]); 11020 11021 split_di (operands, 1, low, high); 11022 11023 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2])); 11024 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2])); 11025 11026 /* Heh. By reversing the arguments, we can reuse this pattern. */ 11027 if (TARGET_CMOVE && (! no_new_pseudos || scratch)) 11028 { 11029 if (! no_new_pseudos) 11030 scratch = force_reg (SImode, const0_rtx); 11031 else 11032 emit_move_insn (scratch, const0_rtx); 11033 11034 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], 11035 scratch)); 11036 } 11037 else 11038 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2])); 11039 } 11040} 11041 11042/* Helper function for the string operations below. Dest VARIABLE whether 11043 it is aligned to VALUE bytes. If true, jump to the label. */ 11044static rtx 11045ix86_expand_aligntest (rtx variable, int value) 11046{ 11047 rtx label = gen_label_rtx (); 11048 rtx tmpcount = gen_reg_rtx (GET_MODE (variable)); 11049 if (GET_MODE (variable) == DImode) 11050 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value))); 11051 else 11052 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value))); 11053 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable), 11054 1, label); 11055 return label; 11056} 11057 11058/* Adjust COUNTER by the VALUE. */ 11059static void 11060ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value) 11061{ 11062 if (GET_MODE (countreg) == DImode) 11063 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value))); 11064 else 11065 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value))); 11066} 11067 11068/* Zero extend possibly SImode EXP to Pmode register. 
   */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    /* Mode-less constants: just force into a Pmode register.  */
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  /* Otherwise EXP is SImode and Pmode is DImode: zero extend.  */
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}

/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  Returns 1 when the expansion was done inline, 0 to let the
   caller fall back to a library call.  expand_clrstr contains similar
   code.  DST and SRC are BLKmode MEMs, COUNT_EXP the byte count and
   ALIGN_EXP the known shared alignment.  */
int
ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
{
  rtx srcreg, destreg, countreg, srcexp, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi or edi --
     global register 4/5 are hard regs esi/edi used implicitly by
     movs/stos.  */
  if (global_regs[4] || global_regs[5])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
	return 0;
    }

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
  if (srcreg != XEXP (src, 0))
    src = replace_equiv_address_nv (src, srcreg);

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      emit_insn (gen_cld ());
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      /* The rep_mov pattern also describes the final pointer values
	 (base + count) for alias/dataflow purposes.  */
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
			      destexp, srcexp));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      unsigned HOST_WIDE_INT offset = 0;
      /* Word size of the rep move: 8 bytes on 64-bit unless optimizing
	 for size.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      rtx srcmem, dstmem;

      emit_insn (gen_cld ());
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);

	  destexp = gen_rtx_ASHIFT (Pmode, countreg,
				    GEN_INT (size == 4 ? 2 : 3));
	  srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
	  destexp = gen_rtx_PLUS (Pmode, destexp, destreg);

	  emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
				  countreg, destexp, srcexp));
	  offset = count & ~(size - 1);
	}
      /* Copy the remaining (count % size) bytes with at most one move
	 each of 4, 2 and 1 bytes.  */
      if (size == 8 && (count & 0x04))
	{
	  srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  offset += 4;
	}
      if (count & 0x02)
	{
	  srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  offset += 2;
	}
      if (count & 0x01)
	{
	  srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
     allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      rtx srcmem, dstmem;
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);
      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);
      src = change_address (src, BLKmode, srcreg);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.

	 Also emit call when we know that the count is large and call overhead
	 will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
	 than 4 bytes, because gcc is able to optimize such code better (in
	 the case the destination or the count really is aligned, gcc is often
	 able to predict the branches) and also it is friendlier to the
	 hardware branch prediction.

	 Using loops is beneficial for generic case, because we can
	 handle small counts using the loops.  Many CPUs (such as Athlon)
	 have large REP prefix setup costs.

	 This is quite costly.  Maybe we can revisit this decision later or
	 add some customizability to this code.  */

      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  /* Skip the alignment prologue entirely for tiny counts.  */
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  srcmem = change_address (src, QImode, srcreg);
	  dstmem = change_address (dst, QImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  srcmem = change_address (src, HImode, srcreg);
	  dstmem = change_address (dst, HImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  srcmem = change_address (src, SImode, srcreg);
	  dstmem = change_address (dst, SImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}
      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
	}
      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
			      countreg2, destexp, srcexp));

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      /* Copy the tail: alignment tests on COUNTREG when the count was
	 unknown, straight-line moves on COUNT bits otherwise.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	{
	  srcmem = change_address (src, SImode, srcreg);
	  dstmem = change_address (dst, SImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
      if ((align <= 4 || count == 0) && TARGET_64BIT)
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  srcmem = change_address (src, SImode, srcreg);
	  dstmem = change_address (dst, SImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	{
	  srcmem = change_address (src, HImode, srcreg);
	  dstmem = change_address (dst, HImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  srcmem = change_address (src, HImode, srcreg);
	  dstmem = change_address (dst, HImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	{
	  srcmem = change_address (src, QImode, srcreg);
	  dstmem = change_address (dst, QImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  srcmem = change_address (src, QImode, srcreg);
	  dstmem = change_address (dst, QImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }

  return 1;
}

/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  Returns 1 when expanded inline, 0 to fall back to a
   library call.  expand_movstr contains similar code.  */
int
ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
{
  rtx destreg, zeroreg, countreg, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi.  */
  if (global_regs[4])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
	return 0;
    }
  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; stosb instruction for
     counts not divisible by 4.
   */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      /* The rep_stos pattern also describes the final destination
	 pointer (base + count) for dataflow purposes.  */
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
    }
  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      /* Word size of the rep store: 8 bytes on 64-bit unless optimizing
	 for size.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      unsigned HOST_WIDE_INT offset = 0;

      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
	  destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
	  emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
	  offset = count & ~(size - 1);
	}
      /* Clear the remaining (count % size) bytes with at most one store
	 each of 4, 2 and 1 bytes.  */
      if (size == 8 && (count & 0x04))
	{
	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  offset += 4;
	}
      if (count & 0x02)
	{
	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  offset += 2;
	}
      if (count & 0x01)
	{
	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	}
    }
  else
    {
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.

	 Also emit call when we know that the count is large and call overhead
	 will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
      /* Get rid of MEM_OFFSET, it won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);

      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  /* Skip the alignment prologue entirely for tiny counts.  */
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strset (destreg, dst,
				 (TARGET_64BIT
				  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
				  : zeroreg)));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}

      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
	}
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      /* Clear the tail: alignment tests on COUNTREG when the count was
	 unknown, straight-line stores on COUNT bits otherwise.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  return 1;
}

/* Expand strlen.  OUT receives the length, SRC is the string MEM,
   EOSCHAR the terminator (const0_rtx for plain strlen) and ALIGN the
   known alignment.  Returns 1 when expanded inline, 0 for a library
   call.  */
int
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid its
     expanding unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      if (TARGET_64BIT)
	emit_insn (gen_subdi3 (out, out, addr));
      else
	emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      /* repnz scasb path: scan for EOSCHAR, then OUT = ~scanned - 1.  */
      rtx unspec;
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
						 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      if (TARGET_64BIT)
	{
	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
	}
      else
	{
	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
	}
    }
  return 1;
}

/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.
   */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (out, out, const1_rtx));
	  else
	    emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  if (TARGET_CMOVE)
    {
      /* Branch-free narrowing: use cmov to step past halves that
	 contain no zero byte.  */
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
			      gen_rtx_PLUS (Pmode, out, GEN_INT (2))));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						    reg2,
						    out)));

    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
      else
	emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  /* NOTE(review): hard register 17 here looks like the flags register,
     written FLAGS_REG elsewhere in this file -- confirm and use the
     macro for consistency.  */
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));

  emit_label (end_0_label);
}

/* Emit the RTL for a call.  RETVAL is the value register or NULL for a
   void call, FNADDR the MEM holding the callee address, CALLARG1 the
   argument-bytes rtx, POP the number of bytes the callee pops (NULL or
   const0_rtx for none) and SIBCALL nonzero for a sibling call.  */
void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
		  rtx callarg2 ATTRIBUTE_UNUSED,
		  rtx pop, int sibcall)
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  /* Callee-pop is a 32-bit-only convention.  */
  if (TARGET_64BIT && pop)
    abort ();

#if TARGET_MACHO
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      /* Hard register 0 in QImode is AL; CALLARG2 is loaded into it and
	 marked used -- presumably the x86-64 varargs SSE-register count;
	 confirm against the ABI/callers.  */
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }
#endif /* TARGET_MACHO */

  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
    {
      /* Sibcalls may not use argument registers for the address, so
	 force it into R11 (caller-saved, never an argument register).  */
      rtx addr;
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      /* Represent the callee-pop as a parallel stack-pointer adjust.  */
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}


/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared (sizeof (struct machine_function));
  /* -1 marks the register count as not yet computed.  */
  f->use_fast_prologue_epilogue_nregs = -1;

  return f;
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.
*/

rtx
assign_386_stack_local (enum machine_mode mode, int n)
{
  struct stack_local_entry *s;

  if (n < 0 || n >= MAX_386_STACK_LOCALS)
    abort ();

  /* Reuse an already-allocated slot with the same mode and index.  */
  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->rtl;

  /* No match: allocate a fresh slot and link it onto the GC-managed
     per-function list.  */
  s = (struct stack_local_entry *)
    ggc_alloc (sizeof (struct stack_local_entry));
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return s->rtl;
}

/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

/* Return (creating on first use) the SYMBOL_REF for the TLS helper:
   "___tls_get_addr" under 32-bit GNU TLS, "__tls_get_addr" otherwise.  */
rtx
ix86_tls_get_addr (void)
{

  if (!ix86_tls_symbol)
    {
      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
					    (TARGET_GNU_TLS && !TARGET_64BIT)
					    ? "___tls_get_addr"
					    : "__tls_get_addr");
    }

  return ix86_tls_symbol;
}

/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.
   Returns the number of extra bytes (SIB and/or displacement).  */

static int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  /* Auto-modified addresses contribute no extra encoding bytes here.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing: a bare 32-bit displacement.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* 'K' accepts signed 8-bit immediates, which encode in one
	     byte when a base register is present.  */
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
	      && base)
	    len = 1;
	  else
	    len = 4;
	}
      /* ebp always wants a displacement.  */
      else if (base == hard_frame_pointer_rtx)
	len = 1;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp, which always wants an index.  */
	  || base == stack_pointer_rtx
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx)
	len += 1;
    }

  return len;
}

/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that insn have 8bit immediate alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, int shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	/* Only one immediate operand is expected per insn.  */
	if (len)
	  abort ();
	if (shortform
	    && GET_CODE (recog_data.operand[i]) == CONST_INT
	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
	  len = 1;
	else
	  {
	    switch (get_attr_mode (insn))
	      {
	      case MODE_QI:
		len+=1;
		break;
	      case MODE_HI:
		len+=2;
		break;
	      case MODE_SI:
		len+=4;
		break;
	      /* Immediates for DImode instructions are encoded as 32bit sign extended values.
*/ 12076 case MODE_DI: 12077 len+=4; 12078 break; 12079 default: 12080 fatal_insn ("unknown insn mode", insn); 12081 } 12082 } 12083 } 12084 return len; 12085} 12086/* Compute default value for "length_address" attribute. */ 12087int 12088ix86_attr_length_address_default (rtx insn) 12089{ 12090 int i; 12091 12092 if (get_attr_type (insn) == TYPE_LEA) 12093 { 12094 rtx set = PATTERN (insn); 12095 if (GET_CODE (set) == SET) 12096 ; 12097 else if (GET_CODE (set) == PARALLEL 12098 && GET_CODE (XVECEXP (set, 0, 0)) == SET) 12099 set = XVECEXP (set, 0, 0); 12100 else 12101 { 12102#ifdef ENABLE_CHECKING 12103 abort (); 12104#endif 12105 return 0; 12106 } 12107 12108 return memory_address_length (SET_SRC (set)); 12109 } 12110 12111 extract_insn_cached (insn); 12112 for (i = recog_data.n_operands - 1; i >= 0; --i) 12113 if (GET_CODE (recog_data.operand[i]) == MEM) 12114 { 12115 return memory_address_length (XEXP (recog_data.operand[i], 0)); 12116 break; 12117 } 12118 return 0; 12119} 12120 12121/* Return the maximum number of instructions a cpu can issue. */ 12122 12123static int 12124ix86_issue_rate (void) 12125{ 12126 switch (ix86_tune) 12127 { 12128 case PROCESSOR_PENTIUM: 12129 case PROCESSOR_K6: 12130 return 2; 12131 12132 case PROCESSOR_PENTIUMPRO: 12133 case PROCESSOR_PENTIUM4: 12134 case PROCESSOR_ATHLON: 12135 case PROCESSOR_K8: 12136 return 3; 12137 12138 default: 12139 return 1; 12140 } 12141} 12142 12143/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set 12144 by DEP_INSN and nothing set by DEP_INSN. */ 12145 12146static int 12147ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type) 12148{ 12149 rtx set, set2; 12150 12151 /* Simplify the test for uninteresting insns. 
*/ 12152 if (insn_type != TYPE_SETCC 12153 && insn_type != TYPE_ICMOV 12154 && insn_type != TYPE_FCMOV 12155 && insn_type != TYPE_IBR) 12156 return 0; 12157 12158 if ((set = single_set (dep_insn)) != 0) 12159 { 12160 set = SET_DEST (set); 12161 set2 = NULL_RTX; 12162 } 12163 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL 12164 && XVECLEN (PATTERN (dep_insn), 0) == 2 12165 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET 12166 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET) 12167 { 12168 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 12169 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 12170 } 12171 else 12172 return 0; 12173 12174 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG) 12175 return 0; 12176 12177 /* This test is true if the dependent insn reads the flags but 12178 not any other potentially set register. */ 12179 if (!reg_overlap_mentioned_p (set, PATTERN (insn))) 12180 return 0; 12181 12182 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn))) 12183 return 0; 12184 12185 return 1; 12186} 12187 12188/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory 12189 address with operands set by DEP_INSN. 
*/

static int
ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx addr;

  if (insn_type == TYPE_LEA
      && TARGET_PENTIUM)
    {
      /* For LEA on Pentium the "address" is the SET_SRC of the
	 (possibly parallel) pattern rather than a MEM operand.  */
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
	;
      else if (GET_CODE (addr) == PARALLEL
	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      else
	abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    goto found;
	  }
      /* No memory operand: no address generation interlock possible.  */
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}

/* Target hook for TARGET_SCHED_ADJUST_COST: adjust the scheduling
   latency COST of the dependence LINK between DEP_INSN and INSN,
   according to the tuning target's pipeline quirks.  */

static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory, dep_memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require value to be ready one cycle
	 earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
	  && dep_insn_type != TYPE_FMOV
	  && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
	cost += 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && GET_CODE (SET_DEST (set2)) == MEM)
	cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);
      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  enum attr_unit unit = get_attr_unit (insn);
	  int loadcost = 3;

	  /* Because of the difference between the length of integer and
	     floating unit pipeline preparation stages, the memory operands
	     for floating point are cheaper.

	     ??? For Athlon the difference is most probably 2.  */
	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
	    loadcost = 3;
	  else
	    loadcost = TARGET_ATHLON ? 2 : 0;

	  if (cost >= loadcost)
	    cost -= loadcost;
	  else
	    cost = 0;
	}
      /* FALLTHRU -- NOTE(review): no `break' here; falls into the
	 default case, which only breaks, so this is harmless.  */

    default:
      break;
    }

  return cost;
}

/* Per-block scheduling state; only the PPro decoder model is tracked.
   decode[] holds the insns assigned to the three decoders during the
   current cycle.  */
static union
{
  struct ppro_sched_data
  {
    rtx decode[3];
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;

/* Return INSN's ppro_uops attribute, or PPRO_UOPS_MANY (the most
   conservative class) when the insn is not recognizable.  */
static enum attr_ppro_uops
ix86_safe_ppro_uops (rtx insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_ppro_uops (insn);
  else
    return PPRO_UOPS_MANY;
}

/* Write the UIDs of the insns currently occupying the PPro decoders
   to the scheduling dump file.  */
static void
ix86_dump_ppro_packet (FILE *dump)
{
  if (ix86_sched_data.ppro.decode[0])
    {
      fprintf (dump, "PPRO packet: %d",
	       INSN_UID (ix86_sched_data.ppro.decode[0]));
      if (ix86_sched_data.ppro.decode[1])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
      if (ix86_sched_data.ppro.decode[2])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
      fputc ('\n', dump);
    }
}

/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		 int sched_verbose ATTRIBUTE_UNUSED,
		 int veclen ATTRIBUTE_UNUSED)
{
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
}

/* Shift INSN to SLOT, and shift everything else down.
*/

static void
ix86_reorder_insn (rtx *insnp, rtx *slot)
{
  /* Rotate *INSNP up to SLOT: everything between moves down one
     position and the original *INSNP lands in *SLOT.  */
  if (insnp != slot)
    {
      rtx insn = *insnp;
      do
	insnp[0] = insnp[1];
      while (++insnp != slot);
      *insnp = insn;
    }
}

/* Reorder the ready queue (READY .. E_READY, lowest to highest
   priority) to fill the three PPro decoders: decoder 0 takes any insn,
   decoders 1 and 2 only single-uop insns.  */
static void
ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready > e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}

/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */
static int
ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
		    int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
		    int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
{
  int n_ready = *n_readyp;
  rtx *e_ready = ready + n_ready - 1;

  /* Make sure to go ahead and initialize key items in
     ix86_sched_data if we are not going to bother trying to
     reorder the ready queue.  */
  if (n_ready < 2)
    {
      ix86_sched_data.ppro.issued_this_cycle = 1;
      goto out;
    }

  switch (ix86_tune)
    {
    default:
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

out:
  return ix86_issue_rate ();
}

/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.
*/

static int
ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
		     int can_issue_more)
{
  int i;
  switch (ix86_tune)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    /* A many-uop insn consumes the whole decode group: dump
	       the pending packet, then a packet containing just this
	       insn, and leave the decoders empty.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    /* A 2-4 uop insn must start a new packet in decoder 0.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    /* Single-uop insn: place it in the first free decoder;
	       filling the third closes the packet.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}

/* Target hook: nonzero when the DFA pipeline description should be
   used for the current tuning target.  */
static int
ia32_use_dfa_pipeline_interface (void)
{
  if (TARGET_PENTIUM || TARGET_ATHLON_K8)
    return 1;
  return 0;
}

/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results extra work for the scheduler.
*/

static int
ia32_multipass_dfa_lookahead (void)
{
  if (ix86_tune == PROCESSOR_PENTIUM)
    return 2;
  else
    return 0;
}


/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  /* Align long string constants to a word boundary when optimizing
     for speed, unless the target forbids it.  */
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && !TARGET_NO_ALIGN_LONG_STRINGS
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  /* Large aggregates get 256-bit alignment.  TREE_INT_CST_HIGH being
     non-zero means the size exceeds the low word, i.e. is huge.  */
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Alignment is keyed off the mode of the first field only.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}

/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (tree type, int align)
{
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Alignment is keyed off the mode of the first field only.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.
*/
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      /* 0xb9 is "movl $imm32, %ecx"; store CXT as its immediate.  */
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      /* 0xe9 is "jmp rel32"; store the computed displacement.  */
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but kernel does not use
	 trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  /* 0xbb41 is "movl $imm32, %r11d" (REX.B + 0xbb).  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* 0xbb49 is "movabs $imm64, %r11" (REX.WB + 0xbb).  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump to the r11 */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }

#ifdef ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}

/* Register NAME as a builtin when its MASK bits are enabled in
   target_flags (and, for 64-bit-only builtins, when TARGET_64BIT).  */
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags					\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
		      NULL, NULL_TREE);				\
} while (0)

/* Table entry describing one machine-specific builtin: the target-flag
   mask gating it, the insn code implementing it, its source-level name
   and builtin enumerator, plus a comparison code and flag used by the
   expanders.  */
struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};

/* Builtins mapping to the SSE/SSE2 (u)comis[sd] scalar compares.  */
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS,
UNEQ, 0 }, 12867 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 }, 12868 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 }, 12869 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 }, 12870 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 }, 12871 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 }, 12872 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 }, 12873 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 }, 12874 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 }, 12875 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 }, 12876 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 }, 12877 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 }, 12878 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 }, 12879 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 }, 12880 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 }, 12881 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 }, 12882 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 }, 12883 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 }, 12884}; 12885 12886static const struct builtin_description bdesc_2arg[] = 12887{ 12888 /* SSE */ 12889 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 }, 12890 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 }, 12891 
{ MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, 12892 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, 12893 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, 12894 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, 12895 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, 12896 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, 12897 12898 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, 12899 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, 12900 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, 12901 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 }, 12902 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 }, 12903 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, 12904 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 }, 12905 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 }, 12906 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 }, 12907 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 }, 12908 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 }, 12909 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 }, 12910 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, 12911 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, 12912 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, 
"__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, 12913 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, 12914 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 }, 12915 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 }, 12916 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 }, 12917 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 }, 12918 12919 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, 12920 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, 12921 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, 12922 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, 12923 12924 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 }, 12925 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 }, 12926 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 }, 12927 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }, 12928 12929 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, 12930 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, 12931 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, 12932 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 }, 12933 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, 12934 12935 /* MMX */ 12936 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 }, 12937 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", 
IX86_BUILTIN_PADDW, 0, 0 }, 12938 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 }, 12939 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 }, 12940 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 }, 12941 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 }, 12942 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 }, 12943 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 }, 12944 12945 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 }, 12946 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 }, 12947 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 }, 12948 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 }, 12949 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 }, 12950 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 }, 12951 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 }, 12952 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 }, 12953 12954 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 }, 12955 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 }, 12956 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, 12957 12958 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 }, 12959 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 }, 12960 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 }, 12961 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 
}, 12962 12963 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, 12964 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, 12965 12966 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 }, 12967 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 }, 12968 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 }, 12969 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 }, 12970 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 }, 12971 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 }, 12972 12973 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, 12974 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, 12975 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, 12976 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, 12977 12978 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 }, 12979 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 }, 12980 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 }, 12981 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 }, 12982 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 }, 12983 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 }, 12984 12985 /* Special. 
*/ 12986 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 }, 12987 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 }, 12988 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 }, 12989 12990 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, 12991 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, 12992 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, 12993 12994 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, 12995 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, 12996 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 }, 12997 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 }, 12998 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 }, 12999 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 }, 13000 13001 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 }, 13002 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 }, 13003 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 }, 13004 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 }, 13005 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 }, 13006 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 }, 13007 13008 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 }, 13009 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 }, 13010 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, 13011 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 }, 13012 13013 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, 13014 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }, 13015 13016 /* SSE2 */ 13017 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 }, 13018 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 }, 13019 { MASK_SSE2, 
CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 }, 13020 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 }, 13021 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 }, 13022 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 }, 13023 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 }, 13024 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 }, 13025 13026 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 }, 13027 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 }, 13028 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 }, 13029 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 }, 13030 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 }, 13031 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 }, 13032 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 }, 13033 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 }, 13034 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 }, 13035 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 }, 13036 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 }, 13037 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 }, 13038 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 }, 13039 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 }, 13040 { MASK_SSE2, 
CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 }, 13041 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 }, 13042 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 }, 13043 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 }, 13044 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 }, 13045 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 }, 13046 13047 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 }, 13048 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 }, 13049 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 }, 13050 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 }, 13051 13052 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 }, 13053 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 }, 13054 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 }, 13055 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 }, 13056 13057 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 }, 13058 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 }, 13059 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 }, 13060 13061 /* SSE2 MMX */ 13062 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 }, 13063 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 }, 13064 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 }, 
13065 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 }, 13066 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 }, 13067 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 }, 13068 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 }, 13069 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 }, 13070 13071 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 }, 13072 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 }, 13073 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 }, 13074 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 }, 13075 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 }, 13076 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 }, 13077 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 }, 13078 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 }, 13079 13080 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 }, 13081 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 }, 13082 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 }, 13083 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 }, 13084 13085 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 }, 13086 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 }, 13087 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", 
IX86_BUILTIN_POR128, 0, 0 }, 13088 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 }, 13089 13090 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 }, 13091 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 }, 13092 13093 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 }, 13094 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 }, 13095 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 }, 13096 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 }, 13097 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 }, 13098 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 }, 13099 13100 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 }, 13101 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 }, 13102 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 }, 13103 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 }, 13104 13105 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 }, 13106 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 }, 13107 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 }, 13108 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 }, 13109 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 }, 13110 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", 
IX86_BUILTIN_PUNPCKLWD128, 0, 0 }, 13111 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 }, 13112 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 }, 13113 13114 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 }, 13115 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 }, 13116 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 }, 13117 13118 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 }, 13119 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 }, 13120 13121 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 }, 13122 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 }, 13123 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 }, 13124 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 }, 13125 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 }, 13126 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 }, 13127 13128 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 }, 13129 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 }, 13130 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 }, 13131 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 }, 13132 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 }, 13133 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 }, 13134 13135 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 }, 13136 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 }, 13137 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 }, 13138 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 }, 
  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },

  /* SSE3 MMX */
  { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
};

/* Descriptions of one-operand MMX/SSE/SSE2/SSE3 builtins: each entry
   pairs a target mask with the insn pattern that expands the builtin.
   Entries with a NULL (0) name field are registered individually
   elsewhere rather than by the generic table walk — NOTE(review):
   the registration loop is outside this view; confirm against
   ix86_init_mmx_sse_builtins.  */
static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  /* Scalar/packed single <-> integer conversions; the *q variants are
     64-bit-only and therefore also require MASK_64BIT.  */
  { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  /* Packed double <-> integer conversions; cvtt* variants truncate.  */
  { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },

  /* SSE3 */
  { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
  { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
  { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
};

/* Target hook: register the machine-specific builtin functions.
   All MMX/SSE/SSE2/SSE3 builtins are gated behind TARGET_MMX; when
   MMX is disabled nothing is registered at all.  */
void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.
Otherwise, if TARGET_SSE is not set, only expand the MMX 13216 builtins. */ 13217static void 13218ix86_init_mmx_sse_builtins (void) 13219{ 13220 const struct builtin_description * d; 13221 size_t i; 13222 13223 tree pchar_type_node = build_pointer_type (char_type_node); 13224 tree pcchar_type_node = build_pointer_type ( 13225 build_type_variant (char_type_node, 1, 0)); 13226 tree pfloat_type_node = build_pointer_type (float_type_node); 13227 tree pcfloat_type_node = build_pointer_type ( 13228 build_type_variant (float_type_node, 1, 0)); 13229 tree pv2si_type_node = build_pointer_type (V2SI_type_node); 13230 tree pv2di_type_node = build_pointer_type (V2DI_type_node); 13231 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node); 13232 13233 /* Comparisons. */ 13234 tree int_ftype_v4sf_v4sf 13235 = build_function_type_list (integer_type_node, 13236 V4SF_type_node, V4SF_type_node, NULL_TREE); 13237 tree v4si_ftype_v4sf_v4sf 13238 = build_function_type_list (V4SI_type_node, 13239 V4SF_type_node, V4SF_type_node, NULL_TREE); 13240 /* MMX/SSE/integer conversions. 
*/ 13241 tree int_ftype_v4sf 13242 = build_function_type_list (integer_type_node, 13243 V4SF_type_node, NULL_TREE); 13244 tree int64_ftype_v4sf 13245 = build_function_type_list (long_long_integer_type_node, 13246 V4SF_type_node, NULL_TREE); 13247 tree int_ftype_v8qi 13248 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE); 13249 tree v4sf_ftype_v4sf_int 13250 = build_function_type_list (V4SF_type_node, 13251 V4SF_type_node, integer_type_node, NULL_TREE); 13252 tree v4sf_ftype_v4sf_int64 13253 = build_function_type_list (V4SF_type_node, 13254 V4SF_type_node, long_long_integer_type_node, 13255 NULL_TREE); 13256 tree v4sf_ftype_v4sf_v2si 13257 = build_function_type_list (V4SF_type_node, 13258 V4SF_type_node, V2SI_type_node, NULL_TREE); 13259 tree int_ftype_v4hi_int 13260 = build_function_type_list (integer_type_node, 13261 V4HI_type_node, integer_type_node, NULL_TREE); 13262 tree v4hi_ftype_v4hi_int_int 13263 = build_function_type_list (V4HI_type_node, V4HI_type_node, 13264 integer_type_node, integer_type_node, 13265 NULL_TREE); 13266 /* Miscellaneous. 
*/ 13267 tree v8qi_ftype_v4hi_v4hi 13268 = build_function_type_list (V8QI_type_node, 13269 V4HI_type_node, V4HI_type_node, NULL_TREE); 13270 tree v4hi_ftype_v2si_v2si 13271 = build_function_type_list (V4HI_type_node, 13272 V2SI_type_node, V2SI_type_node, NULL_TREE); 13273 tree v4sf_ftype_v4sf_v4sf_int 13274 = build_function_type_list (V4SF_type_node, 13275 V4SF_type_node, V4SF_type_node, 13276 integer_type_node, NULL_TREE); 13277 tree v2si_ftype_v4hi_v4hi 13278 = build_function_type_list (V2SI_type_node, 13279 V4HI_type_node, V4HI_type_node, NULL_TREE); 13280 tree v4hi_ftype_v4hi_int 13281 = build_function_type_list (V4HI_type_node, 13282 V4HI_type_node, integer_type_node, NULL_TREE); 13283 tree v4hi_ftype_v4hi_di 13284 = build_function_type_list (V4HI_type_node, 13285 V4HI_type_node, long_long_unsigned_type_node, 13286 NULL_TREE); 13287 tree v2si_ftype_v2si_di 13288 = build_function_type_list (V2SI_type_node, 13289 V2SI_type_node, long_long_unsigned_type_node, 13290 NULL_TREE); 13291 tree void_ftype_void 13292 = build_function_type (void_type_node, void_list_node); 13293 tree void_ftype_unsigned 13294 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE); 13295 tree void_ftype_unsigned_unsigned 13296 = build_function_type_list (void_type_node, unsigned_type_node, 13297 unsigned_type_node, NULL_TREE); 13298 tree void_ftype_pcvoid_unsigned_unsigned 13299 = build_function_type_list (void_type_node, const_ptr_type_node, 13300 unsigned_type_node, unsigned_type_node, 13301 NULL_TREE); 13302 tree unsigned_ftype_void 13303 = build_function_type (unsigned_type_node, void_list_node); 13304 tree di_ftype_void 13305 = build_function_type (long_long_unsigned_type_node, void_list_node); 13306 tree v4sf_ftype_void 13307 = build_function_type (V4SF_type_node, void_list_node); 13308 tree v2si_ftype_v4sf 13309 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE); 13310 /* Loads/stores. 
*/ 13311 tree void_ftype_v8qi_v8qi_pchar 13312 = build_function_type_list (void_type_node, 13313 V8QI_type_node, V8QI_type_node, 13314 pchar_type_node, NULL_TREE); 13315 tree v4sf_ftype_pcfloat 13316 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE); 13317 /* @@@ the type is bogus */ 13318 tree v4sf_ftype_v4sf_pv2si 13319 = build_function_type_list (V4SF_type_node, 13320 V4SF_type_node, pv2si_type_node, NULL_TREE); 13321 tree void_ftype_pv2si_v4sf 13322 = build_function_type_list (void_type_node, 13323 pv2si_type_node, V4SF_type_node, NULL_TREE); 13324 tree void_ftype_pfloat_v4sf 13325 = build_function_type_list (void_type_node, 13326 pfloat_type_node, V4SF_type_node, NULL_TREE); 13327 tree void_ftype_pdi_di 13328 = build_function_type_list (void_type_node, 13329 pdi_type_node, long_long_unsigned_type_node, 13330 NULL_TREE); 13331 tree void_ftype_pv2di_v2di 13332 = build_function_type_list (void_type_node, 13333 pv2di_type_node, V2DI_type_node, NULL_TREE); 13334 /* Normal vector unops. */ 13335 tree v4sf_ftype_v4sf 13336 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); 13337 13338 /* Normal vector binops. 
*/ 13339 tree v4sf_ftype_v4sf_v4sf 13340 = build_function_type_list (V4SF_type_node, 13341 V4SF_type_node, V4SF_type_node, NULL_TREE); 13342 tree v8qi_ftype_v8qi_v8qi 13343 = build_function_type_list (V8QI_type_node, 13344 V8QI_type_node, V8QI_type_node, NULL_TREE); 13345 tree v4hi_ftype_v4hi_v4hi 13346 = build_function_type_list (V4HI_type_node, 13347 V4HI_type_node, V4HI_type_node, NULL_TREE); 13348 tree v2si_ftype_v2si_v2si 13349 = build_function_type_list (V2SI_type_node, 13350 V2SI_type_node, V2SI_type_node, NULL_TREE); 13351 tree di_ftype_di_di 13352 = build_function_type_list (long_long_unsigned_type_node, 13353 long_long_unsigned_type_node, 13354 long_long_unsigned_type_node, NULL_TREE); 13355 13356 tree v2si_ftype_v2sf 13357 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE); 13358 tree v2sf_ftype_v2si 13359 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE); 13360 tree v2si_ftype_v2si 13361 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE); 13362 tree v2sf_ftype_v2sf 13363 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE); 13364 tree v2sf_ftype_v2sf_v2sf 13365 = build_function_type_list (V2SF_type_node, 13366 V2SF_type_node, V2SF_type_node, NULL_TREE); 13367 tree v2si_ftype_v2sf_v2sf 13368 = build_function_type_list (V2SI_type_node, 13369 V2SF_type_node, V2SF_type_node, NULL_TREE); 13370 tree pint_type_node = build_pointer_type (integer_type_node); 13371 tree pcint_type_node = build_pointer_type ( 13372 build_type_variant (integer_type_node, 1, 0)); 13373 tree pdouble_type_node = build_pointer_type (double_type_node); 13374 tree pcdouble_type_node = build_pointer_type ( 13375 build_type_variant (double_type_node, 1, 0)); 13376 tree int_ftype_v2df_v2df 13377 = build_function_type_list (integer_type_node, 13378 V2DF_type_node, V2DF_type_node, NULL_TREE); 13379 13380 tree ti_ftype_void 13381 = build_function_type (intTI_type_node, void_list_node); 13382 tree v2di_ftype_void 
13383 = build_function_type (V2DI_type_node, void_list_node); 13384 tree ti_ftype_ti_ti 13385 = build_function_type_list (intTI_type_node, 13386 intTI_type_node, intTI_type_node, NULL_TREE); 13387 tree void_ftype_pcvoid 13388 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE); 13389 tree v2di_ftype_di 13390 = build_function_type_list (V2DI_type_node, 13391 long_long_unsigned_type_node, NULL_TREE); 13392 tree di_ftype_v2di 13393 = build_function_type_list (long_long_unsigned_type_node, 13394 V2DI_type_node, NULL_TREE); 13395 tree v4sf_ftype_v4si 13396 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE); 13397 tree v4si_ftype_v4sf 13398 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE); 13399 tree v2df_ftype_v4si 13400 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE); 13401 tree v4si_ftype_v2df 13402 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE); 13403 tree v2si_ftype_v2df 13404 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE); 13405 tree v4sf_ftype_v2df 13406 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE); 13407 tree v2df_ftype_v2si 13408 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE); 13409 tree v2df_ftype_v4sf 13410 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE); 13411 tree int_ftype_v2df 13412 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE); 13413 tree int64_ftype_v2df 13414 = build_function_type_list (long_long_integer_type_node, 13415 V2DF_type_node, NULL_TREE); 13416 tree v2df_ftype_v2df_int 13417 = build_function_type_list (V2DF_type_node, 13418 V2DF_type_node, integer_type_node, NULL_TREE); 13419 tree v2df_ftype_v2df_int64 13420 = build_function_type_list (V2DF_type_node, 13421 V2DF_type_node, long_long_integer_type_node, 13422 NULL_TREE); 13423 tree v4sf_ftype_v4sf_v2df 13424 = build_function_type_list (V4SF_type_node, 13425 
V4SF_type_node, V2DF_type_node, NULL_TREE); 13426 tree v2df_ftype_v2df_v4sf 13427 = build_function_type_list (V2DF_type_node, 13428 V2DF_type_node, V4SF_type_node, NULL_TREE); 13429 tree v2df_ftype_v2df_v2df_int 13430 = build_function_type_list (V2DF_type_node, 13431 V2DF_type_node, V2DF_type_node, 13432 integer_type_node, 13433 NULL_TREE); 13434 tree v2df_ftype_v2df_pv2si 13435 = build_function_type_list (V2DF_type_node, 13436 V2DF_type_node, pv2si_type_node, NULL_TREE); 13437 tree void_ftype_pv2si_v2df 13438 = build_function_type_list (void_type_node, 13439 pv2si_type_node, V2DF_type_node, NULL_TREE); 13440 tree void_ftype_pdouble_v2df 13441 = build_function_type_list (void_type_node, 13442 pdouble_type_node, V2DF_type_node, NULL_TREE); 13443 tree void_ftype_pint_int 13444 = build_function_type_list (void_type_node, 13445 pint_type_node, integer_type_node, NULL_TREE); 13446 tree void_ftype_v16qi_v16qi_pchar 13447 = build_function_type_list (void_type_node, 13448 V16QI_type_node, V16QI_type_node, 13449 pchar_type_node, NULL_TREE); 13450 tree v2df_ftype_pcdouble 13451 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE); 13452 tree v2df_ftype_v2df_v2df 13453 = build_function_type_list (V2DF_type_node, 13454 V2DF_type_node, V2DF_type_node, NULL_TREE); 13455 tree v16qi_ftype_v16qi_v16qi 13456 = build_function_type_list (V16QI_type_node, 13457 V16QI_type_node, V16QI_type_node, NULL_TREE); 13458 tree v8hi_ftype_v8hi_v8hi 13459 = build_function_type_list (V8HI_type_node, 13460 V8HI_type_node, V8HI_type_node, NULL_TREE); 13461 tree v4si_ftype_v4si_v4si 13462 = build_function_type_list (V4SI_type_node, 13463 V4SI_type_node, V4SI_type_node, NULL_TREE); 13464 tree v2di_ftype_v2di_v2di 13465 = build_function_type_list (V2DI_type_node, 13466 V2DI_type_node, V2DI_type_node, NULL_TREE); 13467 tree v2di_ftype_v2df_v2df 13468 = build_function_type_list (V2DI_type_node, 13469 V2DF_type_node, V2DF_type_node, NULL_TREE); 13470 tree v2df_ftype_v2df 13471 = 
build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE); 13472 tree v2df_ftype_double 13473 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE); 13474 tree v2df_ftype_double_double 13475 = build_function_type_list (V2DF_type_node, 13476 double_type_node, double_type_node, NULL_TREE); 13477 tree int_ftype_v8hi_int 13478 = build_function_type_list (integer_type_node, 13479 V8HI_type_node, integer_type_node, NULL_TREE); 13480 tree v8hi_ftype_v8hi_int_int 13481 = build_function_type_list (V8HI_type_node, 13482 V8HI_type_node, integer_type_node, 13483 integer_type_node, NULL_TREE); 13484 tree v2di_ftype_v2di_int 13485 = build_function_type_list (V2DI_type_node, 13486 V2DI_type_node, integer_type_node, NULL_TREE); 13487 tree v4si_ftype_v4si_int 13488 = build_function_type_list (V4SI_type_node, 13489 V4SI_type_node, integer_type_node, NULL_TREE); 13490 tree v8hi_ftype_v8hi_int 13491 = build_function_type_list (V8HI_type_node, 13492 V8HI_type_node, integer_type_node, NULL_TREE); 13493 tree v8hi_ftype_v8hi_v2di 13494 = build_function_type_list (V8HI_type_node, 13495 V8HI_type_node, V2DI_type_node, NULL_TREE); 13496 tree v4si_ftype_v4si_v2di 13497 = build_function_type_list (V4SI_type_node, 13498 V4SI_type_node, V2DI_type_node, NULL_TREE); 13499 tree v4si_ftype_v8hi_v8hi 13500 = build_function_type_list (V4SI_type_node, 13501 V8HI_type_node, V8HI_type_node, NULL_TREE); 13502 tree di_ftype_v8qi_v8qi 13503 = build_function_type_list (long_long_unsigned_type_node, 13504 V8QI_type_node, V8QI_type_node, NULL_TREE); 13505 tree v2di_ftype_v16qi_v16qi 13506 = build_function_type_list (V2DI_type_node, 13507 V16QI_type_node, V16QI_type_node, NULL_TREE); 13508 tree int_ftype_v16qi 13509 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE); 13510 tree v16qi_ftype_pcchar 13511 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE); 13512 tree void_ftype_pchar_v16qi 13513 = build_function_type_list (void_type_node, 
13514 pchar_type_node, V16QI_type_node, NULL_TREE); 13515 tree v4si_ftype_pcint 13516 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE); 13517 tree void_ftype_pcint_v4si 13518 = build_function_type_list (void_type_node, 13519 pcint_type_node, V4SI_type_node, NULL_TREE); 13520 tree v2di_ftype_v2di 13521 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); 13522 13523 tree float80_type; 13524 tree float128_type; 13525 13526 /* The __float80 type. */ 13527 if (TYPE_MODE (long_double_type_node) == XFmode) 13528 (*lang_hooks.types.register_builtin_type) (long_double_type_node, 13529 "__float80"); 13530 else 13531 { 13532 /* The __float80 type. */ 13533 float80_type = make_node (REAL_TYPE); 13534 TYPE_PRECISION (float80_type) = 96; 13535 layout_type (float80_type); 13536 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80"); 13537 } 13538 13539 float128_type = make_node (REAL_TYPE); 13540 TYPE_PRECISION (float128_type) = 128; 13541 layout_type (float128_type); 13542 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128"); 13543 13544 /* Add all builtins that are more or less simple operations on two 13545 operands. */ 13546 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) 13547 { 13548 /* Use one of the operands; the target can have a different mode for 13549 mask-generating compares. 
*/ 13550 enum machine_mode mode; 13551 tree type; 13552 13553 if (d->name == 0) 13554 continue; 13555 mode = insn_data[d->icode].operand[1].mode; 13556 13557 switch (mode) 13558 { 13559 case V16QImode: 13560 type = v16qi_ftype_v16qi_v16qi; 13561 break; 13562 case V8HImode: 13563 type = v8hi_ftype_v8hi_v8hi; 13564 break; 13565 case V4SImode: 13566 type = v4si_ftype_v4si_v4si; 13567 break; 13568 case V2DImode: 13569 type = v2di_ftype_v2di_v2di; 13570 break; 13571 case V2DFmode: 13572 type = v2df_ftype_v2df_v2df; 13573 break; 13574 case TImode: 13575 type = ti_ftype_ti_ti; 13576 break; 13577 case V4SFmode: 13578 type = v4sf_ftype_v4sf_v4sf; 13579 break; 13580 case V8QImode: 13581 type = v8qi_ftype_v8qi_v8qi; 13582 break; 13583 case V4HImode: 13584 type = v4hi_ftype_v4hi_v4hi; 13585 break; 13586 case V2SImode: 13587 type = v2si_ftype_v2si_v2si; 13588 break; 13589 case DImode: 13590 type = di_ftype_di_di; 13591 break; 13592 13593 default: 13594 abort (); 13595 } 13596 13597 /* Override for comparisons. */ 13598 if (d->icode == CODE_FOR_maskcmpv4sf3 13599 || d->icode == CODE_FOR_maskncmpv4sf3 13600 || d->icode == CODE_FOR_vmmaskcmpv4sf3 13601 || d->icode == CODE_FOR_vmmaskncmpv4sf3) 13602 type = v4si_ftype_v4sf_v4sf; 13603 13604 if (d->icode == CODE_FOR_maskcmpv2df3 13605 || d->icode == CODE_FOR_maskncmpv2df3 13606 || d->icode == CODE_FOR_vmmaskcmpv2df3 13607 || d->icode == CODE_FOR_vmmaskncmpv2df3) 13608 type = v2di_ftype_v2df_v2df; 13609 13610 def_builtin (d->mask, d->name, type, d->code); 13611 } 13612 13613 /* Add the remaining MMX insns with somewhat more complicated types. 
*/ 13614 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO); 13615 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); 13616 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW); 13617 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD); 13618 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ); 13619 13620 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW); 13621 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD); 13622 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ); 13623 13624 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW); 13625 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD); 13626 13627 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW); 13628 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD); 13629 13630 /* comi/ucomi insns. 
*/ 13631 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) 13632 if (d->mask == MASK_SSE2) 13633 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code); 13634 else 13635 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code); 13636 13637 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB); 13638 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW); 13639 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB); 13640 13641 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); 13642 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); 13643 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); 13644 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); 13645 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); 13646 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS); 13647 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); 13648 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64); 13649 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); 13650 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); 13651 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64); 13652 13653 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW); 13654 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW); 13655 13656 def_builtin (MASK_SSE | MASK_3DNOW_A, 
"__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); 13657 13658 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS); 13659 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS); 13660 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS); 13661 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS); 13662 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); 13663 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS); 13664 13665 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS); 13666 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS); 13667 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS); 13668 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); 13669 13670 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS); 13671 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); 13672 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); 13673 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); 13674 13675 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); 13676 13677 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); 13678 13679 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); 13680 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); 13681 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", 
v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS); 13682 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); 13683 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS); 13684 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); 13685 13686 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); 13687 13688 /* Original 3DNow! */ 13689 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS); 13690 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB); 13691 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID); 13692 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC); 13693 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD); 13694 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ); 13695 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE); 13696 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT); 13697 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX); 13698 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN); 13699 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL); 13700 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP); 13701 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1); 13702 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2); 13703 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT); 13704 def_builtin (MASK_3DNOW, 
"__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1); 13705 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB); 13706 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR); 13707 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD); 13708 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW); 13709 13710 /* 3DNow! extension as used in the Athlon CPU. */ 13711 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW); 13712 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC); 13713 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC); 13714 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW); 13715 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF); 13716 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI); 13717 13718 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO); 13719 13720 /* SSE2 */ 13721 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128); 13722 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128); 13723 13724 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU); 13725 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ); 13726 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q); 13727 13728 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD); 13729 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD); 13730 def_builtin 
(MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD); 13731 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD); 13732 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD); 13733 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD); 13734 13735 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD); 13736 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD); 13737 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD); 13738 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD); 13739 13740 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD); 13741 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128); 13742 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI); 13743 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD); 13744 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ); 13745 13746 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD); 13747 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW); 13748 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW); 13749 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128); 13750 13751 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD); 13752 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD); 13753 13754 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", 
v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD); 13755 13756 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD); 13757 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS); 13758 13759 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ); 13760 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI); 13761 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS); 13762 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ); 13763 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI); 13764 13765 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD); 13766 13767 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI); 13768 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI); 13769 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64); 13770 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64); 13771 13772 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ); 13773 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD); 13774 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ); 13775 13776 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD); 13777 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD); 13778 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS); 13779 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", 
v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD); 13780 13781 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1); 13782 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD); 13783 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD); 13784 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1); 13785 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD); 13786 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1); 13787 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD); 13788 13789 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH); 13790 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE); 13791 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE); 13792 13793 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA); 13794 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU); 13795 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD); 13796 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA); 13797 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU); 13798 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED); 13799 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ); 13800 13801 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI); 13802 13803 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128); 13804 def_builtin (MASK_SSE2, 
"__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128); 13805 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128); 13806 13807 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128); 13808 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128); 13809 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128); 13810 13811 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128); 13812 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128); 13813 13814 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128); 13815 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128); 13816 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128); 13817 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128); 13818 13819 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128); 13820 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128); 13821 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128); 13822 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128); 13823 13824 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128); 13825 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128); 13826 13827 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128); 13828 13829 /* Prescott New Instructions. 
 */
  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
	       void_ftype_pcvoid_unsigned_unsigned,
	       IX86_BUILTIN_MONITOR);
  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
	       void_ftype_unsigned_unsigned,
	       IX86_BUILTIN_MWAIT);
  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
	       v4sf_ftype_v4sf,
	       IX86_BUILTIN_MOVSHDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
	       v4sf_ftype_v4sf,
	       IX86_BUILTIN_MOVSLDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
  def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
	       v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
	       v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.

   X is the candidate operand and MODE the vector mode the caller
   expects.  If X is anything other than const0_rtx it is returned
   unchanged; otherwise a fresh pseudo of MODE is allocated, cleared
   to zero with a vector-clear insn, and returned in its place.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  /* Choose the clearing insn by register file: MMX-sized modes use
     mmx_clrdi, everything else goes through the SSE v4sf clear.  When
     MODE differs from the clear insn's own mode, the pseudo is viewed
     through a SUBREG of that mode so the insn still applies.  */
  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
			      : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
				: gen_rtx_SUBREG (V4SFmode, x, 0),
				CONST0_RTX (V4SFmode)));
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.
 */

/* Subroutine of ix86_expand_builtin: expand a two-operand builtin.

   ICODE is the insn code to generate, ARGLIST the TREE_LIST holding the
   two argument expressions, and TARGET a candidate rtx for the result
   (may be zero or have the wrong mode, in which case a new pseudo is
   allocated).  Returns the rtx holding the result, or 0 if the insn
   pattern could not be generated.  */
static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  /* Guard against const0_rtx standing in for a vector operand
     (see safe_vector_operand).  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* An SImode second operand where the insn wants TImode: load it into
     a V4SImode pseudo via sse2_loadd and reinterpret the low part as
     TImode.  */
  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
      || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
    abort ();

  /* Force operands rejected by the insn's predicates into registers.  */
  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of stores.

   ICODE is the store insn to generate, ARGLIST holds the address
   expression followed by the value to store.  Always returns 0; the
   builtin's value is its side effect of emitting the store.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Operand 0 is the destination: build a MEM from the pointer
     argument, forcing the address into a register first.  */
  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
			  rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
|