i386.c revision 97827
1/* Subroutines used for code generation on IA-32. 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 3 2002 Free Software Foundation, Inc. 4 5This file is part of GNU CC. 6 7GNU CC is free software; you can redistribute it and/or modify 8it under the terms of the GNU General Public License as published by 9the Free Software Foundation; either version 2, or (at your option) 10any later version. 11 12GNU CC is distributed in the hope that it will be useful, 13but WITHOUT ANY WARRANTY; without even the implied warranty of 14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15GNU General Public License for more details. 16 17You should have received a copy of the GNU General Public License 18along with GNU CC; see the file COPYING. If not, write to 19the Free Software Foundation, 59 Temple Place - Suite 330, 20Boston, MA 02111-1307, USA. */ 21 22 23/* $FreeBSD: head/contrib/gcc/config/i386/i386.c 97827 2002-06-04 18:06:12Z obrien $ */ 24 25 26#include "config.h" 27#include "system.h" 28#include "rtl.h" 29#include "tree.h" 30#include "tm_p.h" 31#include "regs.h" 32#include "hard-reg-set.h" 33#include "real.h" 34#include "insn-config.h" 35#include "conditions.h" 36#include "output.h" 37#include "insn-attr.h" 38#include "flags.h" 39#include "except.h" 40#include "function.h" 41#include "recog.h" 42#include "expr.h" 43#include "optabs.h" 44#include "toplev.h" 45#include "basic-block.h" 46#include "ggc.h" 47#include "target.h" 48#include "target-def.h" 49 50#ifndef CHECK_STACK_LIMIT 51#define CHECK_STACK_LIMIT (-1) 52#endif 53 54#warning NEED TO REVISIT "PIC_REG_USED" AND -mprofiler-epilogue SUPPORT 55#if 0 56#define PIC_REG_USED \ 57 (flag_pic && (current_function_uses_pic_offset_table \ 58 || current_function_uses_const_pool \ 59 || profile_flag || profile_block_flag)) 60#endif 61 62/* Processor costs (relative to an add) */ 63static const 64struct processor_costs size_cost = { /* costs for tunning for size */ 65 2, /* cost of an add instruction */ 66 3, /* cost of a lea instruction */ 67 2, /* variable shift costs */ 68 3, /* constant shift costs */ 69 3, /* cost of starting a multiply */ 70 0, /* cost of multiply per each bit set */ 71 3, /* cost of a divide/mod */ 72 3, /* cost of movsx */ 73 3, /* cost of movzx */ 74 0, /* "large" insn */ 75 2, /* MOVE_RATIO */ 76 2, /* cost for loading QImode using movzbl */ 77 {2, 2, 2}, /* cost of loading integer registers 78 in QImode, HImode and SImode. 79 Relative to reg-reg move (2). */ 80 {2, 2, 2}, /* cost of storing integer registers */ 81 2, /* cost of reg,reg fld/fst */ 82 {2, 2, 2}, /* cost of loading fp registers 83 in SFmode, DFmode and XFmode */ 84 {2, 2, 2}, /* cost of loading integer registers */ 85 3, /* cost of moving MMX register */ 86 {3, 3}, /* cost of loading MMX registers 87 in SImode and DImode */ 88 {3, 3}, /* cost of storing MMX registers 89 in SImode and DImode */ 90 3, /* cost of moving SSE register */ 91 {3, 3, 3}, /* cost of loading SSE registers 92 in SImode, DImode and TImode */ 93 {3, 3, 3}, /* cost of storing SSE registers 94 in SImode, DImode and TImode */ 95 3, /* MMX or SSE register to integer */ 96 0, /* size of prefetch block */ 97 0, /* number of parallel prefetches */ 98}; 99/* Processor costs (relative to an add) */ 100static const 101struct processor_costs i386_cost = { /* 386 specific costs */ 102 1, /* cost of an add instruction */ 103 1, /* cost of a lea instruction */ 104 3, /* variable shift costs */ 105 2, /* constant shift costs */ 106 6, /* cost of starting a multiply */ 107 1, /* cost of multiply per each bit set */ 108 23, /* cost of a divide/mod */ 109 3, /* cost of movsx */ 110 2, /* cost of movzx */ 111 15, /* "large" insn */ 112 3, /* MOVE_RATIO */ 113 4, /* cost for loading QImode using movzbl */ 114 {2, 4, 2}, /* cost of loading integer registers 115 in QImode, HImode and SImode. 116 Relative to reg-reg move (2). */ 117 {2, 4, 2}, /* cost of storing integer registers */ 118 2, /* cost of reg,reg fld/fst */ 119 {8, 8, 8}, /* cost of loading fp registers 120 in SFmode, DFmode and XFmode */ 121 {8, 8, 8}, /* cost of loading integer registers */ 122 2, /* cost of moving MMX register */ 123 {4, 8}, /* cost of loading MMX registers 124 in SImode and DImode */ 125 {4, 8}, /* cost of storing MMX registers 126 in SImode and DImode */ 127 2, /* cost of moving SSE register */ 128 {4, 8, 16}, /* cost of loading SSE registers 129 in SImode, DImode and TImode */ 130 {4, 8, 16}, /* cost of storing SSE registers 131 in SImode, DImode and TImode */ 132 3, /* MMX or SSE register to integer */ 133 0, /* size of prefetch block */ 134 0, /* number of parallel prefetches */ 135}; 136 137static const 138struct processor_costs i486_cost = { /* 486 specific costs */ 139 1, /* cost of an add instruction */ 140 1, /* cost of a lea instruction */ 141 3, /* variable shift costs */ 142 2, /* constant shift costs */ 143 12, /* cost of starting a multiply */ 144 1, /* cost of multiply per each bit set */ 145 40, /* cost of a divide/mod */ 146 3, /* cost of movsx */ 147 2, /* cost of movzx */ 148 15, /* "large" insn */ 149 3, /* MOVE_RATIO */ 150 4, /* cost for loading QImode using movzbl */ 151 {2, 4, 2}, /* cost of loading integer registers 152 in QImode, HImode and SImode. 153 Relative to reg-reg move (2). */ 154 {2, 4, 2}, /* cost of storing integer registers */ 155 2, /* cost of reg,reg fld/fst */ 156 {8, 8, 8}, /* cost of loading fp registers 157 in SFmode, DFmode and XFmode */ 158 {8, 8, 8}, /* cost of loading integer registers */ 159 2, /* cost of moving MMX register */ 160 {4, 8}, /* cost of loading MMX registers 161 in SImode and DImode */ 162 {4, 8}, /* cost of storing MMX registers 163 in SImode and DImode */ 164 2, /* cost of moving SSE register */ 165 {4, 8, 16}, /* cost of loading SSE registers 166 in SImode, DImode and TImode */ 167 {4, 8, 16}, /* cost of storing SSE registers 168 in SImode, DImode and TImode */ 169 3, /* MMX or SSE register to integer */ 170 0, /* size of prefetch block */ 171 0, /* number of parallel prefetches */ 172}; 173 174static const 175struct processor_costs pentium_cost = { 176 1, /* cost of an add instruction */ 177 1, /* cost of a lea instruction */ 178 4, /* variable shift costs */ 179 1, /* constant shift costs */ 180 11, /* cost of starting a multiply */ 181 0, /* cost of multiply per each bit set */ 182 25, /* cost of a divide/mod */ 183 3, /* cost of movsx */ 184 2, /* cost of movzx */ 185 8, /* "large" insn */ 186 6, /* MOVE_RATIO */ 187 6, /* cost for loading QImode using movzbl */ 188 {2, 4, 2}, /* cost of loading integer registers 189 in QImode, HImode and SImode. 190 Relative to reg-reg move (2). */ 191 {2, 4, 2}, /* cost of storing integer registers */ 192 2, /* cost of reg,reg fld/fst */ 193 {2, 2, 6}, /* cost of loading fp registers 194 in SFmode, DFmode and XFmode */ 195 {4, 4, 6}, /* cost of loading integer registers */ 196 8, /* cost of moving MMX register */ 197 {8, 8}, /* cost of loading MMX registers 198 in SImode and DImode */ 199 {8, 8}, /* cost of storing MMX registers 200 in SImode and DImode */ 201 2, /* cost of moving SSE register */ 202 {4, 8, 16}, /* cost of loading SSE registers 203 in SImode, DImode and TImode */ 204 {4, 8, 16}, /* cost of storing SSE registers 205 in SImode, DImode and TImode */ 206 3, /* MMX or SSE register to integer */ 207 0, /* size of prefetch block */ 208 0, /* number of parallel prefetches */ 209}; 210 211static const 212struct processor_costs pentiumpro_cost = { 213 1, /* cost of an add instruction */ 214 1, /* cost of a lea instruction */ 215 1, /* variable shift costs */ 216 1, /* constant shift costs */ 217 4, /* cost of starting a multiply */ 218 0, /* cost of multiply per each bit set */ 219 17, /* cost of a divide/mod */ 220 1, /* cost of movsx */ 221 1, /* cost of movzx */ 222 8, /* "large" insn */ 223 6, /* MOVE_RATIO */ 224 2, /* cost for loading QImode using movzbl */ 225 {4, 4, 4}, /* cost of loading integer registers 226 in QImode, HImode and SImode. 227 Relative to reg-reg move (2). */ 228 {2, 2, 2}, /* cost of storing integer registers */ 229 2, /* cost of reg,reg fld/fst */ 230 {2, 2, 6}, /* cost of loading fp registers 231 in SFmode, DFmode and XFmode */ 232 {4, 4, 6}, /* cost of loading integer registers */ 233 2, /* cost of moving MMX register */ 234 {2, 2}, /* cost of loading MMX registers 235 in SImode and DImode */ 236 {2, 2}, /* cost of storing MMX registers 237 in SImode and DImode */ 238 2, /* cost of moving SSE register */ 239 {2, 2, 8}, /* cost of loading SSE registers 240 in SImode, DImode and TImode */ 241 {2, 2, 8}, /* cost of storing SSE registers 242 in SImode, DImode and TImode */ 243 3, /* MMX or SSE register to integer */ 244 32, /* size of prefetch block */ 245 6, /* number of parallel prefetches */ 246}; 247 248static const 249struct processor_costs k6_cost = { 250 1, /* cost of an add instruction */ 251 2, /* cost of a lea instruction */ 252 1, /* variable shift costs */ 253 1, /* constant shift costs */ 254 3, /* cost of starting a multiply */ 255 0, /* cost of multiply per each bit set */ 256 18, /* cost of a divide/mod */ 257 2, /* cost of movsx */ 258 2, /* cost of movzx */ 259 8, /* "large" insn */ 260 4, /* MOVE_RATIO */ 261 3, /* cost for loading QImode using movzbl */ 262 {4, 5, 4}, /* cost of loading integer registers 263 in QImode, HImode and SImode. 264 Relative to reg-reg move (2). */ 265 {2, 3, 2}, /* cost of storing integer registers */ 266 4, /* cost of reg,reg fld/fst */ 267 {6, 6, 6}, /* cost of loading fp registers 268 in SFmode, DFmode and XFmode */ 269 {4, 4, 4}, /* cost of loading integer registers */ 270 2, /* cost of moving MMX register */ 271 {2, 2}, /* cost of loading MMX registers 272 in SImode and DImode */ 273 {2, 2}, /* cost of storing MMX registers 274 in SImode and DImode */ 275 2, /* cost of moving SSE register */ 276 {2, 2, 8}, /* cost of loading SSE registers 277 in SImode, DImode and TImode */ 278 {2, 2, 8}, /* cost of storing SSE registers 279 in SImode, DImode and TImode */ 280 6, /* MMX or SSE register to integer */ 281 32, /* size of prefetch block */ 282 1, /* number of parallel prefetches */ 283}; 284 285static const 286struct processor_costs athlon_cost = { 287 1, /* cost of an add instruction */ 288 2, /* cost of a lea instruction */ 289 1, /* variable shift costs */ 290 1, /* constant shift costs */ 291 5, /* cost of starting a multiply */ 292 0, /* cost of multiply per each bit set */ 293 42, /* cost of a divide/mod */ 294 1, /* cost of movsx */ 295 1, /* cost of movzx */ 296 8, /* "large" insn */ 297 9, /* MOVE_RATIO */ 298 4, /* cost for loading QImode using movzbl */ 299 {4, 5, 4}, /* cost of loading integer registers 300 in QImode, HImode and SImode. 301 Relative to reg-reg move (2). */ 302 {2, 3, 2}, /* cost of storing integer registers */ 303 4, /* cost of reg,reg fld/fst */ 304 {6, 6, 20}, /* cost of loading fp registers 305 in SFmode, DFmode and XFmode */ 306 {4, 4, 16}, /* cost of loading integer registers */ 307 2, /* cost of moving MMX register */ 308 {2, 2}, /* cost of loading MMX registers 309 in SImode and DImode */ 310 {2, 2}, /* cost of storing MMX registers 311 in SImode and DImode */ 312 2, /* cost of moving SSE register */ 313 {2, 2, 8}, /* cost of loading SSE registers 314 in SImode, DImode and TImode */ 315 {2, 2, 8}, /* cost of storing SSE registers 316 in SImode, DImode and TImode */ 317 6, /* MMX or SSE register to integer */ 318 64, /* size of prefetch block */ 319 6, /* number of parallel prefetches */ 320}; 321 322static const 323struct processor_costs pentium4_cost = { 324 1, /* cost of an add instruction */ 325 1, /* cost of a lea instruction */ 326 8, /* variable shift costs */ 327 8, /* constant shift costs */ 328 30, /* cost of starting a multiply */ 329 0, /* cost of multiply per each bit set */ 330 112, /* cost of a divide/mod */ 331 1, /* cost of movsx */ 332 1, /* cost of movzx */ 333 16, /* "large" insn */ 334 6, /* MOVE_RATIO */ 335 2, /* cost for loading QImode using movzbl */ 336 {4, 5, 4}, /* cost of loading integer registers 337 in QImode, HImode and SImode. 338 Relative to reg-reg move (2). */ 339 {2, 3, 2}, /* cost of storing integer registers */ 340 2, /* cost of reg,reg fld/fst */ 341 {2, 2, 6}, /* cost of loading fp registers 342 in SFmode, DFmode and XFmode */ 343 {4, 4, 6}, /* cost of loading integer registers */ 344 2, /* cost of moving MMX register */ 345 {2, 2}, /* cost of loading MMX registers 346 in SImode and DImode */ 347 {2, 2}, /* cost of storing MMX registers 348 in SImode and DImode */ 349 12, /* cost of moving SSE register */ 350 {12, 12, 12}, /* cost of loading SSE registers 351 in SImode, DImode and TImode */ 352 {2, 2, 8}, /* cost of storing SSE registers 353 in SImode, DImode and TImode */ 354 10, /* MMX or SSE register to integer */ 355 64, /* size of prefetch block */ 356 6, /* number of parallel prefetches */ 357}; 358 359const struct processor_costs *ix86_cost = &pentium_cost; 360 361/* Processor feature/optimization bitmasks. */ 362#define m_386 (1<<PROCESSOR_I386) 363#define m_486 (1<<PROCESSOR_I486) 364#define m_PENT (1<<PROCESSOR_PENTIUM) 365#define m_PPRO (1<<PROCESSOR_PENTIUMPRO) 366#define m_K6 (1<<PROCESSOR_K6) 367#define m_ATHLON (1<<PROCESSOR_ATHLON) 368#define m_PENT4 (1<<PROCESSOR_PENTIUM4) 369 370const int x86_use_leave = m_386 | m_K6 | m_ATHLON; 371const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4; 372const int x86_zero_extend_with_and = m_486 | m_PENT; 373const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */; 374const int x86_double_with_add = ~m_386; 375const int x86_use_bit_test = m_386; 376const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6; 377const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4; 378const int x86_3dnow_a = m_ATHLON; 379const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4; 380const int x86_branch_hints = m_PENT4; 381const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4; 382const int x86_partial_reg_stall = m_PPRO; 383const int x86_use_loop = m_K6; 384const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT); 385const int x86_use_mov0 = m_K6; 386const int x86_use_cltd = ~(m_PENT | m_K6); 387const int x86_read_modify_write = ~m_PENT; 388const int x86_read_modify = ~(m_PENT | m_PPRO); 389const int x86_split_long_moves = m_PPRO; 390const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486; 391const int x86_single_stringop = m_386 | m_PENT4; 392const int x86_qimode_math = ~(0); 393const int x86_promote_qi_regs = 0; 394const int x86_himode_math = ~(m_PPRO); 395const int x86_promote_hi_regs = m_PPRO; 396const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4; 397const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4; 398const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4; 399const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4; 400const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4); 401const int x86_partial_reg_dependency = m_ATHLON | m_PENT4; 402const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4; 403const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO; 404const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO; 405const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO; 406const int x86_decompose_lea = m_PENT4; 407const int x86_arch_always_fancy_math_387 = m_PENT|m_PPRO|m_ATHLON|m_PENT4; 408 409/* In case the avreage insn count for single function invocation is 410 lower than this constant, emit fast (but longer) prologue and 411 epilogue code. */ 412#define FAST_PROLOGUE_INSN_COUNT 30 413/* Set by prologue expander and used by epilogue expander to determine 414 the style used. */ 415static int use_fast_prologue_epilogue; 416 417#define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx)) 418 419static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */ 420static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */ 421static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */ 422 423/* Array of the smallest class containing reg number REGNO, indexed by 424 REGNO. Used by REGNO_REG_CLASS in i386.h. */ 425 426enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] = 427{ 428 /* ax, dx, cx, bx */ 429 AREG, DREG, CREG, BREG, 430 /* si, di, bp, sp */ 431 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS, 432 /* FP registers */ 433 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS, 434 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, 435 /* arg pointer */ 436 NON_Q_REGS, 437 /* flags, fpsr, dirflag, frame */ 438 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS, 439 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 440 SSE_REGS, SSE_REGS, 441 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, 442 MMX_REGS, MMX_REGS, 443 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 444 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 445 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 446 SSE_REGS, SSE_REGS, 447}; 448 449/* The "default" register map used in 32bit mode. */ 450 451int const dbx_register_map[FIRST_PSEUDO_REGISTER] = 452{ 453 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */ 454 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */ 455 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 456 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */ 457 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */ 458 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ 459 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ 460}; 461 462static int const x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/, 463 1 /*RDX*/, 2 /*RCX*/, 464 FIRST_REX_INT_REG /*R8 */, 465 FIRST_REX_INT_REG + 1 /*R9 */}; 466static int const x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDI*/, 5, 4}; 467 468/* The "default" register map used in 64bit mode. */ 469int const dbx64_register_map[FIRST_PSEUDO_REGISTER] = 470{ 471 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */ 472 33, 34, 35, 36, 37, 38, 39, 40 /* fp regs */ 473 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 474 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */ 475 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */ 476 8,9,10,11,12,13,14,15, /* extended integer registers */ 477 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */ 478}; 479 480/* Define the register numbers to be used in Dwarf debugging information. 481 The SVR4 reference port C compiler uses the following register numbers 482 in its Dwarf output code: 483 0 for %eax (gcc regno = 0) 484 1 for %ecx (gcc regno = 2) 485 2 for %edx (gcc regno = 1) 486 3 for %ebx (gcc regno = 3) 487 4 for %esp (gcc regno = 7) 488 5 for %ebp (gcc regno = 6) 489 6 for %esi (gcc regno = 4) 490 7 for %edi (gcc regno = 5) 491 The following three DWARF register numbers are never generated by 492 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4 493 believes these numbers have these meanings. 494 8 for %eip (no gcc equivalent) 495 9 for %eflags (gcc regno = 17) 496 10 for %trapno (no gcc equivalent) 497 It is not at all clear how we should number the FP stack registers 498 for the x86 architecture. If the version of SDB on x86/svr4 were 499 a bit less brain dead with respect to floating-point then we would 500 have a precedent to follow with respect to DWARF register numbers 501 for x86 FP registers, but the SDB on x86/svr4 is so completely 502 broken with respect to FP registers that it is hardly worth thinking 503 of it as something to strive for compatibility with. 504 The version of x86/svr4 SDB I have at the moment does (partially) 505 seem to believe that DWARF register number 11 is associated with 506 the x86 register %st(0), but that's about all. Higher DWARF 507 register numbers don't seem to be associated with anything in 508 particular, and even for DWARF regno 11, SDB only seems to under- 509 stand that it should say that a variable lives in %st(0) (when 510 asked via an `=' command) if we said it was in DWARF regno 11, 511 but SDB still prints garbage when asked for the value of the 512 variable in question (via a `/' command). 513 (Also note that the labels SDB prints for various FP stack regs 514 when doing an `x' command are all wrong.) 515 Note that these problems generally don't affect the native SVR4 516 C compiler because it doesn't allow the use of -O with -g and 517 because when it is *not* optimizing, it allocates a memory 518 location for each floating-point variable, and the memory 519 location is what gets described in the DWARF AT_location 520 attribute for the variable in question. 521 Regardless of the severe mental illness of the x86/svr4 SDB, we 522 do something sensible here and we use the following DWARF 523 register numbers. Note that these are all stack-top-relative 524 numbers. 525 11 for %st(0) (gcc regno = 8) 526 12 for %st(1) (gcc regno = 9) 527 13 for %st(2) (gcc regno = 10) 528 14 for %st(3) (gcc regno = 11) 529 15 for %st(4) (gcc regno = 12) 530 16 for %st(5) (gcc regno = 13) 531 17 for %st(6) (gcc regno = 14) 532 18 for %st(7) (gcc regno = 15) 533*/ 534int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = 535{ 536 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */ 537 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */ 538 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 539 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */ 540 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */ 541 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */ 542 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */ 543}; 544 545/* Test and compare insns in i386.md store the information needed to 546 generate branch and scc insns here. */ 547 548rtx ix86_compare_op0 = NULL_RTX; 549rtx ix86_compare_op1 = NULL_RTX; 550 551#define MAX_386_STACK_LOCALS 3 552/* Size of the register save area. */ 553#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16) 554 555/* Define the structure for the machine field in struct function. */ 556struct machine_function 557{ 558 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS]; 559 int save_varrargs_registers; 560 int accesses_prev_frame; 561}; 562 563#define ix86_stack_locals (cfun->machine->stack_locals) 564#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers) 565 566/* Structure describing stack frame layout. 567 Stack grows downward: 568 569 [arguments] 570 <- ARG_POINTER 571 saved pc 572 573 saved frame pointer if frame_pointer_needed 574 <- HARD_FRAME_POINTER 575 [saved regs] 576 577 [padding1] \ 578 ) 579 [va_arg registers] ( 580 > to_allocate <- FRAME_POINTER 581 [frame] ( 582 ) 583 [padding2] / 584 */ 585struct ix86_frame 586{ 587 int nregs; 588 int padding1; 589 int va_arg_size; 590 HOST_WIDE_INT frame; 591 int padding2; 592 int outgoing_arguments_size; 593 int red_zone_size; 594 595 HOST_WIDE_INT to_allocate; 596 /* The offsets relative to ARG_POINTER. */ 597 HOST_WIDE_INT frame_pointer_offset; 598 HOST_WIDE_INT hard_frame_pointer_offset; 599 HOST_WIDE_INT stack_pointer_offset; 600}; 601 602/* Used to enable/disable debugging features. */ 603const char *ix86_debug_arg_string, *ix86_debug_addr_string; 604/* Code model option as passed by user. */ 605const char *ix86_cmodel_string; 606/* Parsed value. */ 607enum cmodel ix86_cmodel; 608/* Asm dialect. */ 609const char *ix86_asm_string; 610enum asm_dialect ix86_asm_dialect = ASM_ATT; 611 612/* which cpu are we scheduling for */ 613enum processor_type ix86_cpu; 614 615/* which unit we are generating floating point math for */ 616enum fpmath_unit ix86_fpmath; 617 618/* which instruction set architecture to use. */ 619int ix86_arch; 620 621/* Strings to hold which cpu and instruction set architecture to use. */ 622const char *ix86_cpu_string; /* for -mcpu=<xxx> */ 623const char *ix86_arch_string; /* for -march=<xxx> */ 624const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */ 625 626/* # of registers to use to pass arguments. */ 627const char *ix86_regparm_string; 628 629/* true if sse prefetch instruction is not NOOP. */ 630int x86_prefetch_sse; 631 632/* ix86_regparm_string as a number */ 633int ix86_regparm; 634 635/* Alignment to use for loops and jumps: */ 636 637/* Power of two alignment for loops. */ 638const char *ix86_align_loops_string; 639 640/* Power of two alignment for non-loop jumps. */ 641const char *ix86_align_jumps_string; 642 643/* Power of two alignment for stack boundary in bytes. */ 644const char *ix86_preferred_stack_boundary_string; 645 646/* Preferred alignment for stack boundary in bits. */ 647int ix86_preferred_stack_boundary; 648 649/* Values 1-5: see jump.c */ 650int ix86_branch_cost; 651const char *ix86_branch_cost_string; 652 653/* Power of two alignment for functions. */ 654const char *ix86_align_funcs_string; 655 656/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */ 657static char internal_label_prefix[16]; 658static int internal_label_prefix_len; 659 660static int local_symbolic_operand PARAMS ((rtx, enum machine_mode)); 661static void output_pic_addr_const PARAMS ((FILE *, rtx, int)); 662static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode, 663 int, int, FILE *)); 664static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx)); 665static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code, 666 rtx *, rtx *)); 667static rtx gen_push PARAMS ((rtx)); 668static int memory_address_length PARAMS ((rtx addr)); 669static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type)); 670static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type)); 671static int ix86_safe_length PARAMS ((rtx)); 672static enum attr_memory ix86_safe_memory PARAMS ((rtx)); 673static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx)); 674static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx)); 675static void ix86_dump_ppro_packet PARAMS ((FILE *)); 676static void ix86_reorder_insn PARAMS ((rtx *, rtx *)); 677static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair, 678 rtx)); 679static void ix86_init_machine_status PARAMS ((struct function *)); 680static void ix86_mark_machine_status PARAMS ((struct function *)); 681static void ix86_free_machine_status PARAMS ((struct function *)); 682static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode)); 683static int ix86_safe_length_prefix PARAMS ((rtx)); 684static int ix86_nsaved_regs PARAMS ((void)); 685static void ix86_emit_save_regs PARAMS ((void)); 686static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT)); 687static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int)); 688static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx)); 689static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *)); 690static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *)); 691static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void)); 692static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT)); 693static rtx ix86_expand_aligntest PARAMS ((rtx, int)); 694static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx)); 695static int ix86_issue_rate PARAMS ((void)); 696static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int)); 697static void ix86_sched_init PARAMS ((FILE *, int, int)); 698static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int)); 699static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int)); 700static void ix86_init_mmx_sse_builtins PARAMS ((void)); 701 702struct ix86_address 703{ 704 rtx base, index, disp; 705 HOST_WIDE_INT scale; 706}; 707 708static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *)); 709 710struct builtin_description; 711static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *, 712 tree, rtx)); 713static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *, 714 tree, rtx)); 715static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx)); 716static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int)); 717static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx)); 718static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code, 719 tree, rtx)); 720static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree)); 721static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode)); 722static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code)); 723static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code, 724 enum rtx_code *, 725 enum rtx_code *, 726 enum rtx_code *)); 727static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx, 728 rtx *, rtx *)); 729static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code)); 730static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code)); 731static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code)); 732static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code)); 733static int ix86_save_reg PARAMS ((int, int)); 734static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *)); 735static int ix86_comp_type_attributes PARAMS ((tree, tree)); 736const struct attribute_spec ix86_attribute_table[]; 737static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *)); 738static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *)); 739 740#ifdef DO_GLOBAL_CTORS_BODY 741static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int)); 742#endif 743 744/* Register class used for passing given 64bit part of the argument. 745 These represent classes as documented by the PS ABI, with the exception 746 of SSESF, SSEDF classes, that are basically SSE class, just gcc will 747 use SF or DFmode move instead of DImode to avoid reformating penalties. 748 749 Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves 750 whenever possible (upper half does contain padding). 751 */ 752enum x86_64_reg_class 753 { 754 X86_64_NO_CLASS, 755 X86_64_INTEGER_CLASS, 756 X86_64_INTEGERSI_CLASS, 757 X86_64_SSE_CLASS, 758 X86_64_SSESF_CLASS, 759 X86_64_SSEDF_CLASS, 760 X86_64_SSEUP_CLASS, 761 X86_64_X87_CLASS, 762 X86_64_X87UP_CLASS, 763 X86_64_MEMORY_CLASS 764 }; 765static const char * const x86_64_reg_class_name[] = 766 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"}; 767 768#define MAX_CLASSES 4 769static int classify_argument PARAMS ((enum machine_mode, tree, 770 enum x86_64_reg_class [MAX_CLASSES], 771 int)); 772static int examine_argument PARAMS ((enum machine_mode, tree, int, int *, 773 int *)); 774static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int, 775 const int *, int)); 776static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class, 777 enum x86_64_reg_class)); 778 779/* Initialize the GCC target structure. */ 780#undef TARGET_ATTRIBUTE_TABLE 781#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table 782#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES 783# undef TARGET_MERGE_DECL_ATTRIBUTES 784# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes 785#endif 786 787#undef TARGET_COMP_TYPE_ATTRIBUTES 788#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes 789 790#undef TARGET_INIT_BUILTINS 791#define TARGET_INIT_BUILTINS ix86_init_builtins 792 793#undef TARGET_EXPAND_BUILTIN 794#define TARGET_EXPAND_BUILTIN ix86_expand_builtin 795 796#if defined (OSF_OS) || defined (TARGET_OSF1ELF) 797 static void ix86_osf_output_function_prologue PARAMS ((FILE *, 798 HOST_WIDE_INT)); 799# undef TARGET_ASM_FUNCTION_PROLOGUE 800# define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue 801#endif 802 803#undef TARGET_ASM_OPEN_PAREN 804#define TARGET_ASM_OPEN_PAREN "" 805#undef TARGET_ASM_CLOSE_PAREN 806#define TARGET_ASM_CLOSE_PAREN "" 807 808#undef TARGET_ASM_ALIGNED_HI_OP 809#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT 810#undef TARGET_ASM_ALIGNED_SI_OP 811#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG 812#ifdef ASM_QUAD 813#undef TARGET_ASM_ALIGNED_DI_OP 814#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD 815#endif 816 817#undef TARGET_ASM_UNALIGNED_HI_OP 818#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP 819#undef TARGET_ASM_UNALIGNED_SI_OP 820#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP 821#undef TARGET_ASM_UNALIGNED_DI_OP 822#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP 823 824#undef TARGET_SCHED_ADJUST_COST 825#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost 826#undef TARGET_SCHED_ISSUE_RATE 827#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate 828#undef TARGET_SCHED_VARIABLE_ISSUE 829#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue 830#undef TARGET_SCHED_INIT 831#define TARGET_SCHED_INIT ix86_sched_init 832#undef TARGET_SCHED_REORDER 833#define TARGET_SCHED_REORDER ix86_sched_reorder 834 835struct gcc_target targetm = TARGET_INITIALIZER; 836 837/* Sometimes certain combinations of command options do not make 838 sense on a particular target machine. You can define a macro 839 `OVERRIDE_OPTIONS' to take account of this. This macro, if 840 defined, is executed once just after all the command options have 841 been parsed. 842 843 Don't use this macro to turn on various extra optimizations for 844 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ 845 846void 847override_options () 848{ 849 int i; 850 /* Comes from final.c -- no real reason to change it. */ 851#define MAX_CODE_ALIGN 16 852 853 static struct ptt 854 { 855 const struct processor_costs *cost; /* Processor costs */ 856 const int target_enable; /* Target flags to enable. */ 857 const int target_disable; /* Target flags to disable. */ 858 const int align_loop; /* Default alignments. */ 859 const int align_loop_max_skip; 860 const int align_jump; 861 const int align_jump_max_skip; 862 const int align_func; 863 const int branch_cost; 864 } 865 const processor_target_table[PROCESSOR_max] = 866 { 867 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1}, 868 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1}, 869 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1}, 870 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1}, 871 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1}, 872 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1}, 873 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1} 874 }; 875 876 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES; 877 static struct pta 878 { 879 const char *const name; /* processor name or nickname. */ 880 const enum processor_type processor; 881 const enum pta_flags 882 { 883 PTA_SSE = 1, 884 PTA_SSE2 = 2, 885 PTA_MMX = 4, 886 PTA_PREFETCH_SSE = 8, 887 PTA_3DNOW = 16, 888 PTA_3DNOW_A = 64 889 } flags; 890 } 891 const processor_alias_table[] = 892 { 893 {"i386", PROCESSOR_I386, 0}, 894 {"i486", PROCESSOR_I486, 0}, 895 {"i586", PROCESSOR_PENTIUM, 0}, 896 {"pentium", PROCESSOR_PENTIUM, 0}, 897 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX}, 898 {"i686", PROCESSOR_PENTIUMPRO, 0}, 899 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0}, 900 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX}, 901 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, 902 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | 903 PTA_MMX | PTA_PREFETCH_SSE}, 904 {"k6", PROCESSOR_K6, PTA_MMX}, 905 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 906 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 907 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 908 | PTA_3DNOW_A}, 909 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE 910 | PTA_3DNOW | PTA_3DNOW_A}, 911 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 912 | PTA_3DNOW_A | PTA_SSE}, 913 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 914 | PTA_3DNOW_A | PTA_SSE}, 915 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 916 | PTA_3DNOW_A | PTA_SSE}, 917 }; 918 919 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta); 920 921#ifdef SUBTARGET_OVERRIDE_OPTIONS 922 SUBTARGET_OVERRIDE_OPTIONS; 923#endif 924 925 if (!ix86_cpu_string && ix86_arch_string) 926 ix86_cpu_string = ix86_arch_string; 927 if (!ix86_cpu_string) 928 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT]; 929 if (!ix86_arch_string) 930 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386"; 931 932 if (ix86_cmodel_string != 0) 933 { 934 if (!strcmp (ix86_cmodel_string, "small")) 935 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 936 else if (flag_pic) 937 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string); 938 else if (!strcmp (ix86_cmodel_string, "32")) 939 ix86_cmodel = CM_32; 940 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic) 941 ix86_cmodel = CM_KERNEL; 942 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic) 943 ix86_cmodel = CM_MEDIUM; 944 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic) 945 ix86_cmodel = CM_LARGE; 946 else 947 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string); 948 } 949 else 950 { 951 ix86_cmodel = CM_32; 952 if (TARGET_64BIT) 953 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 954 } 955 if (ix86_asm_string != 0) 956 { 957 if (!strcmp (ix86_asm_string, "intel")) 958 ix86_asm_dialect = ASM_INTEL; 959 else if (!strcmp (ix86_asm_string, "att")) 960 ix86_asm_dialect = ASM_ATT; 961 else 962 error ("bad value (%s) for -masm= switch", ix86_asm_string); 963 } 964 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32)) 965 error ("code model `%s' not supported in the %s bit mode", 966 ix86_cmodel_string, TARGET_64BIT ? "64" : "32"); 967 if (ix86_cmodel == CM_LARGE) 968 sorry ("code model `large' not supported yet"); 969 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0)) 970 sorry ("%i-bit mode not compiled in", 971 (target_flags & MASK_64BIT) ? 64 : 32); 972 973 for (i = 0; i < pta_size; i++) 974 if (! strcmp (ix86_arch_string, processor_alias_table[i].name)) 975 { 976 ix86_arch = processor_alias_table[i].processor; 977 /* Default cpu tuning to the architecture. */ 978 ix86_cpu = ix86_arch; 979 if (processor_alias_table[i].flags & PTA_MMX 980 && !(target_flags & MASK_MMX_SET)) 981 target_flags |= MASK_MMX; 982 if (processor_alias_table[i].flags & PTA_3DNOW 983 && !(target_flags & MASK_3DNOW_SET)) 984 target_flags |= MASK_3DNOW; 985 if (processor_alias_table[i].flags & PTA_3DNOW_A 986 && !(target_flags & MASK_3DNOW_A_SET)) 987 target_flags |= MASK_3DNOW_A; 988 if (processor_alias_table[i].flags & PTA_SSE 989 && !(target_flags & MASK_SSE_SET)) 990 target_flags |= MASK_SSE; 991 if (processor_alias_table[i].flags & PTA_SSE2 992 && !(target_flags & MASK_SSE2_SET)) 993 target_flags |= MASK_SSE2; 994 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) 995 x86_prefetch_sse = true; 996 break; 997 } 998 999 if (i == pta_size) 1000 error ("bad value (%s) for -march= switch", ix86_arch_string); 1001 1002 for (i = 0; i < pta_size; i++) 1003 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name)) 1004 { 1005 ix86_cpu = processor_alias_table[i].processor; 1006 break; 1007 } 1008 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) 1009 x86_prefetch_sse = true; 1010 if (i == pta_size) 1011 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string); 1012 1013 if (optimize_size) 1014 ix86_cost = &size_cost; 1015 else 1016 ix86_cost = processor_target_table[ix86_cpu].cost; 1017 target_flags |= processor_target_table[ix86_cpu].target_enable; 1018 target_flags &= ~processor_target_table[ix86_cpu].target_disable; 1019 1020 /* Arrange to set up i386_stack_locals for all functions. */ 1021 init_machine_status = ix86_init_machine_status; 1022 mark_machine_status = ix86_mark_machine_status; 1023 free_machine_status = ix86_free_machine_status; 1024 1025 /* Validate -mregparm= value. */ 1026 if (ix86_regparm_string) 1027 { 1028 i = atoi (ix86_regparm_string); 1029 if (i < 0 || i > REGPARM_MAX) 1030 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX); 1031 else 1032 ix86_regparm = i; 1033 } 1034 else 1035 if (TARGET_64BIT) 1036 ix86_regparm = REGPARM_MAX; 1037 1038 /* If the user has provided any of the -malign-* options, 1039 warn and use that value only if -falign-* is not set. 1040 Remove this code in GCC 3.2 or later. */ 1041 if (ix86_align_loops_string) 1042 { 1043 warning ("-malign-loops is obsolete, use -falign-loops"); 1044 if (align_loops == 0) 1045 { 1046 i = atoi (ix86_align_loops_string); 1047 if (i < 0 || i > MAX_CODE_ALIGN) 1048 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1049 else 1050 align_loops = 1 << i; 1051 } 1052 } 1053 1054 if (ix86_align_jumps_string) 1055 { 1056 warning ("-malign-jumps is obsolete, use -falign-jumps"); 1057 if (align_jumps == 0) 1058 { 1059 i = atoi (ix86_align_jumps_string); 1060 if (i < 0 || i > MAX_CODE_ALIGN) 1061 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1062 else 1063 align_jumps = 1 << i; 1064 } 1065 } 1066 1067 if (ix86_align_funcs_string) 1068 { 1069 warning ("-malign-functions is obsolete, use -falign-functions"); 1070 if (align_functions == 0) 1071 { 1072 i = atoi (ix86_align_funcs_string); 1073 if (i < 0 || i > MAX_CODE_ALIGN) 1074 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1075 else 1076 align_functions = 1 << i; 1077 } 1078 } 1079 1080 /* Default align_* from the processor table. */ 1081 if (align_loops == 0) 1082 { 1083 align_loops = processor_target_table[ix86_cpu].align_loop; 1084 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip; 1085 } 1086 if (align_jumps == 0) 1087 { 1088 align_jumps = processor_target_table[ix86_cpu].align_jump; 1089 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip; 1090 } 1091 if (align_functions == 0) 1092 { 1093 align_functions = processor_target_table[ix86_cpu].align_func; 1094 } 1095 1096 /* Validate -mpreferred-stack-boundary= value, or provide default. 1097 The default of 128 bits is for Pentium III's SSE __m128, but we 1098 don't want additional code to keep the stack aligned when 1099 optimizing for code size. */ 1100 ix86_preferred_stack_boundary = (optimize_size 1101 ? TARGET_64BIT ? 128 : 32 1102 : 128); 1103 if (ix86_preferred_stack_boundary_string) 1104 { 1105 i = atoi (ix86_preferred_stack_boundary_string); 1106 if (i < (TARGET_64BIT ? 4 : 2) || i > 12) 1107 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i, 1108 TARGET_64BIT ? 4 : 2); 1109 else 1110 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT; 1111 } 1112 1113 /* Validate -mbranch-cost= value, or provide default. */ 1114 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost; 1115 if (ix86_branch_cost_string) 1116 { 1117 i = atoi (ix86_branch_cost_string); 1118 if (i < 0 || i > 5) 1119 error ("-mbranch-cost=%d is not between 0 and 5", i); 1120 else 1121 ix86_branch_cost = i; 1122 } 1123 1124 /* Keep nonleaf frame pointers. */ 1125 if (TARGET_OMIT_LEAF_FRAME_POINTER) 1126 flag_omit_frame_pointer = 1; 1127 1128 /* If we're doing fast math, we don't care about comparison order 1129 wrt NaNs. This lets us use a shorter comparison sequence. */ 1130 if (flag_unsafe_math_optimizations) 1131 target_flags &= ~MASK_IEEE_FP; 1132 1133 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387, 1134 since the insns won't need emulation. */ 1135 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch)) 1136 target_flags &= ~MASK_NO_FANCY_MATH_387; 1137 1138 if (TARGET_64BIT) 1139 { 1140 if (TARGET_ALIGN_DOUBLE) 1141 error ("-malign-double makes no sense in the 64bit mode"); 1142 if (TARGET_RTD) 1143 error ("-mrtd calling convention not supported in the 64bit mode"); 1144 /* Enable by default the SSE and MMX builtins. */ 1145 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE); 1146 ix86_fpmath = FPMATH_SSE; 1147 } 1148 else 1149 ix86_fpmath = FPMATH_387; 1150 1151 if (ix86_fpmath_string != 0) 1152 { 1153 if (! strcmp (ix86_fpmath_string, "387")) 1154 ix86_fpmath = FPMATH_387; 1155 else if (! strcmp (ix86_fpmath_string, "sse")) 1156 { 1157 if (!TARGET_SSE) 1158 { 1159 warning ("SSE instruction set disabled, using 387 arithmetics"); 1160 ix86_fpmath = FPMATH_387; 1161 } 1162 else 1163 ix86_fpmath = FPMATH_SSE; 1164 } 1165 else if (! strcmp (ix86_fpmath_string, "387,sse") 1166 || ! strcmp (ix86_fpmath_string, "sse,387")) 1167 { 1168 if (!TARGET_SSE) 1169 { 1170 warning ("SSE instruction set disabled, using 387 arithmetics"); 1171 ix86_fpmath = FPMATH_387; 1172 } 1173 else if (!TARGET_80387) 1174 { 1175 warning ("387 instruction set disabled, using SSE arithmetics"); 1176 ix86_fpmath = FPMATH_SSE; 1177 } 1178 else 1179 ix86_fpmath = FPMATH_SSE | FPMATH_387; 1180 } 1181 else 1182 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string); 1183 } 1184 1185 /* It makes no sense to ask for just SSE builtins, so MMX is also turned 1186 on by -msse. */ 1187 if (TARGET_SSE) 1188 { 1189 target_flags |= MASK_MMX; 1190 x86_prefetch_sse = true; 1191 } 1192 1193 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */ 1194 if (TARGET_3DNOW) 1195 { 1196 target_flags |= MASK_MMX; 1197 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX 1198 extensions it adds. */ 1199 if (x86_3dnow_a & (1 << ix86_arch)) 1200 target_flags |= MASK_3DNOW_A; 1201 } 1202 if ((x86_accumulate_outgoing_args & CPUMASK) 1203 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET) 1204 && !optimize_size) 1205 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 1206 1207 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ 1208 { 1209 char *p; 1210 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); 1211 p = strchr (internal_label_prefix, 'X'); 1212 internal_label_prefix_len = p - internal_label_prefix; 1213 *p = '\0'; 1214 } 1215} 1216 1217void 1218optimization_options (level, size) 1219 int level; 1220 int size ATTRIBUTE_UNUSED; 1221{ 1222 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to 1223 make the problem with not enough registers even worse. */ 1224#ifdef INSN_SCHEDULING 1225 if (level > 1) 1226 flag_schedule_insns = 0; 1227#endif 1228 if (TARGET_64BIT && optimize >= 1) 1229 flag_omit_frame_pointer = 1; 1230 if (TARGET_64BIT) 1231 { 1232 flag_pcc_struct_return = 0; 1233 flag_asynchronous_unwind_tables = 1; 1234 } 1235} 1236 1237/* Table of valid machine attributes. */ 1238const struct attribute_spec ix86_attribute_table[] = 1239{ 1240 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ 1241 /* Stdcall attribute says callee is responsible for popping arguments 1242 if they are not variable. */ 1243 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute }, 1244 /* Cdecl attribute says the callee is a normal C declaration */ 1245 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute }, 1246 /* Regparm attribute specifies how many integer arguments are to be 1247 passed in registers. */ 1248 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute }, 1249#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES 1250 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute }, 1251 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute }, 1252 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute }, 1253#endif 1254 { NULL, 0, 0, false, false, false, NULL } 1255}; 1256 1257/* Handle a "cdecl" or "stdcall" attribute; 1258 arguments as in struct attribute_spec.handler. */ 1259static tree 1260ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs) 1261 tree *node; 1262 tree name; 1263 tree args ATTRIBUTE_UNUSED; 1264 int flags ATTRIBUTE_UNUSED; 1265 bool *no_add_attrs; 1266{ 1267 if (TREE_CODE (*node) != FUNCTION_TYPE 1268 && TREE_CODE (*node) != METHOD_TYPE 1269 && TREE_CODE (*node) != FIELD_DECL 1270 && TREE_CODE (*node) != TYPE_DECL) 1271 { 1272 warning ("`%s' attribute only applies to functions", 1273 IDENTIFIER_POINTER (name)); 1274 *no_add_attrs = true; 1275 } 1276 1277 if (TARGET_64BIT) 1278 { 1279 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name)); 1280 *no_add_attrs = true; 1281 } 1282 1283 return NULL_TREE; 1284} 1285 1286/* Handle a "regparm" attribute; 1287 arguments as in struct attribute_spec.handler. */ 1288static tree 1289ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs) 1290 tree *node; 1291 tree name; 1292 tree args; 1293 int flags ATTRIBUTE_UNUSED; 1294 bool *no_add_attrs; 1295{ 1296 if (TREE_CODE (*node) != FUNCTION_TYPE 1297 && TREE_CODE (*node) != METHOD_TYPE 1298 && TREE_CODE (*node) != FIELD_DECL 1299 && TREE_CODE (*node) != TYPE_DECL) 1300 { 1301 warning ("`%s' attribute only applies to functions", 1302 IDENTIFIER_POINTER (name)); 1303 *no_add_attrs = true; 1304 } 1305 else 1306 { 1307 tree cst; 1308 1309 cst = TREE_VALUE (args); 1310 if (TREE_CODE (cst) != INTEGER_CST) 1311 { 1312 warning ("`%s' attribute requires an integer constant argument", 1313 IDENTIFIER_POINTER (name)); 1314 *no_add_attrs = true; 1315 } 1316 else if (compare_tree_int (cst, REGPARM_MAX) > 0) 1317 { 1318 warning ("argument to `%s' attribute larger than %d", 1319 IDENTIFIER_POINTER (name), REGPARM_MAX); 1320 *no_add_attrs = true; 1321 } 1322 } 1323 1324 return NULL_TREE; 1325} 1326 1327#if defined (OSF_OS) || defined (TARGET_OSF1ELF) 1328 1329/* Generate the assembly code for function entry. FILE is a stdio 1330 stream to output the code to. SIZE is an int: how many units of 1331 temporary storage to allocate. 1332 1333 Refer to the array `regs_ever_live' to determine which registers to 1334 save; `regs_ever_live[I]' is nonzero if register number I is ever 1335 used in the function. This function is responsible for knowing 1336 which registers should not be saved even if used. 1337 1338 We override it here to allow for the new profiling code to go before 1339 the prologue and the old mcount code to go after the prologue (and 1340 after %ebx has been set up for ELF shared library support). */ 1341 1342static void 1343ix86_osf_output_function_prologue (file, size) 1344 FILE *file; 1345 HOST_WIDE_INT size; 1346{ 1347 const char *prefix = ""; 1348 const char *const lprefix = LPREFIX; 1349 int labelno = current_function_profile_label_no; 1350 1351#ifdef OSF_OS 1352 1353 if (TARGET_UNDERSCORES) 1354 prefix = "_"; 1355 1356 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE) 1357 { 1358 if (!flag_pic && !HALF_PIC_P ()) 1359 { 1360 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno); 1361 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix); 1362 } 1363 1364 else if (HALF_PIC_P ()) 1365 { 1366 rtx symref; 1367 1368 HALF_PIC_EXTERNAL ("_mcount_ptr"); 1369 symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode, 1370 "_mcount_ptr")); 1371 1372 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno); 1373 fprintf (file, "\tmovl %s%s,%%eax\n", prefix, 1374 XSTR (symref, 0)); 1375 fprintf (file, "\tcall *(%%eax)\n"); 1376 } 1377 1378 else 1379 { 1380 static int call_no = 0; 1381 1382 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no); 1383 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no); 1384 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n", 1385 lprefix, call_no++); 1386 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n", 1387 lprefix, labelno); 1388 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n", 1389 prefix); 1390 fprintf (file, "\tcall *(%%eax)\n"); 1391 } 1392 } 1393 1394#else /* !OSF_OS */ 1395 1396 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE) 1397 { 1398 if (!flag_pic) 1399 { 1400 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno); 1401 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix); 1402 } 1403 1404 else 1405 { 1406 static int call_no = 0; 1407 1408 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no); 1409 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no); 1410 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n", 1411 lprefix, call_no++); 1412 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n", 1413 lprefix, labelno); 1414 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n", 1415 prefix); 1416 fprintf (file, "\tcall *(%%eax)\n"); 1417 } 1418 } 1419#endif /* !OSF_OS */ 1420 1421 function_prologue (file, size); 1422} 1423 1424#endif /* OSF_OS || TARGET_OSF1ELF */ 1425 1426/* Return 0 if the attributes for two types are incompatible, 1 if they 1427 are compatible, and 2 if they are nearly compatible (which causes a 1428 warning to be generated). */ 1429 1430static int 1431ix86_comp_type_attributes (type1, type2) 1432 tree type1; 1433 tree type2; 1434{ 1435 /* Check for mismatch of non-default calling convention. */ 1436 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall"; 1437 1438 if (TREE_CODE (type1) != FUNCTION_TYPE) 1439 return 1; 1440 1441 /* Check for mismatched return types (cdecl vs stdcall). */ 1442 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1)) 1443 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2))) 1444 return 0; 1445 return 1; 1446} 1447 1448/* Value is the number of bytes of arguments automatically 1449 popped when returning from a subroutine call. 1450 FUNDECL is the declaration node of the function (as a tree), 1451 FUNTYPE is the data type of the function (as a tree), 1452 or for a library call it is an identifier node for the subroutine name. 1453 SIZE is the number of bytes of arguments passed on the stack. 1454 1455 On the 80386, the RTD insn may be used to pop them if the number 1456 of args is fixed, but if the number is variable then the caller 1457 must pop them all. RTD can't be used for library calls now 1458 because the library is compiled with the Unix compiler. 1459 Use of RTD is a selectable option, since it is incompatible with 1460 standard Unix calling sequences. If the option is not selected, 1461 the caller must always pop the args. 1462 1463 The attribute stdcall is equivalent to RTD on a per module basis. */ 1464 1465int 1466ix86_return_pops_args (fundecl, funtype, size) 1467 tree fundecl; 1468 tree funtype; 1469 int size; 1470{ 1471 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE); 1472 1473 /* Cdecl functions override -mrtd, and never pop the stack. */ 1474 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) { 1475 1476 /* Stdcall functions will pop the stack if not variable args. */ 1477 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))) 1478 rtd = 1; 1479 1480 if (rtd 1481 && (TYPE_ARG_TYPES (funtype) == NULL_TREE 1482 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype))) 1483 == void_type_node))) 1484 return size; 1485 } 1486 1487 /* Lose any fake structure return argument if it is passed on the stack. */ 1488 if (aggregate_value_p (TREE_TYPE (funtype)) 1489 && !TARGET_64BIT) 1490 { 1491 int nregs = ix86_regparm; 1492 1493 if (funtype) 1494 { 1495 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype)); 1496 1497 if (attr) 1498 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); 1499 } 1500 1501 if (!nregs) 1502 return GET_MODE_SIZE (Pmode); 1503 } 1504 1505 return 0; 1506} 1507 1508/* Argument support functions. */ 1509 1510/* Return true when register may be used to pass function parameters. */ 1511bool 1512ix86_function_arg_regno_p (regno) 1513 int regno; 1514{ 1515 int i; 1516 if (!TARGET_64BIT) 1517 return (regno < REGPARM_MAX 1518 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno])); 1519 if (SSE_REGNO_P (regno) && TARGET_SSE) 1520 return true; 1521 /* RAX is used as hidden argument to va_arg functions. */ 1522 if (!regno) 1523 return true; 1524 for (i = 0; i < REGPARM_MAX; i++) 1525 if (regno == x86_64_int_parameter_registers[i]) 1526 return true; 1527 return false; 1528} 1529 1530/* Initialize a variable CUM of type CUMULATIVE_ARGS 1531 for a call to a function whose data type is FNTYPE. 1532 For a library call, FNTYPE is 0. */ 1533 1534void 1535init_cumulative_args (cum, fntype, libname) 1536 CUMULATIVE_ARGS *cum; /* Argument info to initialize */ 1537 tree fntype; /* tree ptr for function decl */ 1538 rtx libname; /* SYMBOL_REF of library name or 0 */ 1539{ 1540 static CUMULATIVE_ARGS zero_cum; 1541 tree param, next_param; 1542 1543 if (TARGET_DEBUG_ARG) 1544 { 1545 fprintf (stderr, "\ninit_cumulative_args ("); 1546 if (fntype) 1547 fprintf (stderr, "fntype code = %s, ret code = %s", 1548 tree_code_name[(int) TREE_CODE (fntype)], 1549 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]); 1550 else 1551 fprintf (stderr, "no fntype"); 1552 1553 if (libname) 1554 fprintf (stderr, ", libname = %s", XSTR (libname, 0)); 1555 } 1556 1557 *cum = zero_cum; 1558 1559 /* Set up the number of registers to use for passing arguments. */ 1560 cum->nregs = ix86_regparm; 1561 cum->sse_nregs = SSE_REGPARM_MAX; 1562 if (fntype && !TARGET_64BIT) 1563 { 1564 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype)); 1565 1566 if (attr) 1567 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); 1568 } 1569 cum->maybe_vaarg = false; 1570 1571 /* Determine if this function has variable arguments. This is 1572 indicated by the last argument being 'void_type_mode' if there 1573 are no variable arguments. If there are variable arguments, then 1574 we won't pass anything in registers */ 1575 1576 if (cum->nregs) 1577 { 1578 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0; 1579 param != 0; param = next_param) 1580 { 1581 next_param = TREE_CHAIN (param); 1582 if (next_param == 0 && TREE_VALUE (param) != void_type_node) 1583 { 1584 if (!TARGET_64BIT) 1585 cum->nregs = 0; 1586 cum->maybe_vaarg = true; 1587 } 1588 } 1589 } 1590 if ((!fntype && !libname) 1591 || (fntype && !TYPE_ARG_TYPES (fntype))) 1592 cum->maybe_vaarg = 1; 1593 1594 if (TARGET_DEBUG_ARG) 1595 fprintf (stderr, ", nregs=%d )\n", cum->nregs); 1596 1597 return; 1598} 1599 1600/* x86-64 register passing impleemntation. See x86-64 ABI for details. Goal 1601 of this code is to classify each 8bytes of incoming argument by the register 1602 class and assign registers accordingly. */ 1603 1604/* Return the union class of CLASS1 and CLASS2. 1605 See the x86-64 PS ABI for details. */ 1606 1607static enum x86_64_reg_class 1608merge_classes (class1, class2) 1609 enum x86_64_reg_class class1, class2; 1610{ 1611 /* Rule #1: If both classes are equal, this is the resulting class. */ 1612 if (class1 == class2) 1613 return class1; 1614 1615 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is 1616 the other class. */ 1617 if (class1 == X86_64_NO_CLASS) 1618 return class2; 1619 if (class2 == X86_64_NO_CLASS) 1620 return class1; 1621 1622 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ 1623 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) 1624 return X86_64_MEMORY_CLASS; 1625 1626 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ 1627 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) 1628 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) 1629 return X86_64_INTEGERSI_CLASS; 1630 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS 1631 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) 1632 return X86_64_INTEGER_CLASS; 1633 1634 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */ 1635 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS 1636 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS) 1637 return X86_64_MEMORY_CLASS; 1638 1639 /* Rule #6: Otherwise class SSE is used. */ 1640 return X86_64_SSE_CLASS; 1641} 1642 1643/* Classify the argument of type TYPE and mode MODE. 1644 CLASSES will be filled by the register class used to pass each word 1645 of the operand. The number of words is returned. In case the parameter 1646 should be passed in memory, 0 is returned. As a special case for zero 1647 sized containers, classes[0] will be NO_CLASS and 1 is returned. 1648 1649 BIT_OFFSET is used internally for handling records and specifies offset 1650 of the offset in bits modulo 256 to avoid overflow cases. 1651 1652 See the x86-64 PS ABI for details. 1653*/ 1654 1655static int 1656classify_argument (mode, type, classes, bit_offset) 1657 enum machine_mode mode; 1658 tree type; 1659 enum x86_64_reg_class classes[MAX_CLASSES]; 1660 int bit_offset; 1661{ 1662 int bytes = 1663 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 1664 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 1665 1666 if (type && AGGREGATE_TYPE_P (type)) 1667 { 1668 int i; 1669 tree field; 1670 enum x86_64_reg_class subclasses[MAX_CLASSES]; 1671 1672 /* On x86-64 we pass structures larger than 16 bytes on the stack. */ 1673 if (bytes > 16) 1674 return 0; 1675 1676 for (i = 0; i < words; i++) 1677 classes[i] = X86_64_NO_CLASS; 1678 1679 /* Zero sized arrays or structures are NO_CLASS. We return 0 to 1680 signalize memory class, so handle it as special case. */ 1681 if (!words) 1682 { 1683 classes[0] = X86_64_NO_CLASS; 1684 return 1; 1685 } 1686 1687 /* Classify each field of record and merge classes. */ 1688 if (TREE_CODE (type) == RECORD_TYPE) 1689 { 1690 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 1691 { 1692 if (TREE_CODE (field) == FIELD_DECL) 1693 { 1694 int num; 1695 1696 /* Bitfields are always classified as integer. Handle them 1697 early, since later code would consider them to be 1698 misaligned integers. */ 1699 if (DECL_BIT_FIELD (field)) 1700 { 1701 for (i = int_bit_position (field) / 8 / 8; 1702 i < (int_bit_position (field) 1703 + tree_low_cst (DECL_SIZE (field), 0) 1704 + 63) / 8 / 8; i++) 1705 classes[i] = 1706 merge_classes (X86_64_INTEGER_CLASS, 1707 classes[i]); 1708 } 1709 else 1710 { 1711 num = classify_argument (TYPE_MODE (TREE_TYPE (field)), 1712 TREE_TYPE (field), subclasses, 1713 (int_bit_position (field) 1714 + bit_offset) % 256); 1715 if (!num) 1716 return 0; 1717 for (i = 0; i < num; i++) 1718 { 1719 int pos = 1720 (int_bit_position (field) + bit_offset) / 8 / 8; 1721 classes[i + pos] = 1722 merge_classes (subclasses[i], classes[i + pos]); 1723 } 1724 } 1725 } 1726 } 1727 } 1728 /* Arrays are handled as small records. */ 1729 else if (TREE_CODE (type) == ARRAY_TYPE) 1730 { 1731 int num; 1732 num = classify_argument (TYPE_MODE (TREE_TYPE (type)), 1733 TREE_TYPE (type), subclasses, bit_offset); 1734 if (!num) 1735 return 0; 1736 1737 /* The partial classes are now full classes. */ 1738 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) 1739 subclasses[0] = X86_64_SSE_CLASS; 1740 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4) 1741 subclasses[0] = X86_64_INTEGER_CLASS; 1742 1743 for (i = 0; i < words; i++) 1744 classes[i] = subclasses[i % num]; 1745 } 1746 /* Unions are similar to RECORD_TYPE but offset is always 0. */ 1747 else if (TREE_CODE (type) == UNION_TYPE 1748 || TREE_CODE (type) == QUAL_UNION_TYPE) 1749 { 1750 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 1751 { 1752 if (TREE_CODE (field) == FIELD_DECL) 1753 { 1754 int num; 1755 num = classify_argument (TYPE_MODE (TREE_TYPE (field)), 1756 TREE_TYPE (field), subclasses, 1757 bit_offset); 1758 if (!num) 1759 return 0; 1760 for (i = 0; i < num; i++) 1761 classes[i] = merge_classes (subclasses[i], classes[i]); 1762 } 1763 } 1764 } 1765 else 1766 abort (); 1767 1768 /* Final merger cleanup. */ 1769 for (i = 0; i < words; i++) 1770 { 1771 /* If one class is MEMORY, everything should be passed in 1772 memory. */ 1773 if (classes[i] == X86_64_MEMORY_CLASS) 1774 return 0; 1775 1776 /* The X86_64_SSEUP_CLASS should be always preceded by 1777 X86_64_SSE_CLASS. */ 1778 if (classes[i] == X86_64_SSEUP_CLASS 1779 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS)) 1780 classes[i] = X86_64_SSE_CLASS; 1781 1782 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */ 1783 if (classes[i] == X86_64_X87UP_CLASS 1784 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS)) 1785 classes[i] = X86_64_SSE_CLASS; 1786 } 1787 return words; 1788 } 1789 1790 /* Compute alignment needed. We align all types to natural boundaries with 1791 exception of XFmode that is aligned to 64bits. */ 1792 if (mode != VOIDmode && mode != BLKmode) 1793 { 1794 int mode_alignment = GET_MODE_BITSIZE (mode); 1795 1796 if (mode == XFmode) 1797 mode_alignment = 128; 1798 else if (mode == XCmode) 1799 mode_alignment = 256; 1800 /* Misaligned fields are always returned in memory. */ 1801 if (bit_offset % mode_alignment) 1802 return 0; 1803 } 1804 1805 /* Classification of atomic types. */ 1806 switch (mode) 1807 { 1808 case DImode: 1809 case SImode: 1810 case HImode: 1811 case QImode: 1812 case CSImode: 1813 case CHImode: 1814 case CQImode: 1815 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) 1816 classes[0] = X86_64_INTEGERSI_CLASS; 1817 else 1818 classes[0] = X86_64_INTEGER_CLASS; 1819 return 1; 1820 case CDImode: 1821 case TImode: 1822 classes[0] = classes[1] = X86_64_INTEGER_CLASS; 1823 return 2; 1824 case CTImode: 1825 classes[0] = classes[1] = X86_64_INTEGER_CLASS; 1826 classes[2] = classes[3] = X86_64_INTEGER_CLASS; 1827 return 4; 1828 case SFmode: 1829 if (!(bit_offset % 64)) 1830 classes[0] = X86_64_SSESF_CLASS; 1831 else 1832 classes[0] = X86_64_SSE_CLASS; 1833 return 1; 1834 case DFmode: 1835 classes[0] = X86_64_SSEDF_CLASS; 1836 return 1; 1837 case TFmode: 1838 classes[0] = X86_64_X87_CLASS; 1839 classes[1] = X86_64_X87UP_CLASS; 1840 return 2; 1841 case TCmode: 1842 classes[0] = X86_64_X87_CLASS; 1843 classes[1] = X86_64_X87UP_CLASS; 1844 classes[2] = X86_64_X87_CLASS; 1845 classes[3] = X86_64_X87UP_CLASS; 1846 return 4; 1847 case DCmode: 1848 classes[0] = X86_64_SSEDF_CLASS; 1849 classes[1] = X86_64_SSEDF_CLASS; 1850 return 2; 1851 case SCmode: 1852 classes[0] = X86_64_SSE_CLASS; 1853 return 1; 1854 case V4SFmode: 1855 case V4SImode: 1856 classes[0] = X86_64_SSE_CLASS; 1857 classes[1] = X86_64_SSEUP_CLASS; 1858 return 2; 1859 case V2SFmode: 1860 case V2SImode: 1861 case V4HImode: 1862 case V8QImode: 1863 classes[0] = X86_64_SSE_CLASS; 1864 return 1; 1865 case BLKmode: 1866 case VOIDmode: 1867 return 0; 1868 default: 1869 abort (); 1870 } 1871} 1872 1873/* Examine the argument and return set number of register required in each 1874 class. Return 0 iff parameter should be passed in memory. */ 1875static int 1876examine_argument (mode, type, in_return, int_nregs, sse_nregs) 1877 enum machine_mode mode; 1878 tree type; 1879 int *int_nregs, *sse_nregs; 1880 int in_return; 1881{ 1882 enum x86_64_reg_class class[MAX_CLASSES]; 1883 int n = classify_argument (mode, type, class, 0); 1884 1885 *int_nregs = 0; 1886 *sse_nregs = 0; 1887 if (!n) 1888 return 0; 1889 for (n--; n >= 0; n--) 1890 switch (class[n]) 1891 { 1892 case X86_64_INTEGER_CLASS: 1893 case X86_64_INTEGERSI_CLASS: 1894 (*int_nregs)++; 1895 break; 1896 case X86_64_SSE_CLASS: 1897 case X86_64_SSESF_CLASS: 1898 case X86_64_SSEDF_CLASS: 1899 (*sse_nregs)++; 1900 break; 1901 case X86_64_NO_CLASS: 1902 case X86_64_SSEUP_CLASS: 1903 break; 1904 case X86_64_X87_CLASS: 1905 case X86_64_X87UP_CLASS: 1906 if (!in_return) 1907 return 0; 1908 break; 1909 case X86_64_MEMORY_CLASS: 1910 abort (); 1911 } 1912 return 1; 1913} 1914/* Construct container for the argument used by GCC interface. See 1915 FUNCTION_ARG for the detailed description. */ 1916static rtx 1917construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno) 1918 enum machine_mode mode; 1919 tree type; 1920 int in_return; 1921 int nintregs, nsseregs; 1922 const int * intreg; 1923 int sse_regno; 1924{ 1925 enum machine_mode tmpmode; 1926 int bytes = 1927 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 1928 enum x86_64_reg_class class[MAX_CLASSES]; 1929 int n; 1930 int i; 1931 int nexps = 0; 1932 int needed_sseregs, needed_intregs; 1933 rtx exp[MAX_CLASSES]; 1934 rtx ret; 1935 1936 n = classify_argument (mode, type, class, 0); 1937 if (TARGET_DEBUG_ARG) 1938 { 1939 if (!n) 1940 fprintf (stderr, "Memory class\n"); 1941 else 1942 { 1943 fprintf (stderr, "Classes:"); 1944 for (i = 0; i < n; i++) 1945 { 1946 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]); 1947 } 1948 fprintf (stderr, "\n"); 1949 } 1950 } 1951 if (!n) 1952 return NULL; 1953 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs)) 1954 return NULL; 1955 if (needed_intregs > nintregs || needed_sseregs > nsseregs) 1956 return NULL; 1957 1958 /* First construct simple cases. Avoid SCmode, since we want to use 1959 single register to pass this type. */ 1960 if (n == 1 && mode != SCmode) 1961 switch (class[0]) 1962 { 1963 case X86_64_INTEGER_CLASS: 1964 case X86_64_INTEGERSI_CLASS: 1965 return gen_rtx_REG (mode, intreg[0]); 1966 case X86_64_SSE_CLASS: 1967 case X86_64_SSESF_CLASS: 1968 case X86_64_SSEDF_CLASS: 1969 return gen_rtx_REG (mode, SSE_REGNO (sse_regno)); 1970 case X86_64_X87_CLASS: 1971 return gen_rtx_REG (mode, FIRST_STACK_REG); 1972 case X86_64_NO_CLASS: 1973 /* Zero sized array, struct or class. */ 1974 return NULL; 1975 default: 1976 abort (); 1977 } 1978 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS) 1979 return gen_rtx_REG (mode, SSE_REGNO (sse_regno)); 1980 if (n == 2 1981 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS) 1982 return gen_rtx_REG (TFmode, FIRST_STACK_REG); 1983 if (n == 2 && class[0] == X86_64_INTEGER_CLASS 1984 && class[1] == X86_64_INTEGER_CLASS 1985 && (mode == CDImode || mode == TImode) 1986 && intreg[0] + 1 == intreg[1]) 1987 return gen_rtx_REG (mode, intreg[0]); 1988 if (n == 4 1989 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS 1990 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS) 1991 return gen_rtx_REG (TCmode, FIRST_STACK_REG); 1992 1993 /* Otherwise figure out the entries of the PARALLEL. */ 1994 for (i = 0; i < n; i++) 1995 { 1996 switch (class[i]) 1997 { 1998 case X86_64_NO_CLASS: 1999 break; 2000 case X86_64_INTEGER_CLASS: 2001 case X86_64_INTEGERSI_CLASS: 2002 /* Merge TImodes on aligned occassions here too. */ 2003 if (i * 8 + 8 > bytes) 2004 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0); 2005 else if (class[i] == X86_64_INTEGERSI_CLASS) 2006 tmpmode = SImode; 2007 else 2008 tmpmode = DImode; 2009 /* We've requested 24 bytes we don't have mode for. Use DImode. */ 2010 if (tmpmode == BLKmode) 2011 tmpmode = DImode; 2012 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, 2013 gen_rtx_REG (tmpmode, *intreg), 2014 GEN_INT (i*8)); 2015 intreg++; 2016 break; 2017 case X86_64_SSESF_CLASS: 2018 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, 2019 gen_rtx_REG (SFmode, 2020 SSE_REGNO (sse_regno)), 2021 GEN_INT (i*8)); 2022 sse_regno++; 2023 break; 2024 case X86_64_SSEDF_CLASS: 2025 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, 2026 gen_rtx_REG (DFmode, 2027 SSE_REGNO (sse_regno)), 2028 GEN_INT (i*8)); 2029 sse_regno++; 2030 break; 2031 case X86_64_SSE_CLASS: 2032 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS) 2033 tmpmode = TImode, i++; 2034 else 2035 tmpmode = DImode; 2036 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, 2037 gen_rtx_REG (tmpmode, 2038 SSE_REGNO (sse_regno)), 2039 GEN_INT (i*8)); 2040 sse_regno++; 2041 break; 2042 default: 2043 abort (); 2044 } 2045 } 2046 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps)); 2047 for (i = 0; i < nexps; i++) 2048 XVECEXP (ret, 0, i) = exp [i]; 2049 return ret; 2050} 2051 2052/* Update the data in CUM to advance over an argument 2053 of mode MODE and data type TYPE. 2054 (TYPE is null for libcalls where that information may not be available.) */ 2055 2056void 2057function_arg_advance (cum, mode, type, named) 2058 CUMULATIVE_ARGS *cum; /* current arg information */ 2059 enum machine_mode mode; /* current arg mode */ 2060 tree type; /* type of the argument or 0 if lib support */ 2061 int named; /* whether or not the argument was named */ 2062{ 2063 int bytes = 2064 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 2065 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 2066 2067 if (TARGET_DEBUG_ARG) 2068 fprintf (stderr, 2069 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n", 2070 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named); 2071 if (TARGET_64BIT) 2072 { 2073 int int_nregs, sse_nregs; 2074 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)) 2075 cum->words += words; 2076 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) 2077 { 2078 cum->nregs -= int_nregs; 2079 cum->sse_nregs -= sse_nregs; 2080 cum->regno += int_nregs; 2081 cum->sse_regno += sse_nregs; 2082 } 2083 else 2084 cum->words += words; 2085 } 2086 else 2087 { 2088 if (TARGET_SSE && mode == TImode) 2089 { 2090 cum->sse_words += words; 2091 cum->sse_nregs -= 1; 2092 cum->sse_regno += 1; 2093 if (cum->sse_nregs <= 0) 2094 { 2095 cum->sse_nregs = 0; 2096 cum->sse_regno = 0; 2097 } 2098 } 2099 else 2100 { 2101 cum->words += words; 2102 cum->nregs -= words; 2103 cum->regno += words; 2104 2105 if (cum->nregs <= 0) 2106 { 2107 cum->nregs = 0; 2108 cum->regno = 0; 2109 } 2110 } 2111 } 2112 return; 2113} 2114 2115/* Define where to put the arguments to a function. 2116 Value is zero to push the argument on the stack, 2117 or a hard register in which to store the argument. 2118 2119 MODE is the argument's machine mode. 2120 TYPE is the data type of the argument (as a tree). 2121 This is null for libcalls where that information may 2122 not be available. 2123 CUM is a variable of type CUMULATIVE_ARGS which gives info about 2124 the preceding args and about the function being called. 2125 NAMED is nonzero if this argument is a named parameter 2126 (otherwise it is an extra parameter matching an ellipsis). */ 2127 2128rtx 2129function_arg (cum, mode, type, named) 2130 CUMULATIVE_ARGS *cum; /* current arg information */ 2131 enum machine_mode mode; /* current arg mode */ 2132 tree type; /* type of the argument or 0 if lib support */ 2133 int named; /* != 0 for normal args, == 0 for ... args */ 2134{ 2135 rtx ret = NULL_RTX; 2136 int bytes = 2137 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 2138 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 2139 2140 /* Handle an hidden AL argument containing number of registers for varargs 2141 x86-64 functions. For i386 ABI just return constm1_rtx to avoid 2142 any AL settings. */ 2143 if (mode == VOIDmode) 2144 { 2145 if (TARGET_64BIT) 2146 return GEN_INT (cum->maybe_vaarg 2147 ? (cum->sse_nregs < 0 2148 ? SSE_REGPARM_MAX 2149 : cum->sse_regno) 2150 : -1); 2151 else 2152 return constm1_rtx; 2153 } 2154 if (TARGET_64BIT) 2155 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs, 2156 &x86_64_int_parameter_registers [cum->regno], 2157 cum->sse_regno); 2158 else 2159 switch (mode) 2160 { 2161 /* For now, pass fp/complex values on the stack. */ 2162 default: 2163 break; 2164 2165 case BLKmode: 2166 case DImode: 2167 case SImode: 2168 case HImode: 2169 case QImode: 2170 if (words <= cum->nregs) 2171 ret = gen_rtx_REG (mode, cum->regno); 2172 break; 2173 case TImode: 2174 if (cum->sse_nregs) 2175 ret = gen_rtx_REG (mode, cum->sse_regno); 2176 break; 2177 } 2178 2179 if (TARGET_DEBUG_ARG) 2180 { 2181 fprintf (stderr, 2182 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d", 2183 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named); 2184 2185 if (ret) 2186 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]); 2187 else 2188 fprintf (stderr, ", stack"); 2189 2190 fprintf (stderr, " )\n"); 2191 } 2192 2193 return ret; 2194} 2195 2196/* Gives the alignment boundary, in bits, of an argument with the specified mode 2197 and type. */ 2198 2199int 2200ix86_function_arg_boundary (mode, type) 2201 enum machine_mode mode; 2202 tree type; 2203{ 2204 int align; 2205 if (!TARGET_64BIT) 2206 return PARM_BOUNDARY; 2207 if (type) 2208 align = TYPE_ALIGN (type); 2209 else 2210 align = GET_MODE_ALIGNMENT (mode); 2211 if (align < PARM_BOUNDARY) 2212 align = PARM_BOUNDARY; 2213 if (align > 128) 2214 align = 128; 2215 return align; 2216} 2217 2218/* Return true if N is a possible register number of function value. */ 2219bool 2220ix86_function_value_regno_p (regno) 2221 int regno; 2222{ 2223 if (!TARGET_64BIT) 2224 { 2225 return ((regno) == 0 2226 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387) 2227 || ((regno) == FIRST_SSE_REG && TARGET_SSE)); 2228 } 2229 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG 2230 || ((regno) == FIRST_SSE_REG && TARGET_SSE) 2231 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)); 2232} 2233 2234/* Define how to find the value returned by a function. 2235 VALTYPE is the data type of the value (as a tree). 2236 If the precise function being called is known, FUNC is its FUNCTION_DECL; 2237 otherwise, FUNC is 0. */ 2238rtx 2239ix86_function_value (valtype) 2240 tree valtype; 2241{ 2242 if (TARGET_64BIT) 2243 { 2244 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1, 2245 REGPARM_MAX, SSE_REGPARM_MAX, 2246 x86_64_int_return_registers, 0); 2247 /* For zero sized structures, construct_continer return NULL, but we need 2248 to keep rest of compiler happy by returning meaningfull value. */ 2249 if (!ret) 2250 ret = gen_rtx_REG (TYPE_MODE (valtype), 0); 2251 return ret; 2252 } 2253 else 2254 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype))); 2255} 2256 2257/* Return false iff type is returned in memory. */ 2258int 2259ix86_return_in_memory (type) 2260 tree type; 2261{ 2262 int needed_intregs, needed_sseregs; 2263 if (TARGET_64BIT) 2264 { 2265 return !examine_argument (TYPE_MODE (type), type, 1, 2266 &needed_intregs, &needed_sseregs); 2267 } 2268 else 2269 { 2270 if (TYPE_MODE (type) == BLKmode 2271 || (VECTOR_MODE_P (TYPE_MODE (type)) 2272 && int_size_in_bytes (type) == 8) 2273 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode 2274 && TYPE_MODE (type) != TFmode 2275 && !VECTOR_MODE_P (TYPE_MODE (type)))) 2276 return 1; 2277 return 0; 2278 } 2279} 2280 2281/* Define how to find the value returned by a library function 2282 assuming the value has mode MODE. */ 2283rtx 2284ix86_libcall_value (mode) 2285 enum machine_mode mode; 2286{ 2287 if (TARGET_64BIT) 2288 { 2289 switch (mode) 2290 { 2291 case SFmode: 2292 case SCmode: 2293 case DFmode: 2294 case DCmode: 2295 return gen_rtx_REG (mode, FIRST_SSE_REG); 2296 case TFmode: 2297 case TCmode: 2298 return gen_rtx_REG (mode, FIRST_FLOAT_REG); 2299 default: 2300 return gen_rtx_REG (mode, 0); 2301 } 2302 } 2303 else 2304 return gen_rtx_REG (mode, VALUE_REGNO (mode)); 2305} 2306 2307/* Create the va_list data type. */ 2308 2309tree 2310ix86_build_va_list () 2311{ 2312 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl; 2313 2314 /* For i386 we use plain pointer to argument area. */ 2315 if (!TARGET_64BIT) 2316 return build_pointer_type (char_type_node); 2317 2318 record = make_lang_type (RECORD_TYPE); 2319 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record); 2320 2321 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"), 2322 unsigned_type_node); 2323 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"), 2324 unsigned_type_node); 2325 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"), 2326 ptr_type_node); 2327 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"), 2328 ptr_type_node); 2329 2330 DECL_FIELD_CONTEXT (f_gpr) = record; 2331 DECL_FIELD_CONTEXT (f_fpr) = record; 2332 DECL_FIELD_CONTEXT (f_ovf) = record; 2333 DECL_FIELD_CONTEXT (f_sav) = record; 2334 2335 TREE_CHAIN (record) = type_decl; 2336 TYPE_NAME (record) = type_decl; 2337 TYPE_FIELDS (record) = f_gpr; 2338 TREE_CHAIN (f_gpr) = f_fpr; 2339 TREE_CHAIN (f_fpr) = f_ovf; 2340 TREE_CHAIN (f_ovf) = f_sav; 2341 2342 layout_type (record); 2343 2344 /* The correct type is an array type of one element. */ 2345 return build_array_type (record, build_index_type (size_zero_node)); 2346} 2347 2348/* Perform any needed actions needed for a function that is receiving a 2349 variable number of arguments. 2350 2351 CUM is as above. 2352 2353 MODE and TYPE are the mode and type of the current parameter. 2354 2355 PRETEND_SIZE is a variable that should be set to the amount of stack 2356 that must be pushed by the prolog to pretend that our caller pushed 2357 it. 2358 2359 Normally, this macro will push all remaining incoming registers on the 2360 stack and set PRETEND_SIZE to the length of the registers pushed. */ 2361 2362void 2363ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl) 2364 CUMULATIVE_ARGS *cum; 2365 enum machine_mode mode; 2366 tree type; 2367 int *pretend_size ATTRIBUTE_UNUSED; 2368 int no_rtl; 2369 2370{ 2371 CUMULATIVE_ARGS next_cum; 2372 rtx save_area = NULL_RTX, mem; 2373 rtx label; 2374 rtx label_ref; 2375 rtx tmp_reg; 2376 rtx nsse_reg; 2377 int set; 2378 tree fntype; 2379 int stdarg_p; 2380 int i; 2381 2382 if (!TARGET_64BIT) 2383 return; 2384 2385 /* Indicate to allocate space on the stack for varargs save area. */ 2386 ix86_save_varrargs_registers = 1; 2387 2388 fntype = TREE_TYPE (current_function_decl); 2389 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0 2390 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype))) 2391 != void_type_node)); 2392 2393 /* For varargs, we do not want to skip the dummy va_dcl argument. 2394 For stdargs, we do want to skip the last named argument. */ 2395 next_cum = *cum; 2396 if (stdarg_p) 2397 function_arg_advance (&next_cum, mode, type, 1); 2398 2399 if (!no_rtl) 2400 save_area = frame_pointer_rtx; 2401 2402 set = get_varargs_alias_set (); 2403 2404 for (i = next_cum.regno; i < ix86_regparm; i++) 2405 { 2406 mem = gen_rtx_MEM (Pmode, 2407 plus_constant (save_area, i * UNITS_PER_WORD)); 2408 set_mem_alias_set (mem, set); 2409 emit_move_insn (mem, gen_rtx_REG (Pmode, 2410 x86_64_int_parameter_registers[i])); 2411 } 2412 2413 if (next_cum.sse_nregs) 2414 { 2415 /* Now emit code to save SSE registers. The AX parameter contains number 2416 of SSE parameter regsiters used to call this function. We use 2417 sse_prologue_save insn template that produces computed jump across 2418 SSE saves. We need some preparation work to get this working. */ 2419 2420 label = gen_label_rtx (); 2421 label_ref = gen_rtx_LABEL_REF (Pmode, label); 2422 2423 /* Compute address to jump to : 2424 label - 5*eax + nnamed_sse_arguments*5 */ 2425 tmp_reg = gen_reg_rtx (Pmode); 2426 nsse_reg = gen_reg_rtx (Pmode); 2427 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0))); 2428 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, 2429 gen_rtx_MULT (Pmode, nsse_reg, 2430 GEN_INT (4)))); 2431 if (next_cum.sse_regno) 2432 emit_move_insn 2433 (nsse_reg, 2434 gen_rtx_CONST (DImode, 2435 gen_rtx_PLUS (DImode, 2436 label_ref, 2437 GEN_INT (next_cum.sse_regno * 4)))); 2438 else 2439 emit_move_insn (nsse_reg, label_ref); 2440 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg)); 2441 2442 /* Compute address of memory block we save into. We always use pointer 2443 pointing 127 bytes after first byte to store - this is needed to keep 2444 instruction size limited by 4 bytes. */ 2445 tmp_reg = gen_reg_rtx (Pmode); 2446 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, 2447 plus_constant (save_area, 2448 8 * REGPARM_MAX + 127))); 2449 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127)); 2450 set_mem_alias_set (mem, set); 2451 set_mem_align (mem, BITS_PER_WORD); 2452 2453 /* And finally do the dirty job! */ 2454 emit_insn (gen_sse_prologue_save (mem, nsse_reg, 2455 GEN_INT (next_cum.sse_regno), label)); 2456 } 2457 2458} 2459 2460/* Implement va_start. */ 2461 2462void 2463ix86_va_start (stdarg_p, valist, nextarg) 2464 int stdarg_p; 2465 tree valist; 2466 rtx nextarg; 2467{ 2468 HOST_WIDE_INT words, n_gpr, n_fpr; 2469 tree f_gpr, f_fpr, f_ovf, f_sav; 2470 tree gpr, fpr, ovf, sav, t; 2471 2472 /* Only 64bit target needs something special. */ 2473 if (!TARGET_64BIT) 2474 { 2475 std_expand_builtin_va_start (stdarg_p, valist, nextarg); 2476 return; 2477 } 2478 2479 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); 2480 f_fpr = TREE_CHAIN (f_gpr); 2481 f_ovf = TREE_CHAIN (f_fpr); 2482 f_sav = TREE_CHAIN (f_ovf); 2483 2484 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist); 2485 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr); 2486 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr); 2487 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf); 2488 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav); 2489 2490 /* Count number of gp and fp argument registers used. */ 2491 words = current_function_args_info.words; 2492 n_gpr = current_function_args_info.regno; 2493 n_fpr = current_function_args_info.sse_regno; 2494 2495 if (TARGET_DEBUG_ARG) 2496 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n", 2497 (int) words, (int) n_gpr, (int) n_fpr); 2498 2499 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, 2500 build_int_2 (n_gpr * 8, 0)); 2501 TREE_SIDE_EFFECTS (t) = 1; 2502 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 2503 2504 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, 2505 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0)); 2506 TREE_SIDE_EFFECTS (t) = 1; 2507 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 2508 2509 /* Find the overflow area. */ 2510 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx); 2511 if (words != 0) 2512 t = build (PLUS_EXPR, TREE_TYPE (ovf), t, 2513 build_int_2 (words * UNITS_PER_WORD, 0)); 2514 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t); 2515 TREE_SIDE_EFFECTS (t) = 1; 2516 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 2517 2518 /* Find the register save area. 2519 Prologue of the function save it right above stack frame. */ 2520 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx); 2521 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t); 2522 TREE_SIDE_EFFECTS (t) = 1; 2523 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 2524 cfun->preferred_stack_boundary = 128; 2525} 2526 2527/* Implement va_arg. */ 2528rtx 2529ix86_va_arg (valist, type) 2530 tree valist, type; 2531{ 2532 static int intreg[6] = { 0, 1, 2, 3, 4, 5 }; 2533 tree f_gpr, f_fpr, f_ovf, f_sav; 2534 tree gpr, fpr, ovf, sav, t; 2535 int size, rsize; 2536 rtx lab_false, lab_over = NULL_RTX; 2537 rtx addr_rtx, r; 2538 rtx container; 2539 2540 /* Only 64bit target needs something special. */ 2541 if (!TARGET_64BIT) 2542 { 2543 return std_expand_builtin_va_arg (valist, type); 2544 } 2545 2546 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); 2547 f_fpr = TREE_CHAIN (f_gpr); 2548 f_ovf = TREE_CHAIN (f_fpr); 2549 f_sav = TREE_CHAIN (f_ovf); 2550 2551 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist); 2552 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr); 2553 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr); 2554 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf); 2555 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav); 2556 2557 size = int_size_in_bytes (type); 2558 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 2559 2560 container = construct_container (TYPE_MODE (type), type, 0, 2561 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0); 2562 /* 2563 * Pull the value out of the saved registers ... 2564 */ 2565 2566 addr_rtx = gen_reg_rtx (Pmode); 2567 2568 if (container) 2569 { 2570 rtx int_addr_rtx, sse_addr_rtx; 2571 int needed_intregs, needed_sseregs; 2572 int need_temp; 2573 2574 lab_over = gen_label_rtx (); 2575 lab_false = gen_label_rtx (); 2576 2577 examine_argument (TYPE_MODE (type), type, 0, 2578 &needed_intregs, &needed_sseregs); 2579 2580 2581 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64) 2582 || TYPE_ALIGN (type) > 128); 2583 2584 /* In case we are passing structure, verify that it is consetuctive block 2585 on the register save area. If not we need to do moves. */ 2586 if (!need_temp && !REG_P (container)) 2587 { 2588 /* Verify that all registers are strictly consetuctive */ 2589 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0)))) 2590 { 2591 int i; 2592 2593 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) 2594 { 2595 rtx slot = XVECEXP (container, 0, i); 2596 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i 2597 || INTVAL (XEXP (slot, 1)) != i * 16) 2598 need_temp = 1; 2599 } 2600 } 2601 else 2602 { 2603 int i; 2604 2605 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) 2606 { 2607 rtx slot = XVECEXP (container, 0, i); 2608 if (REGNO (XEXP (slot, 0)) != (unsigned int) i 2609 || INTVAL (XEXP (slot, 1)) != i * 8) 2610 need_temp = 1; 2611 } 2612 } 2613 } 2614 if (!need_temp) 2615 { 2616 int_addr_rtx = addr_rtx; 2617 sse_addr_rtx = addr_rtx; 2618 } 2619 else 2620 { 2621 int_addr_rtx = gen_reg_rtx (Pmode); 2622 sse_addr_rtx = gen_reg_rtx (Pmode); 2623 } 2624 /* First ensure that we fit completely in registers. */ 2625 if (needed_intregs) 2626 { 2627 emit_cmp_and_jump_insns (expand_expr 2628 (gpr, NULL_RTX, SImode, EXPAND_NORMAL), 2629 GEN_INT ((REGPARM_MAX - needed_intregs + 2630 1) * 8), GE, const1_rtx, SImode, 2631 1, lab_false); 2632 } 2633 if (needed_sseregs) 2634 { 2635 emit_cmp_and_jump_insns (expand_expr 2636 (fpr, NULL_RTX, SImode, EXPAND_NORMAL), 2637 GEN_INT ((SSE_REGPARM_MAX - 2638 needed_sseregs + 1) * 16 + 2639 REGPARM_MAX * 8), GE, const1_rtx, 2640 SImode, 1, lab_false); 2641 } 2642 2643 /* Compute index to start of area used for integer regs. */ 2644 if (needed_intregs) 2645 { 2646 t = build (PLUS_EXPR, ptr_type_node, sav, gpr); 2647 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL); 2648 if (r != int_addr_rtx) 2649 emit_move_insn (int_addr_rtx, r); 2650 } 2651 if (needed_sseregs) 2652 { 2653 t = build (PLUS_EXPR, ptr_type_node, sav, fpr); 2654 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL); 2655 if (r != sse_addr_rtx) 2656 emit_move_insn (sse_addr_rtx, r); 2657 } 2658 if (need_temp) 2659 { 2660 int i; 2661 rtx mem; 2662 2663 /* Never use the memory itself, as it has the alias set. */ 2664 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0); 2665 mem = gen_rtx_MEM (BLKmode, addr_rtx); 2666 set_mem_alias_set (mem, get_varargs_alias_set ()); 2667 set_mem_align (mem, BITS_PER_UNIT); 2668 2669 for (i = 0; i < XVECLEN (container, 0); i++) 2670 { 2671 rtx slot = XVECEXP (container, 0, i); 2672 rtx reg = XEXP (slot, 0); 2673 enum machine_mode mode = GET_MODE (reg); 2674 rtx src_addr; 2675 rtx src_mem; 2676 int src_offset; 2677 rtx dest_mem; 2678 2679 if (SSE_REGNO_P (REGNO (reg))) 2680 { 2681 src_addr = sse_addr_rtx; 2682 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16; 2683 } 2684 else 2685 { 2686 src_addr = int_addr_rtx; 2687 src_offset = REGNO (reg) * 8; 2688 } 2689 src_mem = gen_rtx_MEM (mode, src_addr); 2690 set_mem_alias_set (src_mem, get_varargs_alias_set ()); 2691 src_mem = adjust_address (src_mem, mode, src_offset); 2692 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1))); 2693 emit_move_insn (dest_mem, src_mem); 2694 } 2695 } 2696 2697 if (needed_intregs) 2698 { 2699 t = 2700 build (PLUS_EXPR, TREE_TYPE (gpr), gpr, 2701 build_int_2 (needed_intregs * 8, 0)); 2702 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t); 2703 TREE_SIDE_EFFECTS (t) = 1; 2704 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 2705 } 2706 if (needed_sseregs) 2707 { 2708 t = 2709 build (PLUS_EXPR, TREE_TYPE (fpr), fpr, 2710 build_int_2 (needed_sseregs * 16, 0)); 2711 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t); 2712 TREE_SIDE_EFFECTS (t) = 1; 2713 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 2714 } 2715 2716 emit_jump_insn (gen_jump (lab_over)); 2717 emit_barrier (); 2718 emit_label (lab_false); 2719 } 2720 2721 /* ... otherwise out of the overflow area. */ 2722 2723 /* Care for on-stack alignment if needed. */ 2724 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64) 2725 t = ovf; 2726 else 2727 { 2728 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8; 2729 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0)); 2730 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1)); 2731 } 2732 t = save_expr (t); 2733 2734 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL); 2735 if (r != addr_rtx) 2736 emit_move_insn (addr_rtx, r); 2737 2738 t = 2739 build (PLUS_EXPR, TREE_TYPE (t), t, 2740 build_int_2 (rsize * UNITS_PER_WORD, 0)); 2741 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t); 2742 TREE_SIDE_EFFECTS (t) = 1; 2743 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 2744 2745 if (container) 2746 emit_label (lab_over); 2747 2748 return addr_rtx; 2749} 2750 2751/* Return nonzero if OP is general operand representable on x86_64. */ 2752 2753int 2754x86_64_general_operand (op, mode) 2755 rtx op; 2756 enum machine_mode mode; 2757{ 2758 if (!TARGET_64BIT) 2759 return general_operand (op, mode); 2760 if (nonimmediate_operand (op, mode)) 2761 return 1; 2762 return x86_64_sign_extended_value (op); 2763} 2764 2765/* Return nonzero if OP is general operand representable on x86_64 2766 as either sign extended or zero extended constant. */ 2767 2768int 2769x86_64_szext_general_operand (op, mode) 2770 rtx op; 2771 enum machine_mode mode; 2772{ 2773 if (!TARGET_64BIT) 2774 return general_operand (op, mode); 2775 if (nonimmediate_operand (op, mode)) 2776 return 1; 2777 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op); 2778} 2779 2780/* Return nonzero if OP is nonmemory operand representable on x86_64. */ 2781 2782int 2783x86_64_nonmemory_operand (op, mode) 2784 rtx op; 2785 enum machine_mode mode; 2786{ 2787 if (!TARGET_64BIT) 2788 return nonmemory_operand (op, mode); 2789 if (register_operand (op, mode)) 2790 return 1; 2791 return x86_64_sign_extended_value (op); 2792} 2793 2794/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */ 2795 2796int 2797x86_64_movabs_operand (op, mode) 2798 rtx op; 2799 enum machine_mode mode; 2800{ 2801 if (!TARGET_64BIT || !flag_pic) 2802 return nonmemory_operand (op, mode); 2803 if (register_operand (op, mode) || x86_64_sign_extended_value (op)) 2804 return 1; 2805 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op)) 2806 return 1; 2807 return 0; 2808} 2809 2810/* Return nonzero if OP is nonmemory operand representable on x86_64. */ 2811 2812int 2813x86_64_szext_nonmemory_operand (op, mode) 2814 rtx op; 2815 enum machine_mode mode; 2816{ 2817 if (!TARGET_64BIT) 2818 return nonmemory_operand (op, mode); 2819 if (register_operand (op, mode)) 2820 return 1; 2821 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op); 2822} 2823 2824/* Return nonzero if OP is immediate operand representable on x86_64. */ 2825 2826int 2827x86_64_immediate_operand (op, mode) 2828 rtx op; 2829 enum machine_mode mode; 2830{ 2831 if (!TARGET_64BIT) 2832 return immediate_operand (op, mode); 2833 return x86_64_sign_extended_value (op); 2834} 2835 2836/* Return nonzero if OP is immediate operand representable on x86_64. */ 2837 2838int 2839x86_64_zext_immediate_operand (op, mode) 2840 rtx op; 2841 enum machine_mode mode ATTRIBUTE_UNUSED; 2842{ 2843 return x86_64_zero_extended_value (op); 2844} 2845 2846/* Return nonzero if OP is (const_int 1), else return zero. */ 2847 2848int 2849const_int_1_operand (op, mode) 2850 rtx op; 2851 enum machine_mode mode ATTRIBUTE_UNUSED; 2852{ 2853 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1); 2854} 2855 2856/* Returns 1 if OP is either a symbol reference or a sum of a symbol 2857 reference and a constant. */ 2858 2859int 2860symbolic_operand (op, mode) 2861 register rtx op; 2862 enum machine_mode mode ATTRIBUTE_UNUSED; 2863{ 2864 switch (GET_CODE (op)) 2865 { 2866 case SYMBOL_REF: 2867 case LABEL_REF: 2868 return 1; 2869 2870 case CONST: 2871 op = XEXP (op, 0); 2872 if (GET_CODE (op) == SYMBOL_REF 2873 || GET_CODE (op) == LABEL_REF 2874 || (GET_CODE (op) == UNSPEC 2875 && (XINT (op, 1) == 6 2876 || XINT (op, 1) == 7 2877 || XINT (op, 1) == 15))) 2878 return 1; 2879 if (GET_CODE (op) != PLUS 2880 || GET_CODE (XEXP (op, 1)) != CONST_INT) 2881 return 0; 2882 2883 op = XEXP (op, 0); 2884 if (GET_CODE (op) == SYMBOL_REF 2885 || GET_CODE (op) == LABEL_REF) 2886 return 1; 2887 /* Only @GOTOFF gets offsets. */ 2888 if (GET_CODE (op) != UNSPEC 2889 || XINT (op, 1) != 7) 2890 return 0; 2891 2892 op = XVECEXP (op, 0, 0); 2893 if (GET_CODE (op) == SYMBOL_REF 2894 || GET_CODE (op) == LABEL_REF) 2895 return 1; 2896 return 0; 2897 2898 default: 2899 return 0; 2900 } 2901} 2902 2903/* Return true if the operand contains a @GOT or @GOTOFF reference. */ 2904 2905int 2906pic_symbolic_operand (op, mode) 2907 register rtx op; 2908 enum machine_mode mode ATTRIBUTE_UNUSED; 2909{ 2910 if (GET_CODE (op) != CONST) 2911 return 0; 2912 op = XEXP (op, 0); 2913 if (TARGET_64BIT) 2914 { 2915 if (GET_CODE (XEXP (op, 0)) == UNSPEC) 2916 return 1; 2917 } 2918 else 2919 { 2920 if (GET_CODE (op) == UNSPEC) 2921 return 1; 2922 if (GET_CODE (op) != PLUS 2923 || GET_CODE (XEXP (op, 1)) != CONST_INT) 2924 return 0; 2925 op = XEXP (op, 0); 2926 if (GET_CODE (op) == UNSPEC) 2927 return 1; 2928 } 2929 return 0; 2930} 2931 2932/* Return true if OP is a symbolic operand that resolves locally. */ 2933 2934static int 2935local_symbolic_operand (op, mode) 2936 rtx op; 2937 enum machine_mode mode ATTRIBUTE_UNUSED; 2938{ 2939 if (GET_CODE (op) == LABEL_REF) 2940 return 1; 2941 2942 if (GET_CODE (op) == CONST 2943 && GET_CODE (XEXP (op, 0)) == PLUS 2944 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT) 2945 op = XEXP (XEXP (op, 0), 0); 2946 2947 if (GET_CODE (op) != SYMBOL_REF) 2948 return 0; 2949 2950 /* These we've been told are local by varasm and encode_section_info 2951 respectively. */ 2952 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op)) 2953 return 1; 2954 2955 /* There is, however, a not insubstantial body of code in the rest of 2956 the compiler that assumes it can just stick the results of 2957 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */ 2958 /* ??? This is a hack. Should update the body of the compiler to 2959 always create a DECL an invoke ENCODE_SECTION_INFO. */ 2960 if (strncmp (XSTR (op, 0), internal_label_prefix, 2961 internal_label_prefix_len) == 0) 2962 return 1; 2963 2964 return 0; 2965} 2966 2967/* Test for a valid operand for a call instruction. Don't allow the 2968 arg pointer register or virtual regs since they may decay into 2969 reg + const, which the patterns can't handle. */ 2970 2971int 2972call_insn_operand (op, mode) 2973 rtx op; 2974 enum machine_mode mode ATTRIBUTE_UNUSED; 2975{ 2976 /* Disallow indirect through a virtual register. This leads to 2977 compiler aborts when trying to eliminate them. */ 2978 if (GET_CODE (op) == REG 2979 && (op == arg_pointer_rtx 2980 || op == frame_pointer_rtx 2981 || (REGNO (op) >= FIRST_PSEUDO_REGISTER 2982 && REGNO (op) <= LAST_VIRTUAL_REGISTER))) 2983 return 0; 2984 2985 /* Disallow `call 1234'. Due to varying assembler lameness this 2986 gets either rejected or translated to `call .+1234'. */ 2987 if (GET_CODE (op) == CONST_INT) 2988 return 0; 2989 2990 /* Explicitly allow SYMBOL_REF even if pic. */ 2991 if (GET_CODE (op) == SYMBOL_REF) 2992 return 1; 2993 2994 /* Half-pic doesn't allow anything but registers and constants. 2995 We've just taken care of the later. */ 2996 if (HALF_PIC_P ()) 2997 return register_operand (op, Pmode); 2998 2999 /* Otherwise we can allow any general_operand in the address. */ 3000 return general_operand (op, Pmode); 3001} 3002 3003int 3004constant_call_address_operand (op, mode) 3005 rtx op; 3006 enum machine_mode mode ATTRIBUTE_UNUSED; 3007{ 3008 if (GET_CODE (op) == CONST 3009 && GET_CODE (XEXP (op, 0)) == PLUS 3010 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT) 3011 op = XEXP (XEXP (op, 0), 0); 3012 return GET_CODE (op) == SYMBOL_REF; 3013} 3014 3015/* Match exactly zero and one. */ 3016 3017int 3018const0_operand (op, mode) 3019 register rtx op; 3020 enum machine_mode mode; 3021{ 3022 return op == CONST0_RTX (mode); 3023} 3024 3025int 3026const1_operand (op, mode) 3027 register rtx op; 3028 enum machine_mode mode ATTRIBUTE_UNUSED; 3029{ 3030 return op == const1_rtx; 3031} 3032 3033/* Match 2, 4, or 8. Used for leal multiplicands. */ 3034 3035int 3036const248_operand (op, mode) 3037 register rtx op; 3038 enum machine_mode mode ATTRIBUTE_UNUSED; 3039{ 3040 return (GET_CODE (op) == CONST_INT 3041 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8)); 3042} 3043 3044/* True if this is a constant appropriate for an increment or decremenmt. */ 3045 3046int 3047incdec_operand (op, mode) 3048 register rtx op; 3049 enum machine_mode mode ATTRIBUTE_UNUSED; 3050{ 3051 /* On Pentium4, the inc and dec operations causes extra dependency on flag 3052 registers, since carry flag is not set. */ 3053 if (TARGET_PENTIUM4 && !optimize_size) 3054 return 0; 3055 return op == const1_rtx || op == constm1_rtx; 3056} 3057 3058/* Return nonzero if OP is acceptable as operand of DImode shift 3059 expander. */ 3060 3061int 3062shiftdi_operand (op, mode) 3063 rtx op; 3064 enum machine_mode mode ATTRIBUTE_UNUSED; 3065{ 3066 if (TARGET_64BIT) 3067 return nonimmediate_operand (op, mode); 3068 else 3069 return register_operand (op, mode); 3070} 3071 3072/* Return false if this is the stack pointer, or any other fake 3073 register eliminable to the stack pointer. Otherwise, this is 3074 a register operand. 3075 3076 This is used to prevent esp from being used as an index reg. 3077 Which would only happen in pathological cases. */ 3078 3079int 3080reg_no_sp_operand (op, mode) 3081 register rtx op; 3082 enum machine_mode mode; 3083{ 3084 rtx t = op; 3085 if (GET_CODE (t) == SUBREG) 3086 t = SUBREG_REG (t); 3087 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx) 3088 return 0; 3089 3090 return register_operand (op, mode); 3091} 3092 3093int 3094mmx_reg_operand (op, mode) 3095 register rtx op; 3096 enum machine_mode mode ATTRIBUTE_UNUSED; 3097{ 3098 return MMX_REG_P (op); 3099} 3100 3101/* Return false if this is any eliminable register. Otherwise 3102 general_operand. */ 3103 3104int 3105general_no_elim_operand (op, mode) 3106 register rtx op; 3107 enum machine_mode mode; 3108{ 3109 rtx t = op; 3110 if (GET_CODE (t) == SUBREG) 3111 t = SUBREG_REG (t); 3112 if (t == arg_pointer_rtx || t == frame_pointer_rtx 3113 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx 3114 || t == virtual_stack_dynamic_rtx) 3115 return 0; 3116 if (REG_P (t) 3117 && REGNO (t) >= FIRST_VIRTUAL_REGISTER 3118 && REGNO (t) <= LAST_VIRTUAL_REGISTER) 3119 return 0; 3120 3121 return general_operand (op, mode); 3122} 3123 3124/* Return false if this is any eliminable register. Otherwise 3125 register_operand or const_int. */ 3126 3127int 3128nonmemory_no_elim_operand (op, mode) 3129 register rtx op; 3130 enum machine_mode mode; 3131{ 3132 rtx t = op; 3133 if (GET_CODE (t) == SUBREG) 3134 t = SUBREG_REG (t); 3135 if (t == arg_pointer_rtx || t == frame_pointer_rtx 3136 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx 3137 || t == virtual_stack_dynamic_rtx) 3138 return 0; 3139 3140 return GET_CODE (op) == CONST_INT || register_operand (op, mode); 3141} 3142 3143/* Return true if op is a Q_REGS class register. */ 3144 3145int 3146q_regs_operand (op, mode) 3147 register rtx op; 3148 enum machine_mode mode; 3149{ 3150 if (mode != VOIDmode && GET_MODE (op) != mode) 3151 return 0; 3152 if (GET_CODE (op) == SUBREG) 3153 op = SUBREG_REG (op); 3154 return QI_REG_P (op); 3155} 3156 3157/* Return true if op is a NON_Q_REGS class register. */ 3158 3159int 3160non_q_regs_operand (op, mode) 3161 register rtx op; 3162 enum machine_mode mode; 3163{ 3164 if (mode != VOIDmode && GET_MODE (op) != mode) 3165 return 0; 3166 if (GET_CODE (op) == SUBREG) 3167 op = SUBREG_REG (op); 3168 return NON_QI_REG_P (op); 3169} 3170 3171/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS 3172 insns. */ 3173int 3174sse_comparison_operator (op, mode) 3175 rtx op; 3176 enum machine_mode mode ATTRIBUTE_UNUSED; 3177{ 3178 enum rtx_code code = GET_CODE (op); 3179 switch (code) 3180 { 3181 /* Operations supported directly. */ 3182 case EQ: 3183 case LT: 3184 case LE: 3185 case UNORDERED: 3186 case NE: 3187 case UNGE: 3188 case UNGT: 3189 case ORDERED: 3190 return 1; 3191 /* These are equivalent to ones above in non-IEEE comparisons. */ 3192 case UNEQ: 3193 case UNLT: 3194 case UNLE: 3195 case LTGT: 3196 case GE: 3197 case GT: 3198 return !TARGET_IEEE_FP; 3199 default: 3200 return 0; 3201 } 3202} 3203/* Return 1 if OP is a valid comparison operator in valid mode. */ 3204int 3205ix86_comparison_operator (op, mode) 3206 register rtx op; 3207 enum machine_mode mode; 3208{ 3209 enum machine_mode inmode; 3210 enum rtx_code code = GET_CODE (op); 3211 if (mode != VOIDmode && GET_MODE (op) != mode) 3212 return 0; 3213 if (GET_RTX_CLASS (code) != '<') 3214 return 0; 3215 inmode = GET_MODE (XEXP (op, 0)); 3216 3217 if (inmode == CCFPmode || inmode == CCFPUmode) 3218 { 3219 enum rtx_code second_code, bypass_code; 3220 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 3221 return (bypass_code == NIL && second_code == NIL); 3222 } 3223 switch (code) 3224 { 3225 case EQ: case NE: 3226 return 1; 3227 case LT: case GE: 3228 if (inmode == CCmode || inmode == CCGCmode 3229 || inmode == CCGOCmode || inmode == CCNOmode) 3230 return 1; 3231 return 0; 3232 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU: 3233 if (inmode == CCmode) 3234 return 1; 3235 return 0; 3236 case GT: case LE: 3237 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode) 3238 return 1; 3239 return 0; 3240 default: 3241 return 0; 3242 } 3243} 3244 3245/* Return 1 if OP is a comparison operator that can be issued by fcmov. */ 3246 3247int 3248fcmov_comparison_operator (op, mode) 3249 register rtx op; 3250 enum machine_mode mode; 3251{ 3252 enum machine_mode inmode; 3253 enum rtx_code code = GET_CODE (op); 3254 if (mode != VOIDmode && GET_MODE (op) != mode) 3255 return 0; 3256 if (GET_RTX_CLASS (code) != '<') 3257 return 0; 3258 inmode = GET_MODE (XEXP (op, 0)); 3259 if (inmode == CCFPmode || inmode == CCFPUmode) 3260 { 3261 enum rtx_code second_code, bypass_code; 3262 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 3263 if (bypass_code != NIL || second_code != NIL) 3264 return 0; 3265 code = ix86_fp_compare_code_to_integer (code); 3266 } 3267 /* i387 supports just limited amount of conditional codes. */ 3268 switch (code) 3269 { 3270 case LTU: case GTU: case LEU: case GEU: 3271 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode) 3272 return 1; 3273 return 0; 3274 case ORDERED: case UNORDERED: 3275 case EQ: case NE: 3276 return 1; 3277 default: 3278 return 0; 3279 } 3280} 3281 3282/* Return 1 if OP is a binary operator that can be promoted to wider mode. */ 3283 3284int 3285promotable_binary_operator (op, mode) 3286 register rtx op; 3287 enum machine_mode mode ATTRIBUTE_UNUSED; 3288{ 3289 switch (GET_CODE (op)) 3290 { 3291 case MULT: 3292 /* Modern CPUs have same latency for HImode and SImode multiply, 3293 but 386 and 486 do HImode multiply faster. */ 3294 return ix86_cpu > PROCESSOR_I486; 3295 case PLUS: 3296 case AND: 3297 case IOR: 3298 case XOR: 3299 case ASHIFT: 3300 return 1; 3301 default: 3302 return 0; 3303 } 3304} 3305 3306/* Nearly general operand, but accept any const_double, since we wish 3307 to be able to drop them into memory rather than have them get pulled 3308 into registers. */ 3309 3310int 3311cmp_fp_expander_operand (op, mode) 3312 register rtx op; 3313 enum machine_mode mode; 3314{ 3315 if (mode != VOIDmode && mode != GET_MODE (op)) 3316 return 0; 3317 if (GET_CODE (op) == CONST_DOUBLE) 3318 return 1; 3319 return general_operand (op, mode); 3320} 3321 3322/* Match an SI or HImode register for a zero_extract. */ 3323 3324int 3325ext_register_operand (op, mode) 3326 register rtx op; 3327 enum machine_mode mode ATTRIBUTE_UNUSED; 3328{ 3329 int regno; 3330 if ((!TARGET_64BIT || GET_MODE (op) != DImode) 3331 && GET_MODE (op) != SImode && GET_MODE (op) != HImode) 3332 return 0; 3333 3334 if (!register_operand (op, VOIDmode)) 3335 return 0; 3336 3337 /* Be curefull to accept only registers having upper parts. */ 3338 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op)); 3339 return (regno > LAST_VIRTUAL_REGISTER || regno < 4); 3340} 3341 3342/* Return 1 if this is a valid binary floating-point operation. 3343 OP is the expression matched, and MODE is its mode. */ 3344 3345int 3346binary_fp_operator (op, mode) 3347 register rtx op; 3348 enum machine_mode mode; 3349{ 3350 if (mode != VOIDmode && mode != GET_MODE (op)) 3351 return 0; 3352 3353 switch (GET_CODE (op)) 3354 { 3355 case PLUS: 3356 case MINUS: 3357 case MULT: 3358 case DIV: 3359 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT; 3360 3361 default: 3362 return 0; 3363 } 3364} 3365 3366int 3367mult_operator (op, mode) 3368 register rtx op; 3369 enum machine_mode mode ATTRIBUTE_UNUSED; 3370{ 3371 return GET_CODE (op) == MULT; 3372} 3373 3374int 3375div_operator (op, mode) 3376 register rtx op; 3377 enum machine_mode mode ATTRIBUTE_UNUSED; 3378{ 3379 return GET_CODE (op) == DIV; 3380} 3381 3382int 3383arith_or_logical_operator (op, mode) 3384 rtx op; 3385 enum machine_mode mode; 3386{ 3387 return ((mode == VOIDmode || GET_MODE (op) == mode) 3388 && (GET_RTX_CLASS (GET_CODE (op)) == 'c' 3389 || GET_RTX_CLASS (GET_CODE (op)) == '2')); 3390} 3391 3392/* Returns 1 if OP is memory operand with a displacement. */ 3393 3394int 3395memory_displacement_operand (op, mode) 3396 register rtx op; 3397 enum machine_mode mode; 3398{ 3399 struct ix86_address parts; 3400 3401 if (! memory_operand (op, mode)) 3402 return 0; 3403 3404 if (! ix86_decompose_address (XEXP (op, 0), &parts)) 3405 abort (); 3406 3407 return parts.disp != NULL_RTX; 3408} 3409 3410/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0, 3411 re-recognize the operand to avoid a copy_to_mode_reg that will fail. 3412 3413 ??? It seems likely that this will only work because cmpsi is an 3414 expander, and no actual insns use this. */ 3415 3416int 3417cmpsi_operand (op, mode) 3418 rtx op; 3419 enum machine_mode mode; 3420{ 3421 if (nonimmediate_operand (op, mode)) 3422 return 1; 3423 3424 if (GET_CODE (op) == AND 3425 && GET_MODE (op) == SImode 3426 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT 3427 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT 3428 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT 3429 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8 3430 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8 3431 && GET_CODE (XEXP (op, 1)) == CONST_INT) 3432 return 1; 3433 3434 return 0; 3435} 3436 3437/* Returns 1 if OP is memory operand that can not be represented by the 3438 modRM array. */ 3439 3440int 3441long_memory_operand (op, mode) 3442 register rtx op; 3443 enum machine_mode mode; 3444{ 3445 if (! memory_operand (op, mode)) 3446 return 0; 3447 3448 return memory_address_length (op) != 0; 3449} 3450 3451/* Return nonzero if the rtx is known aligned. */ 3452 3453int 3454aligned_operand (op, mode) 3455 rtx op; 3456 enum machine_mode mode; 3457{ 3458 struct ix86_address parts; 3459 3460 if (!general_operand (op, mode)) 3461 return 0; 3462 3463 /* Registers and immediate operands are always "aligned". */ 3464 if (GET_CODE (op) != MEM) 3465 return 1; 3466 3467 /* Don't even try to do any aligned optimizations with volatiles. */ 3468 if (MEM_VOLATILE_P (op)) 3469 return 0; 3470 3471 op = XEXP (op, 0); 3472 3473 /* Pushes and pops are only valid on the stack pointer. */ 3474 if (GET_CODE (op) == PRE_DEC 3475 || GET_CODE (op) == POST_INC) 3476 return 1; 3477 3478 /* Decode the address. */ 3479 if (! ix86_decompose_address (op, &parts)) 3480 abort (); 3481 3482 /* Look for some component that isn't known to be aligned. */ 3483 if (parts.index) 3484 { 3485 if (parts.scale < 4 3486 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32) 3487 return 0; 3488 } 3489 if (parts.base) 3490 { 3491 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32) 3492 return 0; 3493 } 3494 if (parts.disp) 3495 { 3496 if (GET_CODE (parts.disp) != CONST_INT 3497 || (INTVAL (parts.disp) & 3) != 0) 3498 return 0; 3499 } 3500 3501 /* Didn't find one -- this must be an aligned address. */ 3502 return 1; 3503} 3504 3505/* Return true if the constant is something that can be loaded with 3506 a special instruction. Only handle 0.0 and 1.0; others are less 3507 worthwhile. */ 3508 3509int 3510standard_80387_constant_p (x) 3511 rtx x; 3512{ 3513 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x))) 3514 return -1; 3515 /* Note that on the 80387, other constants, such as pi, that we should support 3516 too. On some machines, these are much slower to load as standard constant, 3517 than to load from doubles in memory. */ 3518 if (x == CONST0_RTX (GET_MODE (x))) 3519 return 1; 3520 if (x == CONST1_RTX (GET_MODE (x))) 3521 return 2; 3522 return 0; 3523} 3524 3525/* Return 1 if X is FP constant we can load to SSE register w/o using memory. 3526 */ 3527int 3528standard_sse_constant_p (x) 3529 rtx x; 3530{ 3531 if (GET_CODE (x) != CONST_DOUBLE) 3532 return -1; 3533 return (x == CONST0_RTX (GET_MODE (x))); 3534} 3535 3536/* Returns 1 if OP contains a symbol reference */ 3537 3538int 3539symbolic_reference_mentioned_p (op) 3540 rtx op; 3541{ 3542 register const char *fmt; 3543 register int i; 3544 3545 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) 3546 return 1; 3547 3548 fmt = GET_RTX_FORMAT (GET_CODE (op)); 3549 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) 3550 { 3551 if (fmt[i] == 'E') 3552 { 3553 register int j; 3554 3555 for (j = XVECLEN (op, i) - 1; j >= 0; j--) 3556 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) 3557 return 1; 3558 } 3559 3560 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) 3561 return 1; 3562 } 3563 3564 return 0; 3565} 3566 3567/* Return 1 if it is appropriate to emit `ret' instructions in the 3568 body of a function. Do this only if the epilogue is simple, needing a 3569 couple of insns. Prior to reloading, we can't tell how many registers 3570 must be saved, so return 0 then. Return 0 if there is no frame 3571 marker to de-allocate. 3572 3573 If NON_SAVING_SETJMP is defined and true, then it is not possible 3574 for the epilogue to be simple, so return 0. This is a special case 3575 since NON_SAVING_SETJMP will not cause regs_ever_live to change 3576 until final, but jump_optimize may need to know sooner if a 3577 `return' is OK. */ 3578 3579int 3580ix86_can_use_return_insn_p () 3581{ 3582 struct ix86_frame frame; 3583 3584#ifdef NON_SAVING_SETJMP 3585 if (NON_SAVING_SETJMP && current_function_calls_setjmp) 3586 return 0; 3587#endif 3588 3589 if (! reload_completed || frame_pointer_needed) 3590 return 0; 3591 3592 /* Don't allow more than 32 pop, since that's all we can do 3593 with one instruction. */ 3594 if (current_function_pops_args 3595 && current_function_args_size >= 32768) 3596 return 0; 3597 3598 ix86_compute_frame_layout (&frame); 3599 return frame.to_allocate == 0 && frame.nregs == 0; 3600} 3601 3602/* Return 1 if VALUE can be stored in the sign extended immediate field. */ 3603int 3604x86_64_sign_extended_value (value) 3605 rtx value; 3606{ 3607 switch (GET_CODE (value)) 3608 { 3609 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known 3610 to be at least 32 and this all acceptable constants are 3611 represented as CONST_INT. */ 3612 case CONST_INT: 3613 if (HOST_BITS_PER_WIDE_INT == 32) 3614 return 1; 3615 else 3616 { 3617 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode); 3618 return trunc_int_for_mode (val, SImode) == val; 3619 } 3620 break; 3621 3622 /* For certain code models, the symbolic references are known to fit. */ 3623 case SYMBOL_REF: 3624 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL; 3625 3626 /* For certain code models, the code is near as well. */ 3627 case LABEL_REF: 3628 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC; 3629 3630 /* We also may accept the offsetted memory references in certain special 3631 cases. */ 3632 case CONST: 3633 if (GET_CODE (XEXP (value, 0)) == UNSPEC 3634 && XVECLEN (XEXP (value, 0), 0) == 1 3635 && XINT (XEXP (value, 0), 1) == 15) 3636 return 1; 3637 else if (GET_CODE (XEXP (value, 0)) == PLUS) 3638 { 3639 rtx op1 = XEXP (XEXP (value, 0), 0); 3640 rtx op2 = XEXP (XEXP (value, 0), 1); 3641 HOST_WIDE_INT offset; 3642 3643 if (ix86_cmodel == CM_LARGE) 3644 return 0; 3645 if (GET_CODE (op2) != CONST_INT) 3646 return 0; 3647 offset = trunc_int_for_mode (INTVAL (op2), DImode); 3648 switch (GET_CODE (op1)) 3649 { 3650 case SYMBOL_REF: 3651 /* For CM_SMALL assume that latest object is 1MB before 3652 end of 31bits boundary. We may also accept pretty 3653 large negative constants knowing that all objects are 3654 in the positive half of address space. */ 3655 if (ix86_cmodel == CM_SMALL 3656 && offset < 1024*1024*1024 3657 && trunc_int_for_mode (offset, SImode) == offset) 3658 return 1; 3659 /* For CM_KERNEL we know that all object resist in the 3660 negative half of 32bits address space. We may not 3661 accept negative offsets, since they may be just off 3662 and we may accept pretty large positive ones. */ 3663 if (ix86_cmodel == CM_KERNEL 3664 && offset > 0 3665 && trunc_int_for_mode (offset, SImode) == offset) 3666 return 1; 3667 break; 3668 case LABEL_REF: 3669 /* These conditions are similar to SYMBOL_REF ones, just the 3670 constraints for code models differ. */ 3671 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM) 3672 && offset < 1024*1024*1024 3673 && trunc_int_for_mode (offset, SImode) == offset) 3674 return 1; 3675 if (ix86_cmodel == CM_KERNEL 3676 && offset > 0 3677 && trunc_int_for_mode (offset, SImode) == offset) 3678 return 1; 3679 break; 3680 default: 3681 return 0; 3682 } 3683 } 3684 return 0; 3685 default: 3686 return 0; 3687 } 3688} 3689 3690/* Return 1 if VALUE can be stored in the zero extended immediate field. */ 3691int 3692x86_64_zero_extended_value (value) 3693 rtx value; 3694{ 3695 switch (GET_CODE (value)) 3696 { 3697 case CONST_DOUBLE: 3698 if (HOST_BITS_PER_WIDE_INT == 32) 3699 return (GET_MODE (value) == VOIDmode 3700 && !CONST_DOUBLE_HIGH (value)); 3701 else 3702 return 0; 3703 case CONST_INT: 3704 if (HOST_BITS_PER_WIDE_INT == 32) 3705 return INTVAL (value) >= 0; 3706 else 3707 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff); 3708 break; 3709 3710 /* For certain code models, the symbolic references are known to fit. */ 3711 case SYMBOL_REF: 3712 return ix86_cmodel == CM_SMALL; 3713 3714 /* For certain code models, the code is near as well. */ 3715 case LABEL_REF: 3716 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM; 3717 3718 /* We also may accept the offsetted memory references in certain special 3719 cases. */ 3720 case CONST: 3721 if (GET_CODE (XEXP (value, 0)) == PLUS) 3722 { 3723 rtx op1 = XEXP (XEXP (value, 0), 0); 3724 rtx op2 = XEXP (XEXP (value, 0), 1); 3725 3726 if (ix86_cmodel == CM_LARGE) 3727 return 0; 3728 switch (GET_CODE (op1)) 3729 { 3730 case SYMBOL_REF: 3731 return 0; 3732 /* For small code model we may accept pretty large positive 3733 offsets, since one bit is available for free. Negative 3734 offsets are limited by the size of NULL pointer area 3735 specified by the ABI. */ 3736 if (ix86_cmodel == CM_SMALL 3737 && GET_CODE (op2) == CONST_INT 3738 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000 3739 && (trunc_int_for_mode (INTVAL (op2), SImode) 3740 == INTVAL (op2))) 3741 return 1; 3742 /* ??? For the kernel, we may accept adjustment of 3743 -0x10000000, since we know that it will just convert 3744 negative address space to positive, but perhaps this 3745 is not worthwhile. */ 3746 break; 3747 case LABEL_REF: 3748 /* These conditions are similar to SYMBOL_REF ones, just the 3749 constraints for code models differ. */ 3750 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM) 3751 && GET_CODE (op2) == CONST_INT 3752 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000 3753 && (trunc_int_for_mode (INTVAL (op2), SImode) 3754 == INTVAL (op2))) 3755 return 1; 3756 break; 3757 default: 3758 return 0; 3759 } 3760 } 3761 return 0; 3762 default: 3763 return 0; 3764 } 3765} 3766 3767/* Value should be nonzero if functions must have frame pointers. 3768 Zero means the frame pointer need not be set up (and parms may 3769 be accessed via the stack pointer) in functions that seem suitable. */ 3770 3771int 3772ix86_frame_pointer_required () 3773{ 3774 /* If we accessed previous frames, then the generated code expects 3775 to be able to access the saved ebp value in our frame. */ 3776 if (cfun->machine->accesses_prev_frame) 3777 return 1; 3778 3779 /* Several x86 os'es need a frame pointer for other reasons, 3780 usually pertaining to setjmp. */ 3781 if (SUBTARGET_FRAME_POINTER_REQUIRED) 3782 return 1; 3783 3784 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off 3785 the frame pointer by default. Turn it back on now if we've not 3786 got a leaf function. */ 3787 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ()) 3788 return 1; 3789 3790 return 0; 3791} 3792 3793/* Record that the current function accesses previous call frames. */ 3794 3795void 3796ix86_setup_frame_addresses () 3797{ 3798 cfun->machine->accesses_prev_frame = 1; 3799} 3800 3801static char pic_label_name[32]; 3802 3803/* This function generates code for -fpic that loads %ebx with 3804 the return address of the caller and then returns. */ 3805 3806void 3807ix86_asm_file_end (file) 3808 FILE *file; 3809{ 3810 rtx xops[2]; 3811 3812 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0) 3813 return; 3814 3815 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related 3816 to updating relocations to a section being discarded such that this 3817 doesn't work. Ought to detect this at configure time. */ 3818#if 0 3819 /* The trick here is to create a linkonce section containing the 3820 pic label thunk, but to refer to it with an internal label. 3821 Because the label is internal, we don't have inter-dso name 3822 binding issues on hosts that don't support ".hidden". 3823 3824 In order to use these macros, however, we must create a fake 3825 function decl. */ 3826 if (targetm.have_named_sections) 3827 { 3828 tree decl = build_decl (FUNCTION_DECL, 3829 get_identifier ("i686.get_pc_thunk"), 3830 error_mark_node); 3831 DECL_ONE_ONLY (decl) = 1; 3832 UNIQUE_SECTION (decl, 0); 3833 named_section (decl, NULL); 3834 } 3835 else 3836#else 3837 text_section (); 3838#endif 3839 3840 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an 3841 internal (non-global) label that's being emitted, it didn't make 3842 sense to have .type information for local labels. This caused 3843 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving 3844 me debug info for a label that you're declaring non-global?) this 3845 was changed to call ASM_OUTPUT_LABEL() instead. */ 3846 3847 ASM_OUTPUT_LABEL (file, pic_label_name); 3848 3849 xops[0] = pic_offset_table_rtx; 3850 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx); 3851 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops); 3852 output_asm_insn ("ret", xops); 3853} 3854 3855void 3856load_pic_register () 3857{ 3858 rtx gotsym, pclab; 3859 3860 if (TARGET_64BIT) 3861 abort (); 3862 3863 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); 3864 3865 if (TARGET_DEEP_BRANCH_PREDICTION) 3866 { 3867 if (! pic_label_name[0]) 3868 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0); 3869 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name)); 3870 } 3871 else 3872 { 3873 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); 3874 } 3875 3876 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab)); 3877 3878 if (! TARGET_DEEP_BRANCH_PREDICTION) 3879 emit_insn (gen_popsi1 (pic_offset_table_rtx)); 3880 3881 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab)); 3882} 3883 3884/* Generate an "push" pattern for input ARG. */ 3885 3886static rtx 3887gen_push (arg) 3888 rtx arg; 3889{ 3890 return gen_rtx_SET (VOIDmode, 3891 gen_rtx_MEM (Pmode, 3892 gen_rtx_PRE_DEC (Pmode, 3893 stack_pointer_rtx)), 3894 arg); 3895} 3896 3897/* Return 1 if we need to save REGNO. */ 3898static int 3899ix86_save_reg (regno, maybe_eh_return) 3900 int regno; 3901 int maybe_eh_return; 3902{ 3903 if (regno == PIC_OFFSET_TABLE_REGNUM 3904 && (current_function_uses_pic_offset_table 3905 || current_function_uses_const_pool 3906 || current_function_calls_eh_return)) 3907 return 1; 3908 3909 if (current_function_calls_eh_return && maybe_eh_return) 3910 { 3911 unsigned i; 3912 for (i = 0; ; i++) 3913 { 3914 unsigned test = EH_RETURN_DATA_REGNO (i); 3915 if (test == INVALID_REGNUM) 3916 break; 3917 if (test == (unsigned) regno) 3918 return 1; 3919 } 3920 } 3921 3922 return (regs_ever_live[regno] 3923 && !call_used_regs[regno] 3924 && !fixed_regs[regno] 3925 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)); 3926} 3927 3928/* Return number of registers to be saved on the stack. */ 3929 3930static int 3931ix86_nsaved_regs () 3932{ 3933 int nregs = 0; 3934 int regno; 3935 3936 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--) 3937 if (ix86_save_reg (regno, true)) 3938 nregs++; 3939 return nregs; 3940} 3941 3942/* Return the offset between two registers, one to be eliminated, and the other 3943 its replacement, at the start of a routine. */ 3944 3945HOST_WIDE_INT 3946ix86_initial_elimination_offset (from, to) 3947 int from; 3948 int to; 3949{ 3950 struct ix86_frame frame; 3951 ix86_compute_frame_layout (&frame); 3952 3953 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 3954 return frame.hard_frame_pointer_offset; 3955 else if (from == FRAME_POINTER_REGNUM 3956 && to == HARD_FRAME_POINTER_REGNUM) 3957 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; 3958 else 3959 { 3960 if (to != STACK_POINTER_REGNUM) 3961 abort (); 3962 else if (from == ARG_POINTER_REGNUM) 3963 return frame.stack_pointer_offset; 3964 else if (from != FRAME_POINTER_REGNUM) 3965 abort (); 3966 else 3967 return frame.stack_pointer_offset - frame.frame_pointer_offset; 3968 } 3969} 3970 3971/* Fill structure ix86_frame about frame of currently computed function. */ 3972 3973static void 3974ix86_compute_frame_layout (frame) 3975 struct ix86_frame *frame; 3976{ 3977 HOST_WIDE_INT total_size; 3978 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT; 3979 int offset; 3980 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT; 3981 HOST_WIDE_INT size = get_frame_size (); 3982 3983 frame->nregs = ix86_nsaved_regs (); 3984 total_size = size; 3985 3986 /* Skip return value and save base pointer. */ 3987 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD; 3988 3989 frame->hard_frame_pointer_offset = offset; 3990 3991 /* Do some sanity checking of stack_alignment_needed and 3992 preferred_alignment, since i386 port is the only using those features 3993 that may break easily. */ 3994 3995 if (size && !stack_alignment_needed) 3996 abort (); 3997 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT) 3998 abort (); 3999 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT) 4000 abort (); 4001 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT) 4002 abort (); 4003 4004 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT) 4005 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT; 4006 4007 /* Register save area */ 4008 offset += frame->nregs * UNITS_PER_WORD; 4009 4010 /* Va-arg area */ 4011 if (ix86_save_varrargs_registers) 4012 { 4013 offset += X86_64_VARARGS_SIZE; 4014 frame->va_arg_size = X86_64_VARARGS_SIZE; 4015 } 4016 else 4017 frame->va_arg_size = 0; 4018 4019 /* Align start of frame for local function. */ 4020 frame->padding1 = ((offset + stack_alignment_needed - 1) 4021 & -stack_alignment_needed) - offset; 4022 4023 offset += frame->padding1; 4024 4025 /* Frame pointer points here. */ 4026 frame->frame_pointer_offset = offset; 4027 4028 offset += size; 4029 4030 /* Add outgoing arguments area. */ 4031 if (ACCUMULATE_OUTGOING_ARGS) 4032 { 4033 offset += current_function_outgoing_args_size; 4034 frame->outgoing_arguments_size = current_function_outgoing_args_size; 4035 } 4036 else 4037 frame->outgoing_arguments_size = 0; 4038 4039 /* Align stack boundary. */ 4040 frame->padding2 = ((offset + preferred_alignment - 1) 4041 & -preferred_alignment) - offset; 4042 4043 offset += frame->padding2; 4044 4045 /* We've reached end of stack frame. */ 4046 frame->stack_pointer_offset = offset; 4047 4048 /* Size prologue needs to allocate. */ 4049 frame->to_allocate = 4050 (size + frame->padding1 + frame->padding2 4051 + frame->outgoing_arguments_size + frame->va_arg_size); 4052 4053 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging 4054 && current_function_is_leaf) 4055 { 4056 frame->red_zone_size = frame->to_allocate; 4057 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) 4058 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; 4059 } 4060 else 4061 frame->red_zone_size = 0; 4062 frame->to_allocate -= frame->red_zone_size; 4063 frame->stack_pointer_offset -= frame->red_zone_size; 4064#if 0 4065 fprintf (stderr, "nregs: %i\n", frame->nregs); 4066 fprintf (stderr, "size: %i\n", size); 4067 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed); 4068 fprintf (stderr, "padding1: %i\n", frame->padding1); 4069 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size); 4070 fprintf (stderr, "padding2: %i\n", frame->padding2); 4071 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate); 4072 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size); 4073 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset); 4074 fprintf (stderr, "hard_frame_pointer_offset: %i\n", 4075 frame->hard_frame_pointer_offset); 4076 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset); 4077#endif 4078} 4079 4080/* Emit code to save registers in the prologue. */ 4081 4082static void 4083ix86_emit_save_regs () 4084{ 4085 register int regno; 4086 rtx insn; 4087 4088 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--) 4089 if (ix86_save_reg (regno, true)) 4090 { 4091 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno))); 4092 RTX_FRAME_RELATED_P (insn) = 1; 4093 } 4094} 4095 4096/* Emit code to save registers using MOV insns. First register 4097 is restored from POINTER + OFFSET. */ 4098static void 4099ix86_emit_save_regs_using_mov (pointer, offset) 4100 rtx pointer; 4101 HOST_WIDE_INT offset; 4102{ 4103 int regno; 4104 rtx insn; 4105 4106 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 4107 if (ix86_save_reg (regno, true)) 4108 { 4109 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer), 4110 Pmode, offset), 4111 gen_rtx_REG (Pmode, regno)); 4112 RTX_FRAME_RELATED_P (insn) = 1; 4113 offset += UNITS_PER_WORD; 4114 } 4115} 4116 4117/* Expand the prologue into a bunch of separate insns. */ 4118 4119void 4120ix86_expand_prologue () 4121{ 4122 rtx insn; 4123 int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table 4124 || current_function_uses_const_pool) 4125 && !TARGET_64BIT); 4126 struct ix86_frame frame; 4127 int use_mov = 0; 4128 HOST_WIDE_INT allocate; 4129 4130 if (!optimize_size) 4131 { 4132 use_fast_prologue_epilogue 4133 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT); 4134 if (TARGET_PROLOGUE_USING_MOVE) 4135 use_mov = use_fast_prologue_epilogue; 4136 } 4137 ix86_compute_frame_layout (&frame); 4138 4139 /* Note: AT&T enter does NOT have reversed args. Enter is probably 4140 slower on all targets. Also sdb doesn't like it. */ 4141 4142 if (frame_pointer_needed) 4143 { 4144 insn = emit_insn (gen_push (hard_frame_pointer_rtx)); 4145 RTX_FRAME_RELATED_P (insn) = 1; 4146 4147 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); 4148 RTX_FRAME_RELATED_P (insn) = 1; 4149 } 4150 4151 allocate = frame.to_allocate; 4152 /* In case we are dealing only with single register and empty frame, 4153 push is equivalent of the mov+add sequence. */ 4154 if (allocate == 0 && frame.nregs <= 1) 4155 use_mov = 0; 4156 4157 if (!use_mov) 4158 ix86_emit_save_regs (); 4159 else 4160 allocate += frame.nregs * UNITS_PER_WORD; 4161 4162 if (allocate == 0) 4163 ; 4164 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT) 4165 { 4166 insn = emit_insn (gen_pro_epilogue_adjust_stack 4167 (stack_pointer_rtx, stack_pointer_rtx, 4168 GEN_INT (-allocate))); 4169 RTX_FRAME_RELATED_P (insn) = 1; 4170 } 4171 else 4172 { 4173 /* ??? Is this only valid for Win32? */ 4174 4175 rtx arg0, sym; 4176 4177 if (TARGET_64BIT) 4178 abort (); 4179 4180 arg0 = gen_rtx_REG (SImode, 0); 4181 emit_move_insn (arg0, GEN_INT (allocate)); 4182 4183 sym = gen_rtx_MEM (FUNCTION_MODE, 4184 gen_rtx_SYMBOL_REF (Pmode, "_alloca")); 4185 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx)); 4186 4187 CALL_INSN_FUNCTION_USAGE (insn) 4188 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0), 4189 CALL_INSN_FUNCTION_USAGE (insn)); 4190 } 4191 if (use_mov) 4192 { 4193 if (!frame_pointer_needed || !frame.to_allocate) 4194 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate); 4195 else 4196 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx, 4197 -frame.nregs * UNITS_PER_WORD); 4198 } 4199 4200#ifdef SUBTARGET_PROLOGUE 4201 SUBTARGET_PROLOGUE; 4202#endif 4203 4204 if (pic_reg_used) 4205 load_pic_register (); 4206 4207 /* If we are profiling, make sure no instructions are scheduled before 4208 the call to mcount. However, if -fpic, the above call will have 4209 done that. */ 4210 if (current_function_profile && ! pic_reg_used) 4211 emit_insn (gen_blockage ()); 4212} 4213 4214/* Emit code to restore saved registers using MOV insns. First register 4215 is restored from POINTER + OFFSET. */ 4216static void 4217ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return) 4218 rtx pointer; 4219 int offset; 4220 int maybe_eh_return; 4221{ 4222 int regno; 4223 4224 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 4225 if (ix86_save_reg (regno, maybe_eh_return)) 4226 { 4227 emit_move_insn (gen_rtx_REG (Pmode, regno), 4228 adjust_address (gen_rtx_MEM (Pmode, pointer), 4229 Pmode, offset)); 4230 offset += UNITS_PER_WORD; 4231 } 4232} 4233 4234/* Restore function stack, frame, and registers. */ 4235 4236void 4237ix86_expand_epilogue (style) 4238 int style; 4239{ 4240 int regno; 4241 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging; 4242 struct ix86_frame frame; 4243 HOST_WIDE_INT offset; 4244 4245 ix86_compute_frame_layout (&frame); 4246 4247 /* Calculate start of saved registers relative to ebp. Special care 4248 must be taken for the normal return case of a function using 4249 eh_return: the eax and edx registers are marked as saved, but not 4250 restored along this path. */ 4251 offset = frame.nregs; 4252 if (current_function_calls_eh_return && style != 2) 4253 offset -= 2; 4254 offset *= -UNITS_PER_WORD; 4255 4256 /* If we're only restoring one register and sp is not valid then 4257 using a move instruction to restore the register since it's 4258 less work than reloading sp and popping the register. 4259 4260 The default code result in stack adjustment using add/lea instruction, 4261 while this code results in LEAVE instruction (or discrete equivalent), 4262 so it is profitable in some other cases as well. Especially when there 4263 are no registers to restore. We also use this code when TARGET_USE_LEAVE 4264 and there is exactly one register to pop. This heruistic may need some 4265 tuning in future. */ 4266 if ((!sp_valid && frame.nregs <= 1) 4267 || (TARGET_EPILOGUE_USING_MOVE 4268 && use_fast_prologue_epilogue 4269 && (frame.nregs > 1 || frame.to_allocate)) 4270 || (frame_pointer_needed && !frame.nregs && frame.to_allocate) 4271 || (frame_pointer_needed && TARGET_USE_LEAVE 4272 && use_fast_prologue_epilogue && frame.nregs == 1) 4273 || current_function_calls_eh_return) 4274 { 4275 /* Restore registers. We can use ebp or esp to address the memory 4276 locations. If both are available, default to ebp, since offsets 4277 are known to be small. Only exception is esp pointing directly to the 4278 end of block of saved registers, where we may simplify addressing 4279 mode. */ 4280 4281 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate)) 4282 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, 4283 frame.to_allocate, style == 2); 4284 else 4285 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, 4286 offset, style == 2); 4287 4288 /* eh_return epilogues need %ecx added to the stack pointer. */ 4289 if (style == 2) 4290 { 4291 rtx tmp, sa = EH_RETURN_STACKADJ_RTX; 4292 4293 if (frame_pointer_needed) 4294 { 4295 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa); 4296 tmp = plus_constant (tmp, UNITS_PER_WORD); 4297 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp)); 4298 4299 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx); 4300 emit_move_insn (hard_frame_pointer_rtx, tmp); 4301 4302 emit_insn (gen_pro_epilogue_adjust_stack 4303 (stack_pointer_rtx, sa, const0_rtx)); 4304 } 4305 else 4306 { 4307 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); 4308 tmp = plus_constant (tmp, (frame.to_allocate 4309 + frame.nregs * UNITS_PER_WORD)); 4310 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp)); 4311 } 4312 } 4313 else if (!frame_pointer_needed) 4314 emit_insn (gen_pro_epilogue_adjust_stack 4315 (stack_pointer_rtx, stack_pointer_rtx, 4316 GEN_INT (frame.to_allocate 4317 + frame.nregs * UNITS_PER_WORD))); 4318 /* If not an i386, mov & pop is faster than "leave". */ 4319 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue) 4320 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ()); 4321 else 4322 { 4323 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx, 4324 hard_frame_pointer_rtx, 4325 const0_rtx)); 4326 if (TARGET_64BIT) 4327 emit_insn (gen_popdi1 (hard_frame_pointer_rtx)); 4328 else 4329 emit_insn (gen_popsi1 (hard_frame_pointer_rtx)); 4330 } 4331 } 4332 else 4333 { 4334 /* First step is to deallocate the stack frame so that we can 4335 pop the registers. */ 4336 if (!sp_valid) 4337 { 4338 if (!frame_pointer_needed) 4339 abort (); 4340 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx, 4341 hard_frame_pointer_rtx, 4342 GEN_INT (offset))); 4343 } 4344 else if (frame.to_allocate) 4345 emit_insn (gen_pro_epilogue_adjust_stack 4346 (stack_pointer_rtx, stack_pointer_rtx, 4347 GEN_INT (frame.to_allocate))); 4348 4349 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 4350 if (ix86_save_reg (regno, false)) 4351 { 4352 if (TARGET_64BIT) 4353 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno))); 4354 else 4355 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno))); 4356 } 4357 if (frame_pointer_needed) 4358 { 4359 /* Leave results in shorter dependency chains on CPUs that are 4360 able to grok it fast. */ 4361 if (TARGET_USE_LEAVE) 4362 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ()); 4363 else if (TARGET_64BIT) 4364 emit_insn (gen_popdi1 (hard_frame_pointer_rtx)); 4365 else 4366 emit_insn (gen_popsi1 (hard_frame_pointer_rtx)); 4367 } 4368 } 4369 4370 /* Sibcall epilogues don't want a return instruction. */ 4371 if (style == 0) 4372 return; 4373 4374 if (current_function_pops_args && current_function_args_size) 4375 { 4376 rtx popc = GEN_INT (current_function_pops_args); 4377 4378 /* i386 can only pop 64K bytes. If asked to pop more, pop 4379 return address, do explicit add, and jump indirectly to the 4380 caller. */ 4381 4382 if (current_function_pops_args >= 65536) 4383 { 4384 rtx ecx = gen_rtx_REG (SImode, 2); 4385 4386 /* There are is no "pascal" calling convention in 64bit ABI. */ 4387 if (TARGET_64BIT) 4388 abort (); 4389 4390 emit_insn (gen_popsi1 (ecx)); 4391 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc)); 4392 emit_jump_insn (gen_return_indirect_internal (ecx)); 4393 } 4394 else 4395 emit_jump_insn (gen_return_pop_internal (popc)); 4396 } 4397 else 4398 emit_jump_insn (gen_return_internal ()); 4399} 4400 4401/* Extract the parts of an RTL expression that is a valid memory address 4402 for an instruction. Return 0 if the structure of the address is 4403 grossly off. Return -1 if the address contains ASHIFT, so it is not 4404 strictly valid, but still used for computing length of lea instruction. 4405 */ 4406 4407static int 4408ix86_decompose_address (addr, out) 4409 register rtx addr; 4410 struct ix86_address *out; 4411{ 4412 rtx base = NULL_RTX; 4413 rtx index = NULL_RTX; 4414 rtx disp = NULL_RTX; 4415 HOST_WIDE_INT scale = 1; 4416 rtx scale_rtx = NULL_RTX; 4417 int retval = 1; 4418 4419 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG) 4420 base = addr; 4421 else if (GET_CODE (addr) == PLUS) 4422 { 4423 rtx op0 = XEXP (addr, 0); 4424 rtx op1 = XEXP (addr, 1); 4425 enum rtx_code code0 = GET_CODE (op0); 4426 enum rtx_code code1 = GET_CODE (op1); 4427 4428 if (code0 == REG || code0 == SUBREG) 4429 { 4430 if (code1 == REG || code1 == SUBREG) 4431 index = op0, base = op1; /* index + base */ 4432 else 4433 base = op0, disp = op1; /* base + displacement */ 4434 } 4435 else if (code0 == MULT) 4436 { 4437 index = XEXP (op0, 0); 4438 scale_rtx = XEXP (op0, 1); 4439 if (code1 == REG || code1 == SUBREG) 4440 base = op1; /* index*scale + base */ 4441 else 4442 disp = op1; /* index*scale + disp */ 4443 } 4444 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT) 4445 { 4446 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */ 4447 scale_rtx = XEXP (XEXP (op0, 0), 1); 4448 base = XEXP (op0, 1); 4449 disp = op1; 4450 } 4451 else if (code0 == PLUS) 4452 { 4453 index = XEXP (op0, 0); /* index + base + disp */ 4454 base = XEXP (op0, 1); 4455 disp = op1; 4456 } 4457 else 4458 return 0; 4459 } 4460 else if (GET_CODE (addr) == MULT) 4461 { 4462 index = XEXP (addr, 0); /* index*scale */ 4463 scale_rtx = XEXP (addr, 1); 4464 } 4465 else if (GET_CODE (addr) == ASHIFT) 4466 { 4467 rtx tmp; 4468 4469 /* We're called for lea too, which implements ashift on occasion. */ 4470 index = XEXP (addr, 0); 4471 tmp = XEXP (addr, 1); 4472 if (GET_CODE (tmp) != CONST_INT) 4473 return 0; 4474 scale = INTVAL (tmp); 4475 if ((unsigned HOST_WIDE_INT) scale > 3) 4476 return 0; 4477 scale = 1 << scale; 4478 retval = -1; 4479 } 4480 else 4481 disp = addr; /* displacement */ 4482 4483 /* Extract the integral value of scale. */ 4484 if (scale_rtx) 4485 { 4486 if (GET_CODE (scale_rtx) != CONST_INT) 4487 return 0; 4488 scale = INTVAL (scale_rtx); 4489 } 4490 4491 /* Allow arg pointer and stack pointer as index if there is not scaling */ 4492 if (base && index && scale == 1 4493 && (index == arg_pointer_rtx || index == frame_pointer_rtx 4494 || index == stack_pointer_rtx)) 4495 { 4496 rtx tmp = base; 4497 base = index; 4498 index = tmp; 4499 } 4500 4501 /* Special case: %ebp cannot be encoded as a base without a displacement. */ 4502 if ((base == hard_frame_pointer_rtx 4503 || base == frame_pointer_rtx 4504 || base == arg_pointer_rtx) && !disp) 4505 disp = const0_rtx; 4506 4507 /* Special case: on K6, [%esi] makes the instruction vector decoded. 4508 Avoid this by transforming to [%esi+0]. */ 4509 if (ix86_cpu == PROCESSOR_K6 && !optimize_size 4510 && base && !index && !disp 4511 && REG_P (base) 4512 && REGNO_REG_CLASS (REGNO (base)) == SIREG) 4513 disp = const0_rtx; 4514 4515 /* Special case: encode reg+reg instead of reg*2. */ 4516 if (!base && index && scale && scale == 2) 4517 base = index, scale = 1; 4518 4519 /* Special case: scaling cannot be encoded without base or displacement. */ 4520 if (!base && !disp && index && scale != 1) 4521 disp = const0_rtx; 4522 4523 out->base = base; 4524 out->index = index; 4525 out->disp = disp; 4526 out->scale = scale; 4527 4528 return retval; 4529} 4530 4531/* Return cost of the memory address x. 4532 For i386, it is better to use a complex address than let gcc copy 4533 the address into a reg and make a new pseudo. But not if the address 4534 requires to two regs - that would mean more pseudos with longer 4535 lifetimes. */ 4536int 4537ix86_address_cost (x) 4538 rtx x; 4539{ 4540 struct ix86_address parts; 4541 int cost = 1; 4542 4543 if (!ix86_decompose_address (x, &parts)) 4544 abort (); 4545 4546 /* More complex memory references are better. */ 4547 if (parts.disp && parts.disp != const0_rtx) 4548 cost--; 4549 4550 /* Attempt to minimize number of registers in the address. */ 4551 if ((parts.base 4552 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)) 4553 || (parts.index 4554 && (!REG_P (parts.index) 4555 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER))) 4556 cost++; 4557 4558 if (parts.base 4559 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER) 4560 && parts.index 4561 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER) 4562 && parts.base != parts.index) 4563 cost++; 4564 4565 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b, 4566 since it's predecode logic can't detect the length of instructions 4567 and it degenerates to vector decoded. Increase cost of such 4568 addresses here. The penalty is minimally 2 cycles. It may be worthwhile 4569 to split such addresses or even refuse such addresses at all. 4570 4571 Following addressing modes are affected: 4572 [base+scale*index] 4573 [scale*index+disp] 4574 [base+index] 4575 4576 The first and last case may be avoidable by explicitly coding the zero in 4577 memory address, but I don't have AMD-K6 machine handy to check this 4578 theory. */ 4579 4580 if (TARGET_K6 4581 && ((!parts.disp && parts.base && parts.index && parts.scale != 1) 4582 || (parts.disp && !parts.base && parts.index && parts.scale != 1) 4583 || (!parts.disp && parts.base && parts.index && parts.scale == 1))) 4584 cost += 10; 4585 4586 return cost; 4587} 4588 4589/* If X is a machine specific address (i.e. a symbol or label being 4590 referenced as a displacement from the GOT implemented using an 4591 UNSPEC), then return the base term. Otherwise return X. */ 4592 4593rtx 4594ix86_find_base_term (x) 4595 rtx x; 4596{ 4597 rtx term; 4598 4599 if (TARGET_64BIT) 4600 { 4601 if (GET_CODE (x) != CONST) 4602 return x; 4603 term = XEXP (x, 0); 4604 if (GET_CODE (term) == PLUS 4605 && (GET_CODE (XEXP (term, 1)) == CONST_INT 4606 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE)) 4607 term = XEXP (term, 0); 4608 if (GET_CODE (term) != UNSPEC 4609 || XVECLEN (term, 0) != 1 4610 || XINT (term, 1) != 15) 4611 return x; 4612 4613 term = XVECEXP (term, 0, 0); 4614 4615 if (GET_CODE (term) != SYMBOL_REF 4616 && GET_CODE (term) != LABEL_REF) 4617 return x; 4618 4619 return term; 4620 } 4621 4622 if (GET_CODE (x) != PLUS 4623 || XEXP (x, 0) != pic_offset_table_rtx 4624 || GET_CODE (XEXP (x, 1)) != CONST) 4625 return x; 4626 4627 term = XEXP (XEXP (x, 1), 0); 4628 4629 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT) 4630 term = XEXP (term, 0); 4631 4632 if (GET_CODE (term) != UNSPEC 4633 || XVECLEN (term, 0) != 1 4634 || XINT (term, 1) != 7) 4635 return x; 4636 4637 term = XVECEXP (term, 0, 0); 4638 4639 if (GET_CODE (term) != SYMBOL_REF 4640 && GET_CODE (term) != LABEL_REF) 4641 return x; 4642 4643 return term; 4644} 4645 4646/* Determine if a given CONST RTX is a valid memory displacement 4647 in PIC mode. */ 4648 4649int 4650legitimate_pic_address_disp_p (disp) 4651 register rtx disp; 4652{ 4653 /* In 64bit mode we can allow direct addresses of symbols and labels 4654 when they are not dynamic symbols. */ 4655 if (TARGET_64BIT) 4656 { 4657 rtx x = disp; 4658 if (GET_CODE (disp) == CONST) 4659 x = XEXP (disp, 0); 4660 /* ??? Handle PIC code models */ 4661 if (GET_CODE (x) == PLUS 4662 && (GET_CODE (XEXP (x, 1)) == CONST_INT 4663 && ix86_cmodel == CM_SMALL_PIC 4664 && INTVAL (XEXP (x, 1)) < 1024*1024*1024 4665 && INTVAL (XEXP (x, 1)) > -1024*1024*1024)) 4666 x = XEXP (x, 0); 4667 if (local_symbolic_operand (x, Pmode)) 4668 return 1; 4669 } 4670 if (GET_CODE (disp) != CONST) 4671 return 0; 4672 disp = XEXP (disp, 0); 4673 4674 if (TARGET_64BIT) 4675 { 4676 /* We are unsafe to allow PLUS expressions. This limit allowed distance 4677 of GOT tables. We should not need these anyway. */ 4678 if (GET_CODE (disp) != UNSPEC 4679 || XVECLEN (disp, 0) != 1 4680 || XINT (disp, 1) != 15) 4681 return 0; 4682 4683 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF 4684 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF) 4685 return 0; 4686 return 1; 4687 } 4688 4689 if (GET_CODE (disp) == PLUS) 4690 { 4691 if (GET_CODE (XEXP (disp, 1)) != CONST_INT) 4692 return 0; 4693 disp = XEXP (disp, 0); 4694 } 4695 4696 if (GET_CODE (disp) != UNSPEC 4697 || XVECLEN (disp, 0) != 1) 4698 return 0; 4699 4700 /* Must be @GOT or @GOTOFF. */ 4701 switch (XINT (disp, 1)) 4702 { 4703 case 6: /* @GOT */ 4704 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF; 4705 4706 case 7: /* @GOTOFF */ 4707 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode); 4708 } 4709 4710 return 0; 4711} 4712 4713/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid 4714 memory address for an instruction. The MODE argument is the machine mode 4715 for the MEM expression that wants to use this address. 4716 4717 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should 4718 convert common non-canonical forms to canonical form so that they will 4719 be recognized. */ 4720 4721int 4722legitimate_address_p (mode, addr, strict) 4723 enum machine_mode mode; 4724 register rtx addr; 4725 int strict; 4726{ 4727 struct ix86_address parts; 4728 rtx base, index, disp; 4729 HOST_WIDE_INT scale; 4730 const char *reason = NULL; 4731 rtx reason_rtx = NULL_RTX; 4732 4733 if (TARGET_DEBUG_ADDR) 4734 { 4735 fprintf (stderr, 4736 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n", 4737 GET_MODE_NAME (mode), strict); 4738 debug_rtx (addr); 4739 } 4740 4741 if (ix86_decompose_address (addr, &parts) <= 0) 4742 { 4743 reason = "decomposition failed"; 4744 goto report_error; 4745 } 4746 4747 base = parts.base; 4748 index = parts.index; 4749 disp = parts.disp; 4750 scale = parts.scale; 4751 4752 /* Validate base register. 4753 4754 Don't allow SUBREG's here, it can lead to spill failures when the base 4755 is one word out of a two word structure, which is represented internally 4756 as a DImode int. */ 4757 4758 if (base) 4759 { 4760 reason_rtx = base; 4761 4762 if (GET_CODE (base) != REG) 4763 { 4764 reason = "base is not a register"; 4765 goto report_error; 4766 } 4767 4768 if (GET_MODE (base) != Pmode) 4769 { 4770 reason = "base is not in Pmode"; 4771 goto report_error; 4772 } 4773 4774 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base)) 4775 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base))) 4776 { 4777 reason = "base is not valid"; 4778 goto report_error; 4779 } 4780 } 4781 4782 /* Validate index register. 4783 4784 Don't allow SUBREG's here, it can lead to spill failures when the index 4785 is one word out of a two word structure, which is represented internally 4786 as a DImode int. */ 4787 4788 if (index) 4789 { 4790 reason_rtx = index; 4791 4792 if (GET_CODE (index) != REG) 4793 { 4794 reason = "index is not a register"; 4795 goto report_error; 4796 } 4797 4798 if (GET_MODE (index) != Pmode) 4799 { 4800 reason = "index is not in Pmode"; 4801 goto report_error; 4802 } 4803 4804 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index)) 4805 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index))) 4806 { 4807 reason = "index is not valid"; 4808 goto report_error; 4809 } 4810 } 4811 4812 /* Validate scale factor. */ 4813 if (scale != 1) 4814 { 4815 reason_rtx = GEN_INT (scale); 4816 if (!index) 4817 { 4818 reason = "scale without index"; 4819 goto report_error; 4820 } 4821 4822 if (scale != 2 && scale != 4 && scale != 8) 4823 { 4824 reason = "scale is not a valid multiplier"; 4825 goto report_error; 4826 } 4827 } 4828 4829 /* Validate displacement. */ 4830 if (disp) 4831 { 4832 reason_rtx = disp; 4833 4834 if (!CONSTANT_ADDRESS_P (disp)) 4835 { 4836 reason = "displacement is not constant"; 4837 goto report_error; 4838 } 4839 4840 if (TARGET_64BIT) 4841 { 4842 if (!x86_64_sign_extended_value (disp)) 4843 { 4844 reason = "displacement is out of range"; 4845 goto report_error; 4846 } 4847 } 4848 else 4849 { 4850 if (GET_CODE (disp) == CONST_DOUBLE) 4851 { 4852 reason = "displacement is a const_double"; 4853 goto report_error; 4854 } 4855 } 4856 4857 if (flag_pic && SYMBOLIC_CONST (disp)) 4858 { 4859 if (TARGET_64BIT && (index || base)) 4860 { 4861 reason = "non-constant pic memory reference"; 4862 goto report_error; 4863 } 4864 if (! legitimate_pic_address_disp_p (disp)) 4865 { 4866 reason = "displacement is an invalid pic construct"; 4867 goto report_error; 4868 } 4869 4870 /* This code used to verify that a symbolic pic displacement 4871 includes the pic_offset_table_rtx register. 4872 4873 While this is good idea, unfortunately these constructs may 4874 be created by "adds using lea" optimization for incorrect 4875 code like: 4876 4877 int a; 4878 int foo(int i) 4879 { 4880 return *(&a+i); 4881 } 4882 4883 This code is nonsensical, but results in addressing 4884 GOT table with pic_offset_table_rtx base. We can't 4885 just refuse it easily, since it gets matched by 4886 "addsi3" pattern, that later gets split to lea in the 4887 case output register differs from input. While this 4888 can be handled by separate addsi pattern for this case 4889 that never results in lea, this seems to be easier and 4890 correct fix for crash to disable this test. */ 4891 } 4892 else if (HALF_PIC_P ()) 4893 { 4894 if (! HALF_PIC_ADDRESS_P (disp) 4895 || (base != NULL_RTX || index != NULL_RTX)) 4896 { 4897 reason = "displacement is an invalid half-pic reference"; 4898 goto report_error; 4899 } 4900 } 4901 } 4902 4903 /* Everything looks valid. */ 4904 if (TARGET_DEBUG_ADDR) 4905 fprintf (stderr, "Success.\n"); 4906 return TRUE; 4907 4908report_error: 4909 if (TARGET_DEBUG_ADDR) 4910 { 4911 fprintf (stderr, "Error: %s\n", reason); 4912 debug_rtx (reason_rtx); 4913 } 4914 return FALSE; 4915} 4916 4917/* Return an unique alias set for the GOT. */ 4918 4919static HOST_WIDE_INT 4920ix86_GOT_alias_set () 4921{ 4922 static HOST_WIDE_INT set = -1; 4923 if (set == -1) 4924 set = new_alias_set (); 4925 return set; 4926} 4927 4928/* Return a legitimate reference for ORIG (an address) using the 4929 register REG. If REG is 0, a new pseudo is generated. 4930 4931 There are two types of references that must be handled: 4932 4933 1. Global data references must load the address from the GOT, via 4934 the PIC reg. An insn is emitted to do this load, and the reg is 4935 returned. 4936 4937 2. Static data references, constant pool addresses, and code labels 4938 compute the address as an offset from the GOT, whose base is in 4939 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to 4940 differentiate them from global data objects. The returned 4941 address is the PIC reg + an unspec constant. 4942 4943 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC 4944 reg also appears in the address. */ 4945 4946rtx 4947legitimize_pic_address (orig, reg) 4948 rtx orig; 4949 rtx reg; 4950{ 4951 rtx addr = orig; 4952 rtx new = orig; 4953 rtx base; 4954 4955 if (local_symbolic_operand (addr, Pmode)) 4956 { 4957 /* In 64bit mode we can address such objects directly. */ 4958 if (TARGET_64BIT) 4959 new = addr; 4960 else 4961 { 4962 /* This symbol may be referenced via a displacement from the PIC 4963 base address (@GOTOFF). */ 4964 4965 current_function_uses_pic_offset_table = 1; 4966 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7); 4967 new = gen_rtx_CONST (Pmode, new); 4968 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); 4969 4970 if (reg != 0) 4971 { 4972 emit_move_insn (reg, new); 4973 new = reg; 4974 } 4975 } 4976 } 4977 else if (GET_CODE (addr) == SYMBOL_REF) 4978 { 4979 if (TARGET_64BIT) 4980 { 4981 current_function_uses_pic_offset_table = 1; 4982 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15); 4983 new = gen_rtx_CONST (Pmode, new); 4984 new = gen_rtx_MEM (Pmode, new); 4985 RTX_UNCHANGING_P (new) = 1; 4986 set_mem_alias_set (new, ix86_GOT_alias_set ()); 4987 4988 if (reg == 0) 4989 reg = gen_reg_rtx (Pmode); 4990 /* Use directly gen_movsi, otherwise the address is loaded 4991 into register for CSE. We don't want to CSE this addresses, 4992 instead we CSE addresses from the GOT table, so skip this. */ 4993 emit_insn (gen_movsi (reg, new)); 4994 new = reg; 4995 } 4996 else 4997 { 4998 /* This symbol must be referenced via a load from the 4999 Global Offset Table (@GOT). */ 5000 5001 current_function_uses_pic_offset_table = 1; 5002 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6); 5003 new = gen_rtx_CONST (Pmode, new); 5004 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); 5005 new = gen_rtx_MEM (Pmode, new); 5006 RTX_UNCHANGING_P (new) = 1; 5007 set_mem_alias_set (new, ix86_GOT_alias_set ()); 5008 5009 if (reg == 0) 5010 reg = gen_reg_rtx (Pmode); 5011 emit_move_insn (reg, new); 5012 new = reg; 5013 } 5014 } 5015 else 5016 { 5017 if (GET_CODE (addr) == CONST) 5018 { 5019 addr = XEXP (addr, 0); 5020 5021 /* We must match stuff we generate before. Assume the only 5022 unspecs that can get here are ours. Not that we could do 5023 anything with them anyway... */ 5024 if (GET_CODE (addr) == UNSPEC 5025 || (GET_CODE (addr) == PLUS 5026 && GET_CODE (XEXP (addr, 0)) == UNSPEC)) 5027 return orig; 5028 if (GET_CODE (addr) != PLUS) 5029 abort (); 5030 } 5031 if (GET_CODE (addr) == PLUS) 5032 { 5033 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1); 5034 5035 /* Check first to see if this is a constant offset from a @GOTOFF 5036 symbol reference. */ 5037 if (local_symbolic_operand (op0, Pmode) 5038 && GET_CODE (op1) == CONST_INT) 5039 { 5040 if (!TARGET_64BIT) 5041 { 5042 current_function_uses_pic_offset_table = 1; 5043 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7); 5044 new = gen_rtx_PLUS (Pmode, new, op1); 5045 new = gen_rtx_CONST (Pmode, new); 5046 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); 5047 5048 if (reg != 0) 5049 { 5050 emit_move_insn (reg, new); 5051 new = reg; 5052 } 5053 } 5054 else 5055 { 5056 /* ??? We need to limit offsets here. */ 5057 } 5058 } 5059 else 5060 { 5061 base = legitimize_pic_address (XEXP (addr, 0), reg); 5062 new = legitimize_pic_address (XEXP (addr, 1), 5063 base == reg ? NULL_RTX : reg); 5064 5065 if (GET_CODE (new) == CONST_INT) 5066 new = plus_constant (base, INTVAL (new)); 5067 else 5068 { 5069 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1))) 5070 { 5071 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0)); 5072 new = XEXP (new, 1); 5073 } 5074 new = gen_rtx_PLUS (Pmode, base, new); 5075 } 5076 } 5077 } 5078 } 5079 return new; 5080} 5081 5082/* Try machine-dependent ways of modifying an illegitimate address 5083 to be legitimate. If we find one, return the new, valid address. 5084 This macro is used in only one place: `memory_address' in explow.c. 5085 5086 OLDX is the address as it was before break_out_memory_refs was called. 5087 In some cases it is useful to look at this to decide what needs to be done. 5088 5089 MODE and WIN are passed so that this macro can use 5090 GO_IF_LEGITIMATE_ADDRESS. 5091 5092 It is always safe for this macro to do nothing. It exists to recognize 5093 opportunities to optimize the output. 5094 5095 For the 80386, we handle X+REG by loading X into a register R and 5096 using R+REG. R will go in a general reg and indexing will be used. 5097 However, if REG is a broken-out memory address or multiplication, 5098 nothing needs to be done because REG can certainly go in a general reg. 5099 5100 When -fpic is used, special handling is needed for symbolic references. 5101 See comments by legitimize_pic_address in i386.c for details. */ 5102 5103rtx 5104legitimize_address (x, oldx, mode) 5105 register rtx x; 5106 register rtx oldx ATTRIBUTE_UNUSED; 5107 enum machine_mode mode; 5108{ 5109 int changed = 0; 5110 unsigned log; 5111 5112 if (TARGET_DEBUG_ADDR) 5113 { 5114 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n", 5115 GET_MODE_NAME (mode)); 5116 debug_rtx (x); 5117 } 5118 5119 if (flag_pic && SYMBOLIC_CONST (x)) 5120 return legitimize_pic_address (x, 0); 5121 5122 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */ 5123 if (GET_CODE (x) == ASHIFT 5124 && GET_CODE (XEXP (x, 1)) == CONST_INT 5125 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4) 5126 { 5127 changed = 1; 5128 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)), 5129 GEN_INT (1 << log)); 5130 } 5131 5132 if (GET_CODE (x) == PLUS) 5133 { 5134 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */ 5135 5136 if (GET_CODE (XEXP (x, 0)) == ASHIFT 5137 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT 5138 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4) 5139 { 5140 changed = 1; 5141 XEXP (x, 0) = gen_rtx_MULT (Pmode, 5142 force_reg (Pmode, XEXP (XEXP (x, 0), 0)), 5143 GEN_INT (1 << log)); 5144 } 5145 5146 if (GET_CODE (XEXP (x, 1)) == ASHIFT 5147 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT 5148 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4) 5149 { 5150 changed = 1; 5151 XEXP (x, 1) = gen_rtx_MULT (Pmode, 5152 force_reg (Pmode, XEXP (XEXP (x, 1), 0)), 5153 GEN_INT (1 << log)); 5154 } 5155 5156 /* Put multiply first if it isn't already. */ 5157 if (GET_CODE (XEXP (x, 1)) == MULT) 5158 { 5159 rtx tmp = XEXP (x, 0); 5160 XEXP (x, 0) = XEXP (x, 1); 5161 XEXP (x, 1) = tmp; 5162 changed = 1; 5163 } 5164 5165 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const))) 5166 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be 5167 created by virtual register instantiation, register elimination, and 5168 similar optimizations. */ 5169 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS) 5170 { 5171 changed = 1; 5172 x = gen_rtx_PLUS (Pmode, 5173 gen_rtx_PLUS (Pmode, XEXP (x, 0), 5174 XEXP (XEXP (x, 1), 0)), 5175 XEXP (XEXP (x, 1), 1)); 5176 } 5177 5178 /* Canonicalize 5179 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const) 5180 into (plus (plus (mult (reg) (const)) (reg)) (const)). */ 5181 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS 5182 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT 5183 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS 5184 && CONSTANT_P (XEXP (x, 1))) 5185 { 5186 rtx constant; 5187 rtx other = NULL_RTX; 5188 5189 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 5190 { 5191 constant = XEXP (x, 1); 5192 other = XEXP (XEXP (XEXP (x, 0), 1), 1); 5193 } 5194 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT) 5195 { 5196 constant = XEXP (XEXP (XEXP (x, 0), 1), 1); 5197 other = XEXP (x, 1); 5198 } 5199 else 5200 constant = 0; 5201 5202 if (constant) 5203 { 5204 changed = 1; 5205 x = gen_rtx_PLUS (Pmode, 5206 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0), 5207 XEXP (XEXP (XEXP (x, 0), 1), 0)), 5208 plus_constant (other, INTVAL (constant))); 5209 } 5210 } 5211 5212 if (changed && legitimate_address_p (mode, x, FALSE)) 5213 return x; 5214 5215 if (GET_CODE (XEXP (x, 0)) == MULT) 5216 { 5217 changed = 1; 5218 XEXP (x, 0) = force_operand (XEXP (x, 0), 0); 5219 } 5220 5221 if (GET_CODE (XEXP (x, 1)) == MULT) 5222 { 5223 changed = 1; 5224 XEXP (x, 1) = force_operand (XEXP (x, 1), 0); 5225 } 5226 5227 if (changed 5228 && GET_CODE (XEXP (x, 1)) == REG 5229 && GET_CODE (XEXP (x, 0)) == REG) 5230 return x; 5231 5232 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1))) 5233 { 5234 changed = 1; 5235 x = legitimize_pic_address (x, 0); 5236 } 5237 5238 if (changed && legitimate_address_p (mode, x, FALSE)) 5239 return x; 5240 5241 if (GET_CODE (XEXP (x, 0)) == REG) 5242 { 5243 register rtx temp = gen_reg_rtx (Pmode); 5244 register rtx val = force_operand (XEXP (x, 1), temp); 5245 if (val != temp) 5246 emit_move_insn (temp, val); 5247 5248 XEXP (x, 1) = temp; 5249 return x; 5250 } 5251 5252 else if (GET_CODE (XEXP (x, 1)) == REG) 5253 { 5254 register rtx temp = gen_reg_rtx (Pmode); 5255 register rtx val = force_operand (XEXP (x, 0), temp); 5256 if (val != temp) 5257 emit_move_insn (temp, val); 5258 5259 XEXP (x, 0) = temp; 5260 return x; 5261 } 5262 } 5263 5264 return x; 5265} 5266 5267/* Print an integer constant expression in assembler syntax. Addition 5268 and subtraction are the only arithmetic that may appear in these 5269 expressions. FILE is the stdio stream to write to, X is the rtx, and 5270 CODE is the operand print code from the output string. */ 5271 5272static void 5273output_pic_addr_const (file, x, code) 5274 FILE *file; 5275 rtx x; 5276 int code; 5277{ 5278 char buf[256]; 5279 5280 switch (GET_CODE (x)) 5281 { 5282 case PC: 5283 if (flag_pic) 5284 putc ('.', file); 5285 else 5286 abort (); 5287 break; 5288 5289 case SYMBOL_REF: 5290 assemble_name (file, XSTR (x, 0)); 5291 if (code == 'P' && ! SYMBOL_REF_FLAG (x)) 5292 fputs ("@PLT", file); 5293 break; 5294 5295 case LABEL_REF: 5296 x = XEXP (x, 0); 5297 /* FALLTHRU */ 5298 case CODE_LABEL: 5299 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); 5300 assemble_name (asm_out_file, buf); 5301 break; 5302 5303 case CONST_INT: 5304 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); 5305 break; 5306 5307 case CONST: 5308 /* This used to output parentheses around the expression, 5309 but that does not work on the 386 (either ATT or BSD assembler). */ 5310 output_pic_addr_const (file, XEXP (x, 0), code); 5311 break; 5312 5313 case CONST_DOUBLE: 5314 if (GET_MODE (x) == VOIDmode) 5315 { 5316 /* We can use %d if the number is <32 bits and positive. */ 5317 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0) 5318 fprintf (file, "0x%lx%08lx", 5319 (unsigned long) CONST_DOUBLE_HIGH (x), 5320 (unsigned long) CONST_DOUBLE_LOW (x)); 5321 else 5322 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x)); 5323 } 5324 else 5325 /* We can't handle floating point constants; 5326 PRINT_OPERAND must handle them. */ 5327 output_operand_lossage ("floating constant misused"); 5328 break; 5329 5330 case PLUS: 5331 /* Some assemblers need integer constants to appear first. */ 5332 if (GET_CODE (XEXP (x, 0)) == CONST_INT) 5333 { 5334 output_pic_addr_const (file, XEXP (x, 0), code); 5335 putc ('+', file); 5336 output_pic_addr_const (file, XEXP (x, 1), code); 5337 } 5338 else if (GET_CODE (XEXP (x, 1)) == CONST_INT) 5339 { 5340 output_pic_addr_const (file, XEXP (x, 1), code); 5341 putc ('+', file); 5342 output_pic_addr_const (file, XEXP (x, 0), code); 5343 } 5344 else 5345 abort (); 5346 break; 5347 5348 case MINUS: 5349 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file); 5350 output_pic_addr_const (file, XEXP (x, 0), code); 5351 putc ('-', file); 5352 output_pic_addr_const (file, XEXP (x, 1), code); 5353 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file); 5354 break; 5355 5356 case UNSPEC: 5357 if (XVECLEN (x, 0) != 1) 5358 abort (); 5359 output_pic_addr_const (file, XVECEXP (x, 0, 0), code); 5360 switch (XINT (x, 1)) 5361 { 5362 case 6: 5363 fputs ("@GOT", file); 5364 break; 5365 case 7: 5366 fputs ("@GOTOFF", file); 5367 break; 5368 case 8: 5369 fputs ("@PLT", file); 5370 break; 5371 case 15: 5372 fputs ("@GOTPCREL(%RIP)", file); 5373 break; 5374 default: 5375 output_operand_lossage ("invalid UNSPEC as operand"); 5376 break; 5377 } 5378 break; 5379 5380 default: 5381 output_operand_lossage ("invalid expression as operand"); 5382 } 5383} 5384 5385/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST. 5386 We need to handle our special PIC relocations. */ 5387 5388void 5389i386_dwarf_output_addr_const (file, x) 5390 FILE *file; 5391 rtx x; 5392{ 5393#ifdef ASM_QUAD 5394 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG); 5395#else 5396 if (TARGET_64BIT) 5397 abort (); 5398 fprintf (file, "%s", ASM_LONG); 5399#endif 5400 if (flag_pic) 5401 output_pic_addr_const (file, x, '\0'); 5402 else 5403 output_addr_const (file, x); 5404 fputc ('\n', file); 5405} 5406 5407/* In the name of slightly smaller debug output, and to cater to 5408 general assembler losage, recognize PIC+GOTOFF and turn it back 5409 into a direct symbol reference. */ 5410 5411rtx 5412i386_simplify_dwarf_addr (orig_x) 5413 rtx orig_x; 5414{ 5415 rtx x = orig_x, y; 5416 5417 if (GET_CODE (x) == MEM) 5418 x = XEXP (x, 0); 5419 5420 if (TARGET_64BIT) 5421 { 5422 if (GET_CODE (x) != CONST 5423 || GET_CODE (XEXP (x, 0)) != UNSPEC 5424 || XINT (XEXP (x, 0), 1) != 15 5425 || GET_CODE (orig_x) != MEM) 5426 return orig_x; 5427 return XVECEXP (XEXP (x, 0), 0, 0); 5428 } 5429 5430 if (GET_CODE (x) != PLUS 5431 || GET_CODE (XEXP (x, 1)) != CONST) 5432 return orig_x; 5433 5434 if (GET_CODE (XEXP (x, 0)) == REG 5435 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM) 5436 /* %ebx + GOT/GOTOFF */ 5437 y = NULL; 5438 else if (GET_CODE (XEXP (x, 0)) == PLUS) 5439 { 5440 /* %ebx + %reg * scale + GOT/GOTOFF */ 5441 y = XEXP (x, 0); 5442 if (GET_CODE (XEXP (y, 0)) == REG 5443 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM) 5444 y = XEXP (y, 1); 5445 else if (GET_CODE (XEXP (y, 1)) == REG 5446 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM) 5447 y = XEXP (y, 0); 5448 else 5449 return orig_x; 5450 if (GET_CODE (y) != REG 5451 && GET_CODE (y) != MULT 5452 && GET_CODE (y) != ASHIFT) 5453 return orig_x; 5454 } 5455 else 5456 return orig_x; 5457 5458 x = XEXP (XEXP (x, 1), 0); 5459 if (GET_CODE (x) == UNSPEC 5460 && ((XINT (x, 1) == 6 && GET_CODE (orig_x) == MEM) 5461 || (XINT (x, 1) == 7 && GET_CODE (orig_x) != MEM))) 5462 { 5463 if (y) 5464 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0)); 5465 return XVECEXP (x, 0, 0); 5466 } 5467 5468 if (GET_CODE (x) == PLUS 5469 && GET_CODE (XEXP (x, 0)) == UNSPEC 5470 && GET_CODE (XEXP (x, 1)) == CONST_INT 5471 && ((XINT (XEXP (x, 0), 1) == 6 && GET_CODE (orig_x) == MEM) 5472 || (XINT (XEXP (x, 0), 1) == 7 && GET_CODE (orig_x) != MEM))) 5473 { 5474 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1)); 5475 if (y) 5476 return gen_rtx_PLUS (Pmode, y, x); 5477 return x; 5478 } 5479 5480 return orig_x; 5481} 5482 5483static void 5484put_condition_code (code, mode, reverse, fp, file) 5485 enum rtx_code code; 5486 enum machine_mode mode; 5487 int reverse, fp; 5488 FILE *file; 5489{ 5490 const char *suffix; 5491 5492 if (mode == CCFPmode || mode == CCFPUmode) 5493 { 5494 enum rtx_code second_code, bypass_code; 5495 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 5496 if (bypass_code != NIL || second_code != NIL) 5497 abort (); 5498 code = ix86_fp_compare_code_to_integer (code); 5499 mode = CCmode; 5500 } 5501 if (reverse) 5502 code = reverse_condition (code); 5503 5504 switch (code) 5505 { 5506 case EQ: 5507 suffix = "e"; 5508 break; 5509 case NE: 5510 suffix = "ne"; 5511 break; 5512 case GT: 5513 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode) 5514 abort (); 5515 suffix = "g"; 5516 break; 5517 case GTU: 5518 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers. 5519 Those same assemblers have the same but opposite losage on cmov. */ 5520 if (mode != CCmode) 5521 abort (); 5522 suffix = fp ? "nbe" : "a"; 5523 break; 5524 case LT: 5525 if (mode == CCNOmode || mode == CCGOCmode) 5526 suffix = "s"; 5527 else if (mode == CCmode || mode == CCGCmode) 5528 suffix = "l"; 5529 else 5530 abort (); 5531 break; 5532 case LTU: 5533 if (mode != CCmode) 5534 abort (); 5535 suffix = "b"; 5536 break; 5537 case GE: 5538 if (mode == CCNOmode || mode == CCGOCmode) 5539 suffix = "ns"; 5540 else if (mode == CCmode || mode == CCGCmode) 5541 suffix = "ge"; 5542 else 5543 abort (); 5544 break; 5545 case GEU: 5546 /* ??? As above. */ 5547 if (mode != CCmode) 5548 abort (); 5549 suffix = fp ? "nb" : "ae"; 5550 break; 5551 case LE: 5552 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode) 5553 abort (); 5554 suffix = "le"; 5555 break; 5556 case LEU: 5557 if (mode != CCmode) 5558 abort (); 5559 suffix = "be"; 5560 break; 5561 case UNORDERED: 5562 suffix = fp ? "u" : "p"; 5563 break; 5564 case ORDERED: 5565 suffix = fp ? "nu" : "np"; 5566 break; 5567 default: 5568 abort (); 5569 } 5570 fputs (suffix, file); 5571} 5572 5573void 5574print_reg (x, code, file) 5575 rtx x; 5576 int code; 5577 FILE *file; 5578{ 5579 if (REGNO (x) == ARG_POINTER_REGNUM 5580 || REGNO (x) == FRAME_POINTER_REGNUM 5581 || REGNO (x) == FLAGS_REG 5582 || REGNO (x) == FPSR_REG) 5583 abort (); 5584 5585 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0) 5586 putc ('%', file); 5587 5588 if (code == 'w' || MMX_REG_P (x)) 5589 code = 2; 5590 else if (code == 'b') 5591 code = 1; 5592 else if (code == 'k') 5593 code = 4; 5594 else if (code == 'q') 5595 code = 8; 5596 else if (code == 'y') 5597 code = 3; 5598 else if (code == 'h') 5599 code = 0; 5600 else 5601 code = GET_MODE_SIZE (GET_MODE (x)); 5602 5603 /* Irritatingly, AMD extended registers use different naming convention 5604 from the normal registers. */ 5605 if (REX_INT_REG_P (x)) 5606 { 5607 if (!TARGET_64BIT) 5608 abort (); 5609 switch (code) 5610 { 5611 case 0: 5612 error ("extended registers have no high halves"); 5613 break; 5614 case 1: 5615 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8); 5616 break; 5617 case 2: 5618 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8); 5619 break; 5620 case 4: 5621 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8); 5622 break; 5623 case 8: 5624 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8); 5625 break; 5626 default: 5627 error ("unsupported operand size for extended register"); 5628 break; 5629 } 5630 return; 5631 } 5632 switch (code) 5633 { 5634 case 3: 5635 if (STACK_TOP_P (x)) 5636 { 5637 fputs ("st(0)", file); 5638 break; 5639 } 5640 /* FALLTHRU */ 5641 case 8: 5642 case 4: 5643 case 12: 5644 if (! ANY_FP_REG_P (x)) 5645 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file); 5646 /* FALLTHRU */ 5647 case 16: 5648 case 2: 5649 fputs (hi_reg_name[REGNO (x)], file); 5650 break; 5651 case 1: 5652 fputs (qi_reg_name[REGNO (x)], file); 5653 break; 5654 case 0: 5655 fputs (qi_high_reg_name[REGNO (x)], file); 5656 break; 5657 default: 5658 abort (); 5659 } 5660} 5661 5662/* Meaning of CODE: 5663 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. 5664 C -- print opcode suffix for set/cmov insn. 5665 c -- like C, but print reversed condition 5666 F,f -- likewise, but for floating-point. 5667 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise 5668 nothing 5669 R -- print the prefix for register names. 5670 z -- print the opcode suffix for the size of the current operand. 5671 * -- print a star (in certain assembler syntax) 5672 A -- print an absolute memory reference. 5673 w -- print the operand as if it's a "word" (HImode) even if it isn't. 5674 s -- print a shift double count, followed by the assemblers argument 5675 delimiter. 5676 b -- print the QImode name of the register for the indicated operand. 5677 %b0 would print %al if operands[0] is reg 0. 5678 w -- likewise, print the HImode name of the register. 5679 k -- likewise, print the SImode name of the register. 5680 q -- likewise, print the DImode name of the register. 5681 h -- print the QImode name for a "high" register, either ah, bh, ch or dh. 5682 y -- print "st(0)" instead of "st" as a register. 5683 D -- print condition for SSE cmp instruction. 5684 P -- if PIC, print an @PLT suffix. 5685 X -- don't print any sort of PIC '@' suffix for a symbol. 5686 */ 5687 5688void 5689print_operand (file, x, code) 5690 FILE *file; 5691 rtx x; 5692 int code; 5693{ 5694 if (code) 5695 { 5696 switch (code) 5697 { 5698 case '*': 5699 if (ASSEMBLER_DIALECT == ASM_ATT) 5700 putc ('*', file); 5701 return; 5702 5703 case 'A': 5704 if (ASSEMBLER_DIALECT == ASM_ATT) 5705 putc ('*', file); 5706 else if (ASSEMBLER_DIALECT == ASM_INTEL) 5707 { 5708 /* Intel syntax. For absolute addresses, registers should not 5709 be surrounded by braces. */ 5710 if (GET_CODE (x) != REG) 5711 { 5712 putc ('[', file); 5713 PRINT_OPERAND (file, x, 0); 5714 putc (']', file); 5715 return; 5716 } 5717 } 5718 else 5719 abort (); 5720 5721 PRINT_OPERAND (file, x, 0); 5722 return; 5723 5724 5725 case 'L': 5726 if (ASSEMBLER_DIALECT == ASM_ATT) 5727 putc ('l', file); 5728 return; 5729 5730 case 'W': 5731 if (ASSEMBLER_DIALECT == ASM_ATT) 5732 putc ('w', file); 5733 return; 5734 5735 case 'B': 5736 if (ASSEMBLER_DIALECT == ASM_ATT) 5737 putc ('b', file); 5738 return; 5739 5740 case 'Q': 5741 if (ASSEMBLER_DIALECT == ASM_ATT) 5742 putc ('l', file); 5743 return; 5744 5745 case 'S': 5746 if (ASSEMBLER_DIALECT == ASM_ATT) 5747 putc ('s', file); 5748 return; 5749 5750 case 'T': 5751 if (ASSEMBLER_DIALECT == ASM_ATT) 5752 putc ('t', file); 5753 return; 5754 5755 case 'z': 5756 /* 387 opcodes don't get size suffixes if the operands are 5757 registers. */ 5758 if (STACK_REG_P (x)) 5759 return; 5760 5761 /* Likewise if using Intel opcodes. */ 5762 if (ASSEMBLER_DIALECT == ASM_INTEL) 5763 return; 5764 5765 /* This is the size of op from size of operand. */ 5766 switch (GET_MODE_SIZE (GET_MODE (x))) 5767 { 5768 case 2: 5769#ifdef HAVE_GAS_FILDS_FISTS 5770 putc ('s', file); 5771#endif 5772 return; 5773 5774 case 4: 5775 if (GET_MODE (x) == SFmode) 5776 { 5777 putc ('s', file); 5778 return; 5779 } 5780 else 5781 putc ('l', file); 5782 return; 5783 5784 case 12: 5785 case 16: 5786 putc ('t', file); 5787 return; 5788 5789 case 8: 5790 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) 5791 { 5792#ifdef GAS_MNEMONICS 5793 putc ('q', file); 5794#else 5795 putc ('l', file); 5796 putc ('l', file); 5797#endif 5798 } 5799 else 5800 putc ('l', file); 5801 return; 5802 5803 default: 5804 abort (); 5805 } 5806 5807 case 'b': 5808 case 'w': 5809 case 'k': 5810 case 'q': 5811 case 'h': 5812 case 'y': 5813 case 'X': 5814 case 'P': 5815 break; 5816 5817 case 's': 5818 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT) 5819 { 5820 PRINT_OPERAND (file, x, 0); 5821 putc (',', file); 5822 } 5823 return; 5824 5825 case 'D': 5826 /* Little bit of braindamage here. The SSE compare instructions 5827 does use completely different names for the comparisons that the 5828 fp conditional moves. */ 5829 switch (GET_CODE (x)) 5830 { 5831 case EQ: 5832 case UNEQ: 5833 fputs ("eq", file); 5834 break; 5835 case LT: 5836 case UNLT: 5837 fputs ("lt", file); 5838 break; 5839 case LE: 5840 case UNLE: 5841 fputs ("le", file); 5842 break; 5843 case UNORDERED: 5844 fputs ("unord", file); 5845 break; 5846 case NE: 5847 case LTGT: 5848 fputs ("neq", file); 5849 break; 5850 case UNGE: 5851 case GE: 5852 fputs ("nlt", file); 5853 break; 5854 case UNGT: 5855 case GT: 5856 fputs ("nle", file); 5857 break; 5858 case ORDERED: 5859 fputs ("ord", file); 5860 break; 5861 default: 5862 abort (); 5863 break; 5864 } 5865 return; 5866 case 'O': 5867#ifdef CMOV_SUN_AS_SYNTAX 5868 if (ASSEMBLER_DIALECT == ASM_ATT) 5869 { 5870 switch (GET_MODE (x)) 5871 { 5872 case HImode: putc ('w', file); break; 5873 case SImode: 5874 case SFmode: putc ('l', file); break; 5875 case DImode: 5876 case DFmode: putc ('q', file); break; 5877 default: abort (); 5878 } 5879 putc ('.', file); 5880 } 5881#endif 5882 return; 5883 case 'C': 5884 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file); 5885 return; 5886 case 'F': 5887#ifdef CMOV_SUN_AS_SYNTAX 5888 if (ASSEMBLER_DIALECT == ASM_ATT) 5889 putc ('.', file); 5890#endif 5891 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file); 5892 return; 5893 5894 /* Like above, but reverse condition */ 5895 case 'c': 5896 /* Check to see if argument to %c is really a constant 5897 and not a condition code which needs to be reversed. */ 5898 if (GET_RTX_CLASS (GET_CODE (x)) != '<') 5899 { 5900 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'"); 5901 return; 5902 } 5903 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file); 5904 return; 5905 case 'f': 5906#ifdef CMOV_SUN_AS_SYNTAX 5907 if (ASSEMBLER_DIALECT == ASM_ATT) 5908 putc ('.', file); 5909#endif 5910 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file); 5911 return; 5912 case '+': 5913 { 5914 rtx x; 5915 5916 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS) 5917 return; 5918 5919 x = find_reg_note (current_output_insn, REG_BR_PROB, 0); 5920 if (x) 5921 { 5922 int pred_val = INTVAL (XEXP (x, 0)); 5923 5924 if (pred_val < REG_BR_PROB_BASE * 45 / 100 5925 || pred_val > REG_BR_PROB_BASE * 55 / 100) 5926 { 5927 int taken = pred_val > REG_BR_PROB_BASE / 2; 5928 int cputaken = final_forward_branch_p (current_output_insn) == 0; 5929 5930 /* Emit hints only in the case default branch prediction 5931 heruistics would fail. */ 5932 if (taken != cputaken) 5933 { 5934 /* We use 3e (DS) prefix for taken branches and 5935 2e (CS) prefix for not taken branches. */ 5936 if (taken) 5937 fputs ("ds ; ", file); 5938 else 5939 fputs ("cs ; ", file); 5940 } 5941 } 5942 } 5943 return; 5944 } 5945 default: 5946 output_operand_lossage ("invalid operand code `%c'", code); 5947 } 5948 } 5949 5950 if (GET_CODE (x) == REG) 5951 { 5952 PRINT_REG (x, code, file); 5953 } 5954 5955 else if (GET_CODE (x) == MEM) 5956 { 5957 /* No `byte ptr' prefix for call instructions. */ 5958 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P') 5959 { 5960 const char * size; 5961 switch (GET_MODE_SIZE (GET_MODE (x))) 5962 { 5963 case 1: size = "BYTE"; break; 5964 case 2: size = "WORD"; break; 5965 case 4: size = "DWORD"; break; 5966 case 8: size = "QWORD"; break; 5967 case 12: size = "XWORD"; break; 5968 case 16: size = "XMMWORD"; break; 5969 default: 5970 abort (); 5971 } 5972 5973 /* Check for explicit size override (codes 'b', 'w' and 'k') */ 5974 if (code == 'b') 5975 size = "BYTE"; 5976 else if (code == 'w') 5977 size = "WORD"; 5978 else if (code == 'k') 5979 size = "DWORD"; 5980 5981 fputs (size, file); 5982 fputs (" PTR ", file); 5983 } 5984 5985 x = XEXP (x, 0); 5986 if (flag_pic && CONSTANT_ADDRESS_P (x)) 5987 output_pic_addr_const (file, x, code); 5988 /* Avoid (%rip) for call operands. */ 5989 else if (CONSTANT_ADDRESS_P (x) && code =='P' 5990 && GET_CODE (x) != CONST_INT) 5991 output_addr_const (file, x); 5992 else 5993 output_address (x); 5994 } 5995 5996 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode) 5997 { 5998 REAL_VALUE_TYPE r; 5999 long l; 6000 6001 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 6002 REAL_VALUE_TO_TARGET_SINGLE (r, l); 6003 6004 if (ASSEMBLER_DIALECT == ASM_ATT) 6005 putc ('$', file); 6006 fprintf (file, "0x%lx", l); 6007 } 6008 6009 /* These float cases don't actually occur as immediate operands. */ 6010 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode) 6011 { 6012 REAL_VALUE_TYPE r; 6013 char dstr[30]; 6014 6015 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 6016 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr); 6017 fprintf (file, "%s", dstr); 6018 } 6019 6020 else if (GET_CODE (x) == CONST_DOUBLE 6021 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode)) 6022 { 6023 REAL_VALUE_TYPE r; 6024 char dstr[30]; 6025 6026 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 6027 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr); 6028 fprintf (file, "%s", dstr); 6029 } 6030 else 6031 { 6032 if (code != 'P') 6033 { 6034 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE) 6035 { 6036 if (ASSEMBLER_DIALECT == ASM_ATT) 6037 putc ('$', file); 6038 } 6039 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF 6040 || GET_CODE (x) == LABEL_REF) 6041 { 6042 if (ASSEMBLER_DIALECT == ASM_ATT) 6043 putc ('$', file); 6044 else 6045 fputs ("OFFSET FLAT:", file); 6046 } 6047 } 6048 if (GET_CODE (x) == CONST_INT) 6049 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); 6050 else if (flag_pic) 6051 output_pic_addr_const (file, x, code); 6052 else 6053 output_addr_const (file, x); 6054 } 6055} 6056 6057/* Print a memory operand whose address is ADDR. */ 6058 6059void 6060print_operand_address (file, addr) 6061 FILE *file; 6062 register rtx addr; 6063{ 6064 struct ix86_address parts; 6065 rtx base, index, disp; 6066 int scale; 6067 6068 if (! ix86_decompose_address (addr, &parts)) 6069 abort (); 6070 6071 base = parts.base; 6072 index = parts.index; 6073 disp = parts.disp; 6074 scale = parts.scale; 6075 6076 if (!base && !index) 6077 { 6078 /* Displacement only requires special attention. */ 6079 6080 if (GET_CODE (disp) == CONST_INT) 6081 { 6082 if (ASSEMBLER_DIALECT == ASM_INTEL) 6083 { 6084 if (USER_LABEL_PREFIX[0] == 0) 6085 putc ('%', file); 6086 fputs ("ds:", file); 6087 } 6088 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr)); 6089 } 6090 else if (flag_pic) 6091 output_pic_addr_const (file, addr, 0); 6092 else 6093 output_addr_const (file, addr); 6094 6095 /* Use one byte shorter RIP relative addressing for 64bit mode. */ 6096 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT) 6097 fputs ("(%rip)", file); 6098 } 6099 else 6100 { 6101 if (ASSEMBLER_DIALECT == ASM_ATT) 6102 { 6103 if (disp) 6104 { 6105 if (flag_pic) 6106 output_pic_addr_const (file, disp, 0); 6107 else if (GET_CODE (disp) == LABEL_REF) 6108 output_asm_label (disp); 6109 else 6110 output_addr_const (file, disp); 6111 } 6112 6113 putc ('(', file); 6114 if (base) 6115 PRINT_REG (base, 0, file); 6116 if (index) 6117 { 6118 putc (',', file); 6119 PRINT_REG (index, 0, file); 6120 if (scale != 1) 6121 fprintf (file, ",%d", scale); 6122 } 6123 putc (')', file); 6124 } 6125 else 6126 { 6127 rtx offset = NULL_RTX; 6128 6129 if (disp) 6130 { 6131 /* Pull out the offset of a symbol; print any symbol itself. */ 6132 if (GET_CODE (disp) == CONST 6133 && GET_CODE (XEXP (disp, 0)) == PLUS 6134 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT) 6135 { 6136 offset = XEXP (XEXP (disp, 0), 1); 6137 disp = gen_rtx_CONST (VOIDmode, 6138 XEXP (XEXP (disp, 0), 0)); 6139 } 6140 6141 if (flag_pic) 6142 output_pic_addr_const (file, disp, 0); 6143 else if (GET_CODE (disp) == LABEL_REF) 6144 output_asm_label (disp); 6145 else if (GET_CODE (disp) == CONST_INT) 6146 offset = disp; 6147 else 6148 output_addr_const (file, disp); 6149 } 6150 6151 putc ('[', file); 6152 if (base) 6153 { 6154 PRINT_REG (base, 0, file); 6155 if (offset) 6156 { 6157 if (INTVAL (offset) >= 0) 6158 putc ('+', file); 6159 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); 6160 } 6161 } 6162 else if (offset) 6163 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); 6164 else 6165 putc ('0', file); 6166 6167 if (index) 6168 { 6169 putc ('+', file); 6170 PRINT_REG (index, 0, file); 6171 if (scale != 1) 6172 fprintf (file, "*%d", scale); 6173 } 6174 putc (']', file); 6175 } 6176 } 6177} 6178 6179/* Split one or more DImode RTL references into pairs of SImode 6180 references. The RTL can be REG, offsettable MEM, integer constant, or 6181 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to 6182 split and "num" is its length. lo_half and hi_half are output arrays 6183 that parallel "operands". */ 6184 6185void 6186split_di (operands, num, lo_half, hi_half) 6187 rtx operands[]; 6188 int num; 6189 rtx lo_half[], hi_half[]; 6190{ 6191 while (num--) 6192 { 6193 rtx op = operands[num]; 6194 6195 /* simplify_subreg refuse to split volatile memory addresses, 6196 but we still have to handle it. */ 6197 if (GET_CODE (op) == MEM) 6198 { 6199 lo_half[num] = adjust_address (op, SImode, 0); 6200 hi_half[num] = adjust_address (op, SImode, 4); 6201 } 6202 else 6203 { 6204 lo_half[num] = simplify_gen_subreg (SImode, op, 6205 GET_MODE (op) == VOIDmode 6206 ? DImode : GET_MODE (op), 0); 6207 hi_half[num] = simplify_gen_subreg (SImode, op, 6208 GET_MODE (op) == VOIDmode 6209 ? DImode : GET_MODE (op), 4); 6210 } 6211 } 6212} 6213/* Split one or more TImode RTL references into pairs of SImode 6214 references. The RTL can be REG, offsettable MEM, integer constant, or 6215 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to 6216 split and "num" is its length. lo_half and hi_half are output arrays 6217 that parallel "operands". */ 6218 6219void 6220split_ti (operands, num, lo_half, hi_half) 6221 rtx operands[]; 6222 int num; 6223 rtx lo_half[], hi_half[]; 6224{ 6225 while (num--) 6226 { 6227 rtx op = operands[num]; 6228 6229 /* simplify_subreg refuse to split volatile memory addresses, but we 6230 still have to handle it. */ 6231 if (GET_CODE (op) == MEM) 6232 { 6233 lo_half[num] = adjust_address (op, DImode, 0); 6234 hi_half[num] = adjust_address (op, DImode, 8); 6235 } 6236 else 6237 { 6238 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0); 6239 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8); 6240 } 6241 } 6242} 6243 6244/* Output code to perform a 387 binary operation in INSN, one of PLUS, 6245 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] 6246 is the expression of the binary operation. The output may either be 6247 emitted here, or returned to the caller, like all output_* functions. 6248 6249 There is no guarantee that the operands are the same mode, as they 6250 might be within FLOAT or FLOAT_EXTEND expressions. */ 6251 6252#ifndef SYSV386_COMPAT 6253/* Set to 1 for compatibility with brain-damaged assemblers. No-one 6254 wants to fix the assemblers because that causes incompatibility 6255 with gcc. No-one wants to fix gcc because that causes 6256 incompatibility with assemblers... You can use the option of 6257 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */ 6258#define SYSV386_COMPAT 1 6259#endif 6260 6261const char * 6262output_387_binary_op (insn, operands) 6263 rtx insn; 6264 rtx *operands; 6265{ 6266 static char buf[30]; 6267 const char *p; 6268 const char *ssep; 6269 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]); 6270 6271#ifdef ENABLE_CHECKING 6272 /* Even if we do not want to check the inputs, this documents input 6273 constraints. Which helps in understanding the following code. */ 6274 if (STACK_REG_P (operands[0]) 6275 && ((REG_P (operands[1]) 6276 && REGNO (operands[0]) == REGNO (operands[1]) 6277 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM)) 6278 || (REG_P (operands[2]) 6279 && REGNO (operands[0]) == REGNO (operands[2]) 6280 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM))) 6281 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2]))) 6282 ; /* ok */ 6283 else if (!is_sse) 6284 abort (); 6285#endif 6286 6287 switch (GET_CODE (operands[3])) 6288 { 6289 case PLUS: 6290 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 6291 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 6292 p = "fiadd"; 6293 else 6294 p = "fadd"; 6295 ssep = "add"; 6296 break; 6297 6298 case MINUS: 6299 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 6300 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 6301 p = "fisub"; 6302 else 6303 p = "fsub"; 6304 ssep = "sub"; 6305 break; 6306 6307 case MULT: 6308 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 6309 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 6310 p = "fimul"; 6311 else 6312 p = "fmul"; 6313 ssep = "mul"; 6314 break; 6315 6316 case DIV: 6317 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 6318 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 6319 p = "fidiv"; 6320 else 6321 p = "fdiv"; 6322 ssep = "div"; 6323 break; 6324 6325 default: 6326 abort (); 6327 } 6328 6329 if (is_sse) 6330 { 6331 strcpy (buf, ssep); 6332 if (GET_MODE (operands[0]) == SFmode) 6333 strcat (buf, "ss\t{%2, %0|%0, %2}"); 6334 else 6335 strcat (buf, "sd\t{%2, %0|%0, %2}"); 6336 return buf; 6337 } 6338 strcpy (buf, p); 6339 6340 switch (GET_CODE (operands[3])) 6341 { 6342 case MULT: 6343 case PLUS: 6344 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2])) 6345 { 6346 rtx temp = operands[2]; 6347 operands[2] = operands[1]; 6348 operands[1] = temp; 6349 } 6350 6351 /* know operands[0] == operands[1]. */ 6352 6353 if (GET_CODE (operands[2]) == MEM) 6354 { 6355 p = "%z2\t%2"; 6356 break; 6357 } 6358 6359 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) 6360 { 6361 if (STACK_TOP_P (operands[0])) 6362 /* How is it that we are storing to a dead operand[2]? 6363 Well, presumably operands[1] is dead too. We can't 6364 store the result to st(0) as st(0) gets popped on this 6365 instruction. Instead store to operands[2] (which I 6366 think has to be st(1)). st(1) will be popped later. 6367 gcc <= 2.8.1 didn't have this check and generated 6368 assembly code that the Unixware assembler rejected. */ 6369 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ 6370 else 6371 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ 6372 break; 6373 } 6374 6375 if (STACK_TOP_P (operands[0])) 6376 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ 6377 else 6378 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ 6379 break; 6380 6381 case MINUS: 6382 case DIV: 6383 if (GET_CODE (operands[1]) == MEM) 6384 { 6385 p = "r%z1\t%1"; 6386 break; 6387 } 6388 6389 if (GET_CODE (operands[2]) == MEM) 6390 { 6391 p = "%z2\t%2"; 6392 break; 6393 } 6394 6395 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) 6396 { 6397#if SYSV386_COMPAT 6398 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T 6399 derived assemblers, confusingly reverse the direction of 6400 the operation for fsub{r} and fdiv{r} when the 6401 destination register is not st(0). The Intel assembler 6402 doesn't have this brain damage. Read !SYSV386_COMPAT to 6403 figure out what the hardware really does. */ 6404 if (STACK_TOP_P (operands[0])) 6405 p = "{p\t%0, %2|rp\t%2, %0}"; 6406 else 6407 p = "{rp\t%2, %0|p\t%0, %2}"; 6408#else 6409 if (STACK_TOP_P (operands[0])) 6410 /* As above for fmul/fadd, we can't store to st(0). */ 6411 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ 6412 else 6413 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ 6414#endif 6415 break; 6416 } 6417 6418 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) 6419 { 6420#if SYSV386_COMPAT 6421 if (STACK_TOP_P (operands[0])) 6422 p = "{rp\t%0, %1|p\t%1, %0}"; 6423 else 6424 p = "{p\t%1, %0|rp\t%0, %1}"; 6425#else 6426 if (STACK_TOP_P (operands[0])) 6427 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */ 6428 else 6429 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */ 6430#endif 6431 break; 6432 } 6433 6434 if (STACK_TOP_P (operands[0])) 6435 { 6436 if (STACK_TOP_P (operands[1])) 6437 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ 6438 else 6439 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */ 6440 break; 6441 } 6442 else if (STACK_TOP_P (operands[1])) 6443 { 6444#if SYSV386_COMPAT 6445 p = "{\t%1, %0|r\t%0, %1}"; 6446#else 6447 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */ 6448#endif 6449 } 6450 else 6451 { 6452#if SYSV386_COMPAT 6453 p = "{r\t%2, %0|\t%0, %2}"; 6454#else 6455 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ 6456#endif 6457 } 6458 break; 6459 6460 default: 6461 abort (); 6462 } 6463 6464 strcat (buf, p); 6465 return buf; 6466} 6467 6468/* Output code to initialize control word copies used by 6469 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN 6470 is set to control word rounding downwards. */ 6471void 6472emit_i387_cw_initialization (normal, round_down) 6473 rtx normal, round_down; 6474{ 6475 rtx reg = gen_reg_rtx (HImode); 6476 6477 emit_insn (gen_x86_fnstcw_1 (normal)); 6478 emit_move_insn (reg, normal); 6479 if (!TARGET_PARTIAL_REG_STALL && !optimize_size 6480 && !TARGET_64BIT) 6481 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc))); 6482 else 6483 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00))); 6484 emit_move_insn (round_down, reg); 6485} 6486 6487/* Output code for INSN to convert a float to a signed int. OPERANDS 6488 are the insn operands. The output may be [HSD]Imode and the input 6489 operand may be [SDX]Fmode. */ 6490 6491const char * 6492output_fix_trunc (insn, operands) 6493 rtx insn; 6494 rtx *operands; 6495{ 6496 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; 6497 int dimode_p = GET_MODE (operands[0]) == DImode; 6498 6499 /* Jump through a hoop or two for DImode, since the hardware has no 6500 non-popping instruction. We used to do this a different way, but 6501 that was somewhat fragile and broke with post-reload splitters. */ 6502 if (dimode_p && !stack_top_dies) 6503 output_asm_insn ("fld\t%y1", operands); 6504 6505 if (!STACK_TOP_P (operands[1])) 6506 abort (); 6507 6508 if (GET_CODE (operands[0]) != MEM) 6509 abort (); 6510 6511 output_asm_insn ("fldcw\t%3", operands); 6512 if (stack_top_dies || dimode_p) 6513 output_asm_insn ("fistp%z0\t%0", operands); 6514 else 6515 output_asm_insn ("fist%z0\t%0", operands); 6516 output_asm_insn ("fldcw\t%2", operands); 6517 6518 return ""; 6519} 6520 6521/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi 6522 should be used and 2 when fnstsw should be used. UNORDERED_P is true 6523 when fucom should be used. */ 6524 6525const char * 6526output_fp_compare (insn, operands, eflags_p, unordered_p) 6527 rtx insn; 6528 rtx *operands; 6529 int eflags_p, unordered_p; 6530{ 6531 int stack_top_dies; 6532 rtx cmp_op0 = operands[0]; 6533 rtx cmp_op1 = operands[1]; 6534 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]); 6535 6536 if (eflags_p == 2) 6537 { 6538 cmp_op0 = cmp_op1; 6539 cmp_op1 = operands[2]; 6540 } 6541 if (is_sse) 6542 { 6543 if (GET_MODE (operands[0]) == SFmode) 6544 if (unordered_p) 6545 return "ucomiss\t{%1, %0|%0, %1}"; 6546 else 6547 return "comiss\t{%1, %0|%0, %y}"; 6548 else 6549 if (unordered_p) 6550 return "ucomisd\t{%1, %0|%0, %1}"; 6551 else 6552 return "comisd\t{%1, %0|%0, %y}"; 6553 } 6554 6555 if (! STACK_TOP_P (cmp_op0)) 6556 abort (); 6557 6558 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; 6559 6560 if (STACK_REG_P (cmp_op1) 6561 && stack_top_dies 6562 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1)) 6563 && REGNO (cmp_op1) != FIRST_STACK_REG) 6564 { 6565 /* If both the top of the 387 stack dies, and the other operand 6566 is also a stack register that dies, then this must be a 6567 `fcompp' float compare */ 6568 6569 if (eflags_p == 1) 6570 { 6571 /* There is no double popping fcomi variant. Fortunately, 6572 eflags is immune from the fstp's cc clobbering. */ 6573 if (unordered_p) 6574 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands); 6575 else 6576 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands); 6577 return "fstp\t%y0"; 6578 } 6579 else 6580 { 6581 if (eflags_p == 2) 6582 { 6583 if (unordered_p) 6584 return "fucompp\n\tfnstsw\t%0"; 6585 else 6586 return "fcompp\n\tfnstsw\t%0"; 6587 } 6588 else 6589 { 6590 if (unordered_p) 6591 return "fucompp"; 6592 else 6593 return "fcompp"; 6594 } 6595 } 6596 } 6597 else 6598 { 6599 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */ 6600 6601 static const char * const alt[24] = 6602 { 6603 "fcom%z1\t%y1", 6604 "fcomp%z1\t%y1", 6605 "fucom%z1\t%y1", 6606 "fucomp%z1\t%y1", 6607 6608 "ficom%z1\t%y1", 6609 "ficomp%z1\t%y1", 6610 NULL, 6611 NULL, 6612 6613 "fcomi\t{%y1, %0|%0, %y1}", 6614 "fcomip\t{%y1, %0|%0, %y1}", 6615 "fucomi\t{%y1, %0|%0, %y1}", 6616 "fucomip\t{%y1, %0|%0, %y1}", 6617 6618 NULL, 6619 NULL, 6620 NULL, 6621 NULL, 6622 6623 "fcom%z2\t%y2\n\tfnstsw\t%0", 6624 "fcomp%z2\t%y2\n\tfnstsw\t%0", 6625 "fucom%z2\t%y2\n\tfnstsw\t%0", 6626 "fucomp%z2\t%y2\n\tfnstsw\t%0", 6627 6628 "ficom%z2\t%y2\n\tfnstsw\t%0", 6629 "ficomp%z2\t%y2\n\tfnstsw\t%0", 6630 NULL, 6631 NULL 6632 }; 6633 6634 int mask; 6635 const char *ret; 6636 6637 mask = eflags_p << 3; 6638 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2; 6639 mask |= unordered_p << 1; 6640 mask |= stack_top_dies; 6641 6642 if (mask >= 24) 6643 abort (); 6644 ret = alt[mask]; 6645 if (ret == NULL) 6646 abort (); 6647 6648 return ret; 6649 } 6650} 6651 6652void 6653ix86_output_addr_vec_elt (file, value) 6654 FILE *file; 6655 int value; 6656{ 6657 const char *directive = ASM_LONG; 6658 6659 if (TARGET_64BIT) 6660 { 6661#ifdef ASM_QUAD 6662 directive = ASM_QUAD; 6663#else 6664 abort (); 6665#endif 6666 } 6667 6668 fprintf (file, "%s%s%d\n", directive, LPREFIX, value); 6669} 6670 6671void 6672ix86_output_addr_diff_elt (file, value, rel) 6673 FILE *file; 6674 int value, rel; 6675{ 6676 if (TARGET_64BIT) 6677 fprintf (file, "%s%s%d-.+(.-%s%d)\n", 6678 ASM_LONG, LPREFIX, value, LPREFIX, rel); 6679 else if (HAVE_AS_GOTOFF_IN_DATA) 6680 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value); 6681 else 6682 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n", 6683 ASM_LONG, LPREFIX, value); 6684} 6685 6686/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate 6687 for the target. */ 6688 6689void 6690ix86_expand_clear (dest) 6691 rtx dest; 6692{ 6693 rtx tmp; 6694 6695 /* We play register width games, which are only valid after reload. */ 6696 if (!reload_completed) 6697 abort (); 6698 6699 /* Avoid HImode and its attendant prefix byte. */ 6700 if (GET_MODE_SIZE (GET_MODE (dest)) < 4) 6701 dest = gen_rtx_REG (SImode, REGNO (dest)); 6702 6703 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx); 6704 6705 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */ 6706 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size)) 6707 { 6708 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17)); 6709 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob)); 6710 } 6711 6712 emit_insn (tmp); 6713} 6714 6715void 6716ix86_expand_move (mode, operands) 6717 enum machine_mode mode; 6718 rtx operands[]; 6719{ 6720 int strict = (reload_in_progress || reload_completed); 6721 rtx insn; 6722 6723 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode)) 6724 { 6725 /* Emit insns to move operands[1] into operands[0]. */ 6726 6727 if (GET_CODE (operands[0]) == MEM) 6728 operands[1] = force_reg (Pmode, operands[1]); 6729 else 6730 { 6731 rtx temp = operands[0]; 6732 if (GET_CODE (temp) != REG) 6733 temp = gen_reg_rtx (Pmode); 6734 temp = legitimize_pic_address (operands[1], temp); 6735 if (temp == operands[0]) 6736 return; 6737 operands[1] = temp; 6738 } 6739 } 6740 else 6741 { 6742 if (GET_CODE (operands[0]) == MEM 6743 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode) 6744 || !push_operand (operands[0], mode)) 6745 && GET_CODE (operands[1]) == MEM) 6746 operands[1] = force_reg (mode, operands[1]); 6747 6748 if (push_operand (operands[0], mode) 6749 && ! general_no_elim_operand (operands[1], mode)) 6750 operands[1] = copy_to_mode_reg (mode, operands[1]); 6751 6752 /* Force large constants in 64bit compilation into register 6753 to get them CSEed. */ 6754 if (TARGET_64BIT && mode == DImode 6755 && immediate_operand (operands[1], mode) 6756 && !x86_64_zero_extended_value (operands[1]) 6757 && !register_operand (operands[0], mode) 6758 && optimize && !reload_completed && !reload_in_progress) 6759 operands[1] = copy_to_mode_reg (mode, operands[1]); 6760 6761 if (FLOAT_MODE_P (mode)) 6762 { 6763 /* If we are loading a floating point constant to a register, 6764 force the value to memory now, since we'll get better code 6765 out the back end. */ 6766 6767 if (strict) 6768 ; 6769 else if (GET_CODE (operands[1]) == CONST_DOUBLE 6770 && register_operand (operands[0], mode)) 6771 operands[1] = validize_mem (force_const_mem (mode, operands[1])); 6772 } 6773 } 6774 6775 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]); 6776 6777 emit_insn (insn); 6778} 6779 6780void 6781ix86_expand_vector_move (mode, operands) 6782 enum machine_mode mode; 6783 rtx operands[]; 6784{ 6785 /* Force constants other than zero into memory. We do not know how 6786 the instructions used to build constants modify the upper 64 bits 6787 of the register, once we have that information we may be able 6788 to handle some of them more efficiently. */ 6789 if ((reload_in_progress | reload_completed) == 0 6790 && register_operand (operands[0], mode) 6791 && CONSTANT_P (operands[1])) 6792 { 6793 rtx addr = gen_reg_rtx (Pmode); 6794 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0)); 6795 operands[1] = gen_rtx_MEM (mode, addr); 6796 } 6797 6798 /* Make operand1 a register if it isn't already. */ 6799 if ((reload_in_progress | reload_completed) == 0 6800 && !register_operand (operands[0], mode) 6801 && !register_operand (operands[1], mode) 6802 && operands[1] != CONST0_RTX (mode)) 6803 { 6804 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]); 6805 emit_move_insn (operands[0], temp); 6806 return; 6807 } 6808 6809 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); 6810} 6811 6812/* Attempt to expand a binary operator. Make the expansion closer to the 6813 actual machine, then just general_operand, which will allow 3 separate 6814 memory references (one output, two input) in a single insn. */ 6815 6816void 6817ix86_expand_binary_operator (code, mode, operands) 6818 enum rtx_code code; 6819 enum machine_mode mode; 6820 rtx operands[]; 6821{ 6822 int matching_memory; 6823 rtx src1, src2, dst, op, clob; 6824 6825 dst = operands[0]; 6826 src1 = operands[1]; 6827 src2 = operands[2]; 6828 6829 /* Recognize <var1> = <value> <op> <var1> for commutative operators */ 6830 if (GET_RTX_CLASS (code) == 'c' 6831 && (rtx_equal_p (dst, src2) 6832 || immediate_operand (src1, mode))) 6833 { 6834 rtx temp = src1; 6835 src1 = src2; 6836 src2 = temp; 6837 } 6838 6839 /* If the destination is memory, and we do not have matching source 6840 operands, do things in registers. */ 6841 matching_memory = 0; 6842 if (GET_CODE (dst) == MEM) 6843 { 6844 if (rtx_equal_p (dst, src1)) 6845 matching_memory = 1; 6846 else if (GET_RTX_CLASS (code) == 'c' 6847 && rtx_equal_p (dst, src2)) 6848 matching_memory = 2; 6849 else 6850 dst = gen_reg_rtx (mode); 6851 } 6852 6853 /* Both source operands cannot be in memory. */ 6854 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM) 6855 { 6856 if (matching_memory != 2) 6857 src2 = force_reg (mode, src2); 6858 else 6859 src1 = force_reg (mode, src1); 6860 } 6861 6862 /* If the operation is not commutable, source 1 cannot be a constant 6863 or non-matching memory. */ 6864 if ((CONSTANT_P (src1) 6865 || (!matching_memory && GET_CODE (src1) == MEM)) 6866 && GET_RTX_CLASS (code) != 'c') 6867 src1 = force_reg (mode, src1); 6868 6869 /* If optimizing, copy to regs to improve CSE */ 6870 if (optimize && ! no_new_pseudos) 6871 { 6872 if (GET_CODE (dst) == MEM) 6873 dst = gen_reg_rtx (mode); 6874 if (GET_CODE (src1) == MEM) 6875 src1 = force_reg (mode, src1); 6876 if (GET_CODE (src2) == MEM) 6877 src2 = force_reg (mode, src2); 6878 } 6879 6880 /* Emit the instruction. */ 6881 6882 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2)); 6883 if (reload_in_progress) 6884 { 6885 /* Reload doesn't know about the flags register, and doesn't know that 6886 it doesn't want to clobber it. We can only do this with PLUS. */ 6887 if (code != PLUS) 6888 abort (); 6889 emit_insn (op); 6890 } 6891 else 6892 { 6893 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); 6894 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); 6895 } 6896 6897 /* Fix up the destination if needed. */ 6898 if (dst != operands[0]) 6899 emit_move_insn (operands[0], dst); 6900} 6901 6902/* Return TRUE or FALSE depending on whether the binary operator meets the 6903 appropriate constraints. */ 6904 6905int 6906ix86_binary_operator_ok (code, mode, operands) 6907 enum rtx_code code; 6908 enum machine_mode mode ATTRIBUTE_UNUSED; 6909 rtx operands[3]; 6910{ 6911 /* Both source operands cannot be in memory. */ 6912 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM) 6913 return 0; 6914 /* If the operation is not commutable, source 1 cannot be a constant. */ 6915 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c') 6916 return 0; 6917 /* If the destination is memory, we must have a matching source operand. */ 6918 if (GET_CODE (operands[0]) == MEM 6919 && ! (rtx_equal_p (operands[0], operands[1]) 6920 || (GET_RTX_CLASS (code) == 'c' 6921 && rtx_equal_p (operands[0], operands[2])))) 6922 return 0; 6923 /* If the operation is not commutable and the source 1 is memory, we must 6924 have a matching destination. */ 6925 if (GET_CODE (operands[1]) == MEM 6926 && GET_RTX_CLASS (code) != 'c' 6927 && ! rtx_equal_p (operands[0], operands[1])) 6928 return 0; 6929 return 1; 6930} 6931 6932/* Attempt to expand a unary operator. Make the expansion closer to the 6933 actual machine, then just general_operand, which will allow 2 separate 6934 memory references (one output, one input) in a single insn. */ 6935 6936void 6937ix86_expand_unary_operator (code, mode, operands) 6938 enum rtx_code code; 6939 enum machine_mode mode; 6940 rtx operands[]; 6941{ 6942 int matching_memory; 6943 rtx src, dst, op, clob; 6944 6945 dst = operands[0]; 6946 src = operands[1]; 6947 6948 /* If the destination is memory, and we do not have matching source 6949 operands, do things in registers. */ 6950 matching_memory = 0; 6951 if (GET_CODE (dst) == MEM) 6952 { 6953 if (rtx_equal_p (dst, src)) 6954 matching_memory = 1; 6955 else 6956 dst = gen_reg_rtx (mode); 6957 } 6958 6959 /* When source operand is memory, destination must match. */ 6960 if (!matching_memory && GET_CODE (src) == MEM) 6961 src = force_reg (mode, src); 6962 6963 /* If optimizing, copy to regs to improve CSE */ 6964 if (optimize && ! no_new_pseudos) 6965 { 6966 if (GET_CODE (dst) == MEM) 6967 dst = gen_reg_rtx (mode); 6968 if (GET_CODE (src) == MEM) 6969 src = force_reg (mode, src); 6970 } 6971 6972 /* Emit the instruction. */ 6973 6974 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src)); 6975 if (reload_in_progress || code == NOT) 6976 { 6977 /* Reload doesn't know about the flags register, and doesn't know that 6978 it doesn't want to clobber it. */ 6979 if (code != NOT) 6980 abort (); 6981 emit_insn (op); 6982 } 6983 else 6984 { 6985 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); 6986 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); 6987 } 6988 6989 /* Fix up the destination if needed. */ 6990 if (dst != operands[0]) 6991 emit_move_insn (operands[0], dst); 6992} 6993 6994/* Return TRUE or FALSE depending on whether the unary operator meets the 6995 appropriate constraints. */ 6996 6997int 6998ix86_unary_operator_ok (code, mode, operands) 6999 enum rtx_code code ATTRIBUTE_UNUSED; 7000 enum machine_mode mode ATTRIBUTE_UNUSED; 7001 rtx operands[2] ATTRIBUTE_UNUSED; 7002{ 7003 /* If one of operands is memory, source and destination must match. */ 7004 if ((GET_CODE (operands[0]) == MEM 7005 || GET_CODE (operands[1]) == MEM) 7006 && ! rtx_equal_p (operands[0], operands[1])) 7007 return FALSE; 7008 return TRUE; 7009} 7010 7011/* Return TRUE or FALSE depending on whether the first SET in INSN 7012 has source and destination with matching CC modes, and that the 7013 CC mode is at least as constrained as REQ_MODE. */ 7014 7015int 7016ix86_match_ccmode (insn, req_mode) 7017 rtx insn; 7018 enum machine_mode req_mode; 7019{ 7020 rtx set; 7021 enum machine_mode set_mode; 7022 7023 set = PATTERN (insn); 7024 if (GET_CODE (set) == PARALLEL) 7025 set = XVECEXP (set, 0, 0); 7026 if (GET_CODE (set) != SET) 7027 abort (); 7028 if (GET_CODE (SET_SRC (set)) != COMPARE) 7029 abort (); 7030 7031 set_mode = GET_MODE (SET_DEST (set)); 7032 switch (set_mode) 7033 { 7034 case CCNOmode: 7035 if (req_mode != CCNOmode 7036 && (req_mode != CCmode 7037 || XEXP (SET_SRC (set), 1) != const0_rtx)) 7038 return 0; 7039 break; 7040 case CCmode: 7041 if (req_mode == CCGCmode) 7042 return 0; 7043 /* FALLTHRU */ 7044 case CCGCmode: 7045 if (req_mode == CCGOCmode || req_mode == CCNOmode) 7046 return 0; 7047 /* FALLTHRU */ 7048 case CCGOCmode: 7049 if (req_mode == CCZmode) 7050 return 0; 7051 /* FALLTHRU */ 7052 case CCZmode: 7053 break; 7054 7055 default: 7056 abort (); 7057 } 7058 7059 return (GET_MODE (SET_SRC (set)) == set_mode); 7060} 7061 7062/* Generate insn patterns to do an integer compare of OPERANDS. */ 7063 7064static rtx 7065ix86_expand_int_compare (code, op0, op1) 7066 enum rtx_code code; 7067 rtx op0, op1; 7068{ 7069 enum machine_mode cmpmode; 7070 rtx tmp, flags; 7071 7072 cmpmode = SELECT_CC_MODE (code, op0, op1); 7073 flags = gen_rtx_REG (cmpmode, FLAGS_REG); 7074 7075 /* This is very simple, but making the interface the same as in the 7076 FP case makes the rest of the code easier. */ 7077 tmp = gen_rtx_COMPARE (cmpmode, op0, op1); 7078 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp)); 7079 7080 /* Return the test that should be put into the flags user, i.e. 7081 the bcc, scc, or cmov instruction. */ 7082 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx); 7083} 7084 7085/* Figure out whether to use ordered or unordered fp comparisons. 7086 Return the appropriate mode to use. */ 7087 7088enum machine_mode 7089ix86_fp_compare_mode (code) 7090 enum rtx_code code ATTRIBUTE_UNUSED; 7091{ 7092 /* ??? In order to make all comparisons reversible, we do all comparisons 7093 non-trapping when compiling for IEEE. Once gcc is able to distinguish 7094 all forms trapping and nontrapping comparisons, we can make inequality 7095 comparisons trapping again, since it results in better code when using 7096 FCOM based compares. */ 7097 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode; 7098} 7099 7100enum machine_mode 7101ix86_cc_mode (code, op0, op1) 7102 enum rtx_code code; 7103 rtx op0, op1; 7104{ 7105 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT) 7106 return ix86_fp_compare_mode (code); 7107 switch (code) 7108 { 7109 /* Only zero flag is needed. */ 7110 case EQ: /* ZF=0 */ 7111 case NE: /* ZF!=0 */ 7112 return CCZmode; 7113 /* Codes needing carry flag. */ 7114 case GEU: /* CF=0 */ 7115 case GTU: /* CF=0 & ZF=0 */ 7116 case LTU: /* CF=1 */ 7117 case LEU: /* CF=1 | ZF=1 */ 7118 return CCmode; 7119 /* Codes possibly doable only with sign flag when 7120 comparing against zero. */ 7121 case GE: /* SF=OF or SF=0 */ 7122 case LT: /* SF<>OF or SF=1 */ 7123 if (op1 == const0_rtx) 7124 return CCGOCmode; 7125 else 7126 /* For other cases Carry flag is not required. */ 7127 return CCGCmode; 7128 /* Codes doable only with sign flag when comparing 7129 against zero, but we miss jump instruction for it 7130 so we need to use relational tests agains overflow 7131 that thus needs to be zero. */ 7132 case GT: /* ZF=0 & SF=OF */ 7133 case LE: /* ZF=1 | SF<>OF */ 7134 if (op1 == const0_rtx) 7135 return CCNOmode; 7136 else 7137 return CCGCmode; 7138 /* strcmp pattern do (use flags) and combine may ask us for proper 7139 mode. */ 7140 case USE: 7141 return CCmode; 7142 default: 7143 abort (); 7144 } 7145} 7146 7147/* Return true if we should use an FCOMI instruction for this fp comparison. */ 7148 7149int 7150ix86_use_fcomi_compare (code) 7151 enum rtx_code code ATTRIBUTE_UNUSED; 7152{ 7153 enum rtx_code swapped_code = swap_condition (code); 7154 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code)) 7155 || (ix86_fp_comparison_cost (swapped_code) 7156 == ix86_fp_comparison_fcomi_cost (swapped_code))); 7157} 7158 7159/* Swap, force into registers, or otherwise massage the two operands 7160 to a fp comparison. The operands are updated in place; the new 7161 comparsion code is returned. */ 7162 7163static enum rtx_code 7164ix86_prepare_fp_compare_args (code, pop0, pop1) 7165 enum rtx_code code; 7166 rtx *pop0, *pop1; 7167{ 7168 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code); 7169 rtx op0 = *pop0, op1 = *pop1; 7170 enum machine_mode op_mode = GET_MODE (op0); 7171 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1); 7172 7173 /* All of the unordered compare instructions only work on registers. 7174 The same is true of the XFmode compare instructions. The same is 7175 true of the fcomi compare instructions. */ 7176 7177 if (!is_sse 7178 && (fpcmp_mode == CCFPUmode 7179 || op_mode == XFmode 7180 || op_mode == TFmode 7181 || ix86_use_fcomi_compare (code))) 7182 { 7183 op0 = force_reg (op_mode, op0); 7184 op1 = force_reg (op_mode, op1); 7185 } 7186 else 7187 { 7188 /* %%% We only allow op1 in memory; op0 must be st(0). So swap 7189 things around if they appear profitable, otherwise force op0 7190 into a register. */ 7191 7192 if (standard_80387_constant_p (op0) == 0 7193 || (GET_CODE (op0) == MEM 7194 && ! (standard_80387_constant_p (op1) == 0 7195 || GET_CODE (op1) == MEM))) 7196 { 7197 rtx tmp; 7198 tmp = op0, op0 = op1, op1 = tmp; 7199 code = swap_condition (code); 7200 } 7201 7202 if (GET_CODE (op0) != REG) 7203 op0 = force_reg (op_mode, op0); 7204 7205 if (CONSTANT_P (op1)) 7206 { 7207 if (standard_80387_constant_p (op1)) 7208 op1 = force_reg (op_mode, op1); 7209 else 7210 op1 = validize_mem (force_const_mem (op_mode, op1)); 7211 } 7212 } 7213 7214 /* Try to rearrange the comparison to make it cheaper. */ 7215 if (ix86_fp_comparison_cost (code) 7216 > ix86_fp_comparison_cost (swap_condition (code)) 7217 && (GET_CODE (op1) == REG || !no_new_pseudos)) 7218 { 7219 rtx tmp; 7220 tmp = op0, op0 = op1, op1 = tmp; 7221 code = swap_condition (code); 7222 if (GET_CODE (op0) != REG) 7223 op0 = force_reg (op_mode, op0); 7224 } 7225 7226 *pop0 = op0; 7227 *pop1 = op1; 7228 return code; 7229} 7230 7231/* Convert comparison codes we use to represent FP comparison to integer 7232 code that will result in proper branch. Return UNKNOWN if no such code 7233 is available. */ 7234static enum rtx_code 7235ix86_fp_compare_code_to_integer (code) 7236 enum rtx_code code; 7237{ 7238 switch (code) 7239 { 7240 case GT: 7241 return GTU; 7242 case GE: 7243 return GEU; 7244 case ORDERED: 7245 case UNORDERED: 7246 return code; 7247 break; 7248 case UNEQ: 7249 return EQ; 7250 break; 7251 case UNLT: 7252 return LTU; 7253 break; 7254 case UNLE: 7255 return LEU; 7256 break; 7257 case LTGT: 7258 return NE; 7259 break; 7260 default: 7261 return UNKNOWN; 7262 } 7263} 7264 7265/* Split comparison code CODE into comparisons we can do using branch 7266 instructions. BYPASS_CODE is comparison code for branch that will 7267 branch around FIRST_CODE and SECOND_CODE. If some of branches 7268 is not required, set value to NIL. 7269 We never require more than two branches. */ 7270static void 7271ix86_fp_comparison_codes (code, bypass_code, first_code, second_code) 7272 enum rtx_code code, *bypass_code, *first_code, *second_code; 7273{ 7274 *first_code = code; 7275 *bypass_code = NIL; 7276 *second_code = NIL; 7277 7278 /* The fcomi comparison sets flags as follows: 7279 7280 cmp ZF PF CF 7281 > 0 0 0 7282 < 0 0 1 7283 = 1 0 0 7284 un 1 1 1 */ 7285 7286 switch (code) 7287 { 7288 case GT: /* GTU - CF=0 & ZF=0 */ 7289 case GE: /* GEU - CF=0 */ 7290 case ORDERED: /* PF=0 */ 7291 case UNORDERED: /* PF=1 */ 7292 case UNEQ: /* EQ - ZF=1 */ 7293 case UNLT: /* LTU - CF=1 */ 7294 case UNLE: /* LEU - CF=1 | ZF=1 */ 7295 case LTGT: /* EQ - ZF=0 */ 7296 break; 7297 case LT: /* LTU - CF=1 - fails on unordered */ 7298 *first_code = UNLT; 7299 *bypass_code = UNORDERED; 7300 break; 7301 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */ 7302 *first_code = UNLE; 7303 *bypass_code = UNORDERED; 7304 break; 7305 case EQ: /* EQ - ZF=1 - fails on unordered */ 7306 *first_code = UNEQ; 7307 *bypass_code = UNORDERED; 7308 break; 7309 case NE: /* NE - ZF=0 - fails on unordered */ 7310 *first_code = LTGT; 7311 *second_code = UNORDERED; 7312 break; 7313 case UNGE: /* GEU - CF=0 - fails on unordered */ 7314 *first_code = GE; 7315 *second_code = UNORDERED; 7316 break; 7317 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */ 7318 *first_code = GT; 7319 *second_code = UNORDERED; 7320 break; 7321 default: 7322 abort (); 7323 } 7324 if (!TARGET_IEEE_FP) 7325 { 7326 *second_code = NIL; 7327 *bypass_code = NIL; 7328 } 7329} 7330 7331/* Return cost of comparison done fcom + arithmetics operations on AX. 7332 All following functions do use number of instructions as an cost metrics. 7333 In future this should be tweaked to compute bytes for optimize_size and 7334 take into account performance of various instructions on various CPUs. */ 7335static int 7336ix86_fp_comparison_arithmetics_cost (code) 7337 enum rtx_code code; 7338{ 7339 if (!TARGET_IEEE_FP) 7340 return 4; 7341 /* The cost of code output by ix86_expand_fp_compare. */ 7342 switch (code) 7343 { 7344 case UNLE: 7345 case UNLT: 7346 case LTGT: 7347 case GT: 7348 case GE: 7349 case UNORDERED: 7350 case ORDERED: 7351 case UNEQ: 7352 return 4; 7353 break; 7354 case LT: 7355 case NE: 7356 case EQ: 7357 case UNGE: 7358 return 5; 7359 break; 7360 case LE: 7361 case UNGT: 7362 return 6; 7363 break; 7364 default: 7365 abort (); 7366 } 7367} 7368 7369/* Return cost of comparison done using fcomi operation. 7370 See ix86_fp_comparison_arithmetics_cost for the metrics. */ 7371static int 7372ix86_fp_comparison_fcomi_cost (code) 7373 enum rtx_code code; 7374{ 7375 enum rtx_code bypass_code, first_code, second_code; 7376 /* Return arbitarily high cost when instruction is not supported - this 7377 prevents gcc from using it. */ 7378 if (!TARGET_CMOVE) 7379 return 1024; 7380 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 7381 return (bypass_code != NIL || second_code != NIL) + 2; 7382} 7383 7384/* Return cost of comparison done using sahf operation. 7385 See ix86_fp_comparison_arithmetics_cost for the metrics. */ 7386static int 7387ix86_fp_comparison_sahf_cost (code) 7388 enum rtx_code code; 7389{ 7390 enum rtx_code bypass_code, first_code, second_code; 7391 /* Return arbitarily high cost when instruction is not preferred - this 7392 avoids gcc from using it. */ 7393 if (!TARGET_USE_SAHF && !optimize_size) 7394 return 1024; 7395 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 7396 return (bypass_code != NIL || second_code != NIL) + 3; 7397} 7398 7399/* Compute cost of the comparison done using any method. 7400 See ix86_fp_comparison_arithmetics_cost for the metrics. */ 7401static int 7402ix86_fp_comparison_cost (code) 7403 enum rtx_code code; 7404{ 7405 int fcomi_cost, sahf_cost, arithmetics_cost = 1024; 7406 int min; 7407 7408 fcomi_cost = ix86_fp_comparison_fcomi_cost (code); 7409 sahf_cost = ix86_fp_comparison_sahf_cost (code); 7410 7411 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code); 7412 if (min > sahf_cost) 7413 min = sahf_cost; 7414 if (min > fcomi_cost) 7415 min = fcomi_cost; 7416 return min; 7417} 7418 7419/* Generate insn patterns to do a floating point compare of OPERANDS. */ 7420 7421static rtx 7422ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test) 7423 enum rtx_code code; 7424 rtx op0, op1, scratch; 7425 rtx *second_test; 7426 rtx *bypass_test; 7427{ 7428 enum machine_mode fpcmp_mode, intcmp_mode; 7429 rtx tmp, tmp2; 7430 int cost = ix86_fp_comparison_cost (code); 7431 enum rtx_code bypass_code, first_code, second_code; 7432 7433 fpcmp_mode = ix86_fp_compare_mode (code); 7434 code = ix86_prepare_fp_compare_args (code, &op0, &op1); 7435 7436 if (second_test) 7437 *second_test = NULL_RTX; 7438 if (bypass_test) 7439 *bypass_test = NULL_RTX; 7440 7441 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 7442 7443 /* Do fcomi/sahf based test when profitable. */ 7444 if ((bypass_code == NIL || bypass_test) 7445 && (second_code == NIL || second_test) 7446 && ix86_fp_comparison_arithmetics_cost (code) > cost) 7447 { 7448 if (TARGET_CMOVE) 7449 { 7450 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); 7451 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), 7452 tmp); 7453 emit_insn (tmp); 7454 } 7455 else 7456 { 7457 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); 7458 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9); 7459 if (!scratch) 7460 scratch = gen_reg_rtx (HImode); 7461 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2)); 7462 emit_insn (gen_x86_sahf_1 (scratch)); 7463 } 7464 7465 /* The FP codes work out to act like unsigned. */ 7466 intcmp_mode = fpcmp_mode; 7467 code = first_code; 7468 if (bypass_code != NIL) 7469 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode, 7470 gen_rtx_REG (intcmp_mode, FLAGS_REG), 7471 const0_rtx); 7472 if (second_code != NIL) 7473 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode, 7474 gen_rtx_REG (intcmp_mode, FLAGS_REG), 7475 const0_rtx); 7476 } 7477 else 7478 { 7479 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */ 7480 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); 7481 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9); 7482 if (!scratch) 7483 scratch = gen_reg_rtx (HImode); 7484 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2)); 7485 7486 /* In the unordered case, we have to check C2 for NaN's, which 7487 doesn't happen to work out to anything nice combination-wise. 7488 So do some bit twiddling on the value we've got in AH to come 7489 up with an appropriate set of condition codes. */ 7490 7491 intcmp_mode = CCNOmode; 7492 switch (code) 7493 { 7494 case GT: 7495 case UNGT: 7496 if (code == GT || !TARGET_IEEE_FP) 7497 { 7498 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); 7499 code = EQ; 7500 } 7501 else 7502 { 7503 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 7504 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); 7505 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44))); 7506 intcmp_mode = CCmode; 7507 code = GEU; 7508 } 7509 break; 7510 case LT: 7511 case UNLT: 7512 if (code == LT && TARGET_IEEE_FP) 7513 { 7514 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 7515 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01))); 7516 intcmp_mode = CCmode; 7517 code = EQ; 7518 } 7519 else 7520 { 7521 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01))); 7522 code = NE; 7523 } 7524 break; 7525 case GE: 7526 case UNGE: 7527 if (code == GE || !TARGET_IEEE_FP) 7528 { 7529 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05))); 7530 code = EQ; 7531 } 7532 else 7533 { 7534 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 7535 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, 7536 GEN_INT (0x01))); 7537 code = NE; 7538 } 7539 break; 7540 case LE: 7541 case UNLE: 7542 if (code == LE && TARGET_IEEE_FP) 7543 { 7544 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 7545 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); 7546 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); 7547 intcmp_mode = CCmode; 7548 code = LTU; 7549 } 7550 else 7551 { 7552 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); 7553 code = NE; 7554 } 7555 break; 7556 case EQ: 7557 case UNEQ: 7558 if (code == EQ && TARGET_IEEE_FP) 7559 { 7560 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 7561 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); 7562 intcmp_mode = CCmode; 7563 code = EQ; 7564 } 7565 else 7566 { 7567 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); 7568 code = NE; 7569 break; 7570 } 7571 break; 7572 case NE: 7573 case LTGT: 7574 if (code == NE && TARGET_IEEE_FP) 7575 { 7576 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 7577 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, 7578 GEN_INT (0x40))); 7579 code = NE; 7580 } 7581 else 7582 { 7583 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); 7584 code = EQ; 7585 } 7586 break; 7587 7588 case UNORDERED: 7589 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); 7590 code = NE; 7591 break; 7592 case ORDERED: 7593 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); 7594 code = EQ; 7595 break; 7596 7597 default: 7598 abort (); 7599 } 7600 } 7601 7602 /* Return the test that should be put into the flags user, i.e. 7603 the bcc, scc, or cmov instruction. */ 7604 return gen_rtx_fmt_ee (code, VOIDmode, 7605 gen_rtx_REG (intcmp_mode, FLAGS_REG), 7606 const0_rtx); 7607} 7608 7609rtx 7610ix86_expand_compare (code, second_test, bypass_test) 7611 enum rtx_code code; 7612 rtx *second_test, *bypass_test; 7613{ 7614 rtx op0, op1, ret; 7615 op0 = ix86_compare_op0; 7616 op1 = ix86_compare_op1; 7617 7618 if (second_test) 7619 *second_test = NULL_RTX; 7620 if (bypass_test) 7621 *bypass_test = NULL_RTX; 7622 7623 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT) 7624 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX, 7625 second_test, bypass_test); 7626 else 7627 ret = ix86_expand_int_compare (code, op0, op1); 7628 7629 return ret; 7630} 7631 7632/* Return true if the CODE will result in nontrivial jump sequence. */ 7633bool 7634ix86_fp_jump_nontrivial_p (code) 7635 enum rtx_code code; 7636{ 7637 enum rtx_code bypass_code, first_code, second_code; 7638 if (!TARGET_CMOVE) 7639 return true; 7640 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 7641 return bypass_code != NIL || second_code != NIL; 7642} 7643 7644void 7645ix86_expand_branch (code, label) 7646 enum rtx_code code; 7647 rtx label; 7648{ 7649 rtx tmp; 7650 7651 switch (GET_MODE (ix86_compare_op0)) 7652 { 7653 case QImode: 7654 case HImode: 7655 case SImode: 7656 simple: 7657 tmp = ix86_expand_compare (code, NULL, NULL); 7658 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 7659 gen_rtx_LABEL_REF (VOIDmode, label), 7660 pc_rtx); 7661 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); 7662 return; 7663 7664 case SFmode: 7665 case DFmode: 7666 case XFmode: 7667 case TFmode: 7668 { 7669 rtvec vec; 7670 int use_fcomi; 7671 enum rtx_code bypass_code, first_code, second_code; 7672 7673 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0, 7674 &ix86_compare_op1); 7675 7676 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 7677 7678 /* Check whether we will use the natural sequence with one jump. If 7679 so, we can expand jump early. Otherwise delay expansion by 7680 creating compound insn to not confuse optimizers. */ 7681 if (bypass_code == NIL && second_code == NIL 7682 && TARGET_CMOVE) 7683 { 7684 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1, 7685 gen_rtx_LABEL_REF (VOIDmode, label), 7686 pc_rtx, NULL_RTX); 7687 } 7688 else 7689 { 7690 tmp = gen_rtx_fmt_ee (code, VOIDmode, 7691 ix86_compare_op0, ix86_compare_op1); 7692 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 7693 gen_rtx_LABEL_REF (VOIDmode, label), 7694 pc_rtx); 7695 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp); 7696 7697 use_fcomi = ix86_use_fcomi_compare (code); 7698 vec = rtvec_alloc (3 + !use_fcomi); 7699 RTVEC_ELT (vec, 0) = tmp; 7700 RTVEC_ELT (vec, 1) 7701 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18)); 7702 RTVEC_ELT (vec, 2) 7703 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17)); 7704 if (! use_fcomi) 7705 RTVEC_ELT (vec, 3) 7706 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode)); 7707 7708 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec)); 7709 } 7710 return; 7711 } 7712 7713 case DImode: 7714 if (TARGET_64BIT) 7715 goto simple; 7716 /* Expand DImode branch into multiple compare+branch. */ 7717 { 7718 rtx lo[2], hi[2], label2; 7719 enum rtx_code code1, code2, code3; 7720 7721 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1)) 7722 { 7723 tmp = ix86_compare_op0; 7724 ix86_compare_op0 = ix86_compare_op1; 7725 ix86_compare_op1 = tmp; 7726 code = swap_condition (code); 7727 } 7728 split_di (&ix86_compare_op0, 1, lo+0, hi+0); 7729 split_di (&ix86_compare_op1, 1, lo+1, hi+1); 7730 7731 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to 7732 avoid two branches. This costs one extra insn, so disable when 7733 optimizing for size. */ 7734 7735 if ((code == EQ || code == NE) 7736 && (!optimize_size 7737 || hi[1] == const0_rtx || lo[1] == const0_rtx)) 7738 { 7739 rtx xor0, xor1; 7740 7741 xor1 = hi[0]; 7742 if (hi[1] != const0_rtx) 7743 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1], 7744 NULL_RTX, 0, OPTAB_WIDEN); 7745 7746 xor0 = lo[0]; 7747 if (lo[1] != const0_rtx) 7748 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1], 7749 NULL_RTX, 0, OPTAB_WIDEN); 7750 7751 tmp = expand_binop (SImode, ior_optab, xor1, xor0, 7752 NULL_RTX, 0, OPTAB_WIDEN); 7753 7754 ix86_compare_op0 = tmp; 7755 ix86_compare_op1 = const0_rtx; 7756 ix86_expand_branch (code, label); 7757 return; 7758 } 7759 7760 /* Otherwise, if we are doing less-than or greater-or-equal-than, 7761 op1 is a constant and the low word is zero, then we can just 7762 examine the high word. */ 7763 7764 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx) 7765 switch (code) 7766 { 7767 case LT: case LTU: case GE: case GEU: 7768 ix86_compare_op0 = hi[0]; 7769 ix86_compare_op1 = hi[1]; 7770 ix86_expand_branch (code, label); 7771 return; 7772 default: 7773 break; 7774 } 7775 7776 /* Otherwise, we need two or three jumps. */ 7777 7778 label2 = gen_label_rtx (); 7779 7780 code1 = code; 7781 code2 = swap_condition (code); 7782 code3 = unsigned_condition (code); 7783 7784 switch (code) 7785 { 7786 case LT: case GT: case LTU: case GTU: 7787 break; 7788 7789 case LE: code1 = LT; code2 = GT; break; 7790 case GE: code1 = GT; code2 = LT; break; 7791 case LEU: code1 = LTU; code2 = GTU; break; 7792 case GEU: code1 = GTU; code2 = LTU; break; 7793 7794 case EQ: code1 = NIL; code2 = NE; break; 7795 case NE: code2 = NIL; break; 7796 7797 default: 7798 abort (); 7799 } 7800 7801 /* 7802 * a < b => 7803 * if (hi(a) < hi(b)) goto true; 7804 * if (hi(a) > hi(b)) goto false; 7805 * if (lo(a) < lo(b)) goto true; 7806 * false: 7807 */ 7808 7809 ix86_compare_op0 = hi[0]; 7810 ix86_compare_op1 = hi[1]; 7811 7812 if (code1 != NIL) 7813 ix86_expand_branch (code1, label); 7814 if (code2 != NIL) 7815 ix86_expand_branch (code2, label2); 7816 7817 ix86_compare_op0 = lo[0]; 7818 ix86_compare_op1 = lo[1]; 7819 ix86_expand_branch (code3, label); 7820 7821 if (code2 != NIL) 7822 emit_label (label2); 7823 return; 7824 } 7825 7826 default: 7827 abort (); 7828 } 7829} 7830 7831/* Split branch based on floating point condition. */ 7832void 7833ix86_split_fp_branch (code, op1, op2, target1, target2, tmp) 7834 enum rtx_code code; 7835 rtx op1, op2, target1, target2, tmp; 7836{ 7837 rtx second, bypass; 7838 rtx label = NULL_RTX; 7839 rtx condition; 7840 int bypass_probability = -1, second_probability = -1, probability = -1; 7841 rtx i; 7842 7843 if (target2 != pc_rtx) 7844 { 7845 rtx tmp = target2; 7846 code = reverse_condition_maybe_unordered (code); 7847 target2 = target1; 7848 target1 = tmp; 7849 } 7850 7851 condition = ix86_expand_fp_compare (code, op1, op2, 7852 tmp, &second, &bypass); 7853 7854 if (split_branch_probability >= 0) 7855 { 7856 /* Distribute the probabilities across the jumps. 7857 Assume the BYPASS and SECOND to be always test 7858 for UNORDERED. */ 7859 probability = split_branch_probability; 7860 7861 /* Value of 1 is low enough to make no need for probability 7862 to be updated. Later we may run some experiments and see 7863 if unordered values are more frequent in practice. */ 7864 if (bypass) 7865 bypass_probability = 1; 7866 if (second) 7867 second_probability = 1; 7868 } 7869 if (bypass != NULL_RTX) 7870 { 7871 label = gen_label_rtx (); 7872 i = emit_jump_insn (gen_rtx_SET 7873 (VOIDmode, pc_rtx, 7874 gen_rtx_IF_THEN_ELSE (VOIDmode, 7875 bypass, 7876 gen_rtx_LABEL_REF (VOIDmode, 7877 label), 7878 pc_rtx))); 7879 if (bypass_probability >= 0) 7880 REG_NOTES (i) 7881 = gen_rtx_EXPR_LIST (REG_BR_PROB, 7882 GEN_INT (bypass_probability), 7883 REG_NOTES (i)); 7884 } 7885 i = emit_jump_insn (gen_rtx_SET 7886 (VOIDmode, pc_rtx, 7887 gen_rtx_IF_THEN_ELSE (VOIDmode, 7888 condition, target1, target2))); 7889 if (probability >= 0) 7890 REG_NOTES (i) 7891 = gen_rtx_EXPR_LIST (REG_BR_PROB, 7892 GEN_INT (probability), 7893 REG_NOTES (i)); 7894 if (second != NULL_RTX) 7895 { 7896 i = emit_jump_insn (gen_rtx_SET 7897 (VOIDmode, pc_rtx, 7898 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, 7899 target2))); 7900 if (second_probability >= 0) 7901 REG_NOTES (i) 7902 = gen_rtx_EXPR_LIST (REG_BR_PROB, 7903 GEN_INT (second_probability), 7904 REG_NOTES (i)); 7905 } 7906 if (label != NULL_RTX) 7907 emit_label (label); 7908} 7909 7910int 7911ix86_expand_setcc (code, dest) 7912 enum rtx_code code; 7913 rtx dest; 7914{ 7915 rtx ret, tmp, tmpreg; 7916 rtx second_test, bypass_test; 7917 7918 if (GET_MODE (ix86_compare_op0) == DImode 7919 && !TARGET_64BIT) 7920 return 0; /* FAIL */ 7921 7922 if (GET_MODE (dest) != QImode) 7923 abort (); 7924 7925 ret = ix86_expand_compare (code, &second_test, &bypass_test); 7926 PUT_MODE (ret, QImode); 7927 7928 tmp = dest; 7929 tmpreg = dest; 7930 7931 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret)); 7932 if (bypass_test || second_test) 7933 { 7934 rtx test = second_test; 7935 int bypass = 0; 7936 rtx tmp2 = gen_reg_rtx (QImode); 7937 if (bypass_test) 7938 { 7939 if (second_test) 7940 abort (); 7941 test = bypass_test; 7942 bypass = 1; 7943 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test))); 7944 } 7945 PUT_MODE (test, QImode); 7946 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test)); 7947 7948 if (bypass) 7949 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2)); 7950 else 7951 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2)); 7952 } 7953 7954 return 1; /* DONE */ 7955} 7956 7957int 7958ix86_expand_int_movcc (operands) 7959 rtx operands[]; 7960{ 7961 enum rtx_code code = GET_CODE (operands[1]), compare_code; 7962 rtx compare_seq, compare_op; 7963 rtx second_test, bypass_test; 7964 enum machine_mode mode = GET_MODE (operands[0]); 7965 7966 /* When the compare code is not LTU or GEU, we can not use sbbl case. 7967 In case comparsion is done with immediate, we can convert it to LTU or 7968 GEU by altering the integer. */ 7969 7970 if ((code == LEU || code == GTU) 7971 && GET_CODE (ix86_compare_op1) == CONST_INT 7972 && mode != HImode 7973 && (unsigned int) INTVAL (ix86_compare_op1) != 0xffffffff 7974 && GET_CODE (operands[2]) == CONST_INT 7975 && GET_CODE (operands[3]) == CONST_INT) 7976 { 7977 if (code == LEU) 7978 code = LTU; 7979 else 7980 code = GEU; 7981 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1); 7982 } 7983 7984 start_sequence (); 7985 compare_op = ix86_expand_compare (code, &second_test, &bypass_test); 7986 compare_seq = gen_sequence (); 7987 end_sequence (); 7988 7989 compare_code = GET_CODE (compare_op); 7990 7991 /* Don't attempt mode expansion here -- if we had to expand 5 or 6 7992 HImode insns, we'd be swallowed in word prefix ops. */ 7993 7994 if (mode != HImode 7995 && (mode != DImode || TARGET_64BIT) 7996 && GET_CODE (operands[2]) == CONST_INT 7997 && GET_CODE (operands[3]) == CONST_INT) 7998 { 7999 rtx out = operands[0]; 8000 HOST_WIDE_INT ct = INTVAL (operands[2]); 8001 HOST_WIDE_INT cf = INTVAL (operands[3]); 8002 HOST_WIDE_INT diff; 8003 8004 if ((compare_code == LTU || compare_code == GEU) 8005 && !second_test && !bypass_test) 8006 { 8007 8008 /* Detect overlap between destination and compare sources. */ 8009 rtx tmp = out; 8010 8011 /* To simplify rest of code, restrict to the GEU case. */ 8012 if (compare_code == LTU) 8013 { 8014 int tmp = ct; 8015 ct = cf; 8016 cf = tmp; 8017 compare_code = reverse_condition (compare_code); 8018 code = reverse_condition (code); 8019 } 8020 diff = ct - cf; 8021 8022 if (reg_overlap_mentioned_p (out, ix86_compare_op0) 8023 || reg_overlap_mentioned_p (out, ix86_compare_op1)) 8024 tmp = gen_reg_rtx (mode); 8025 8026 emit_insn (compare_seq); 8027 if (mode == DImode) 8028 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp)); 8029 else 8030 emit_insn (gen_x86_movsicc_0_m1 (tmp)); 8031 8032 if (diff == 1) 8033 { 8034 /* 8035 * cmpl op0,op1 8036 * sbbl dest,dest 8037 * [addl dest, ct] 8038 * 8039 * Size 5 - 8. 8040 */ 8041 if (ct) 8042 tmp = expand_simple_binop (mode, PLUS, 8043 tmp, GEN_INT (ct), 8044 tmp, 1, OPTAB_DIRECT); 8045 } 8046 else if (cf == -1) 8047 { 8048 /* 8049 * cmpl op0,op1 8050 * sbbl dest,dest 8051 * orl $ct, dest 8052 * 8053 * Size 8. 8054 */ 8055 tmp = expand_simple_binop (mode, IOR, 8056 tmp, GEN_INT (ct), 8057 tmp, 1, OPTAB_DIRECT); 8058 } 8059 else if (diff == -1 && ct) 8060 { 8061 /* 8062 * cmpl op0,op1 8063 * sbbl dest,dest 8064 * xorl $-1, dest 8065 * [addl dest, cf] 8066 * 8067 * Size 8 - 11. 8068 */ 8069 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1); 8070 if (cf) 8071 tmp = expand_simple_binop (mode, PLUS, 8072 tmp, GEN_INT (cf), 8073 tmp, 1, OPTAB_DIRECT); 8074 } 8075 else 8076 { 8077 /* 8078 * cmpl op0,op1 8079 * sbbl dest,dest 8080 * andl cf - ct, dest 8081 * [addl dest, ct] 8082 * 8083 * Size 8 - 11. 8084 */ 8085 tmp = expand_simple_binop (mode, AND, 8086 tmp, 8087 GEN_INT (trunc_int_for_mode 8088 (cf - ct, mode)), 8089 tmp, 1, OPTAB_DIRECT); 8090 if (ct) 8091 tmp = expand_simple_binop (mode, PLUS, 8092 tmp, GEN_INT (ct), 8093 tmp, 1, OPTAB_DIRECT); 8094 } 8095 8096 if (tmp != out) 8097 emit_move_insn (out, tmp); 8098 8099 return 1; /* DONE */ 8100 } 8101 8102 diff = ct - cf; 8103 if (diff < 0) 8104 { 8105 HOST_WIDE_INT tmp; 8106 tmp = ct, ct = cf, cf = tmp; 8107 diff = -diff; 8108 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0))) 8109 { 8110 /* We may be reversing unordered compare to normal compare, that 8111 is not valid in general (we may convert non-trapping condition 8112 to trapping one), however on i386 we currently emit all 8113 comparisons unordered. */ 8114 compare_code = reverse_condition_maybe_unordered (compare_code); 8115 code = reverse_condition_maybe_unordered (code); 8116 } 8117 else 8118 { 8119 compare_code = reverse_condition (compare_code); 8120 code = reverse_condition (code); 8121 } 8122 } 8123 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8 8124 || diff == 3 || diff == 5 || diff == 9) 8125 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf)))) 8126 { 8127 /* 8128 * xorl dest,dest 8129 * cmpl op1,op2 8130 * setcc dest 8131 * lea cf(dest*(ct-cf)),dest 8132 * 8133 * Size 14. 8134 * 8135 * This also catches the degenerate setcc-only case. 8136 */ 8137 8138 rtx tmp; 8139 int nops; 8140 8141 out = emit_store_flag (out, code, ix86_compare_op0, 8142 ix86_compare_op1, VOIDmode, 0, 1); 8143 8144 nops = 0; 8145 /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics 8146 done in proper mode to match. */ 8147 if (diff == 1) 8148 tmp = out; 8149 else 8150 { 8151 rtx out1; 8152 out1 = out; 8153 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1)); 8154 nops++; 8155 if (diff & 1) 8156 { 8157 tmp = gen_rtx_PLUS (mode, tmp, out1); 8158 nops++; 8159 } 8160 } 8161 if (cf != 0) 8162 { 8163 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf)); 8164 nops++; 8165 } 8166 if (tmp != out 8167 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out)) 8168 { 8169 if (nops == 1) 8170 { 8171 rtx clob; 8172 8173 clob = gen_rtx_REG (CCmode, FLAGS_REG); 8174 clob = gen_rtx_CLOBBER (VOIDmode, clob); 8175 8176 tmp = gen_rtx_SET (VOIDmode, out, tmp); 8177 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob)); 8178 emit_insn (tmp); 8179 } 8180 else 8181 emit_insn (gen_rtx_SET (VOIDmode, out, tmp)); 8182 } 8183 if (out != operands[0]) 8184 emit_move_insn (operands[0], out); 8185 8186 return 1; /* DONE */ 8187 } 8188 8189 /* 8190 * General case: Jumpful: 8191 * xorl dest,dest cmpl op1, op2 8192 * cmpl op1, op2 movl ct, dest 8193 * setcc dest jcc 1f 8194 * decl dest movl cf, dest 8195 * andl (cf-ct),dest 1: 8196 * addl ct,dest 8197 * 8198 * Size 20. Size 14. 8199 * 8200 * This is reasonably steep, but branch mispredict costs are 8201 * high on modern cpus, so consider failing only if optimizing 8202 * for space. 8203 * 8204 * %%% Parameterize branch_cost on the tuning architecture, then 8205 * use that. The 80386 couldn't care less about mispredicts. 8206 */ 8207 8208 if (!optimize_size && !TARGET_CMOVE) 8209 { 8210 if (ct == 0) 8211 { 8212 ct = cf; 8213 cf = 0; 8214 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0))) 8215 { 8216 /* We may be reversing unordered compare to normal compare, 8217 that is not valid in general (we may convert non-trapping 8218 condition to trapping one), however on i386 we currently 8219 emit all comparisons unordered. */ 8220 compare_code = reverse_condition_maybe_unordered (compare_code); 8221 code = reverse_condition_maybe_unordered (code); 8222 } 8223 else 8224 { 8225 compare_code = reverse_condition (compare_code); 8226 code = reverse_condition (code); 8227 } 8228 } 8229 8230 out = emit_store_flag (out, code, ix86_compare_op0, 8231 ix86_compare_op1, VOIDmode, 0, 1); 8232 8233 out = expand_simple_binop (mode, PLUS, 8234 out, constm1_rtx, 8235 out, 1, OPTAB_DIRECT); 8236 out = expand_simple_binop (mode, AND, 8237 out, 8238 GEN_INT (trunc_int_for_mode 8239 (cf - ct, mode)), 8240 out, 1, OPTAB_DIRECT); 8241 out = expand_simple_binop (mode, PLUS, 8242 out, GEN_INT (ct), 8243 out, 1, OPTAB_DIRECT); 8244 if (out != operands[0]) 8245 emit_move_insn (operands[0], out); 8246 8247 return 1; /* DONE */ 8248 } 8249 } 8250 8251 if (!TARGET_CMOVE) 8252 { 8253 /* Try a few things more with specific constants and a variable. */ 8254 8255 optab op; 8256 rtx var, orig_out, out, tmp; 8257 8258 if (optimize_size) 8259 return 0; /* FAIL */ 8260 8261 /* If one of the two operands is an interesting constant, load a 8262 constant with the above and mask it in with a logical operation. */ 8263 8264 if (GET_CODE (operands[2]) == CONST_INT) 8265 { 8266 var = operands[3]; 8267 if (INTVAL (operands[2]) == 0) 8268 operands[3] = constm1_rtx, op = and_optab; 8269 else if (INTVAL (operands[2]) == -1) 8270 operands[3] = const0_rtx, op = ior_optab; 8271 else 8272 return 0; /* FAIL */ 8273 } 8274 else if (GET_CODE (operands[3]) == CONST_INT) 8275 { 8276 var = operands[2]; 8277 if (INTVAL (operands[3]) == 0) 8278 operands[2] = constm1_rtx, op = and_optab; 8279 else if (INTVAL (operands[3]) == -1) 8280 operands[2] = const0_rtx, op = ior_optab; 8281 else 8282 return 0; /* FAIL */ 8283 } 8284 else 8285 return 0; /* FAIL */ 8286 8287 orig_out = operands[0]; 8288 tmp = gen_reg_rtx (mode); 8289 operands[0] = tmp; 8290 8291 /* Recurse to get the constant loaded. */ 8292 if (ix86_expand_int_movcc (operands) == 0) 8293 return 0; /* FAIL */ 8294 8295 /* Mask in the interesting variable. */ 8296 out = expand_binop (mode, op, var, tmp, orig_out, 0, 8297 OPTAB_WIDEN); 8298 if (out != orig_out) 8299 emit_move_insn (orig_out, out); 8300 8301 return 1; /* DONE */ 8302 } 8303 8304 /* 8305 * For comparison with above, 8306 * 8307 * movl cf,dest 8308 * movl ct,tmp 8309 * cmpl op1,op2 8310 * cmovcc tmp,dest 8311 * 8312 * Size 15. 8313 */ 8314 8315 if (! nonimmediate_operand (operands[2], mode)) 8316 operands[2] = force_reg (mode, operands[2]); 8317 if (! nonimmediate_operand (operands[3], mode)) 8318 operands[3] = force_reg (mode, operands[3]); 8319 8320 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3])) 8321 { 8322 rtx tmp = gen_reg_rtx (mode); 8323 emit_move_insn (tmp, operands[3]); 8324 operands[3] = tmp; 8325 } 8326 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2])) 8327 { 8328 rtx tmp = gen_reg_rtx (mode); 8329 emit_move_insn (tmp, operands[2]); 8330 operands[2] = tmp; 8331 } 8332 if (! register_operand (operands[2], VOIDmode) 8333 && ! register_operand (operands[3], VOIDmode)) 8334 operands[2] = force_reg (mode, operands[2]); 8335 8336 emit_insn (compare_seq); 8337 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 8338 gen_rtx_IF_THEN_ELSE (mode, 8339 compare_op, operands[2], 8340 operands[3]))); 8341 if (bypass_test) 8342 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 8343 gen_rtx_IF_THEN_ELSE (mode, 8344 bypass_test, 8345 operands[3], 8346 operands[0]))); 8347 if (second_test) 8348 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 8349 gen_rtx_IF_THEN_ELSE (mode, 8350 second_test, 8351 operands[2], 8352 operands[0]))); 8353 8354 return 1; /* DONE */ 8355} 8356 8357int 8358ix86_expand_fp_movcc (operands) 8359 rtx operands[]; 8360{ 8361 enum rtx_code code; 8362 rtx tmp; 8363 rtx compare_op, second_test, bypass_test; 8364 8365 /* For SF/DFmode conditional moves based on comparisons 8366 in same mode, we may want to use SSE min/max instructions. */ 8367 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode) 8368 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode)) 8369 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0]) 8370 /* The SSE comparisons does not support the LTGT/UNEQ pair. */ 8371 && (!TARGET_IEEE_FP 8372 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ)) 8373 /* We may be called from the post-reload splitter. */ 8374 && (!REG_P (operands[0]) 8375 || SSE_REG_P (operands[0]) 8376 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)) 8377 { 8378 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1; 8379 code = GET_CODE (operands[1]); 8380 8381 /* See if we have (cross) match between comparison operands and 8382 conditional move operands. */ 8383 if (rtx_equal_p (operands[2], op1)) 8384 { 8385 rtx tmp = op0; 8386 op0 = op1; 8387 op1 = tmp; 8388 code = reverse_condition_maybe_unordered (code); 8389 } 8390 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1)) 8391 { 8392 /* Check for min operation. */ 8393 if (code == LT) 8394 { 8395 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]); 8396 if (memory_operand (op0, VOIDmode)) 8397 op0 = force_reg (GET_MODE (operands[0]), op0); 8398 if (GET_MODE (operands[0]) == SFmode) 8399 emit_insn (gen_minsf3 (operands[0], op0, op1)); 8400 else 8401 emit_insn (gen_mindf3 (operands[0], op0, op1)); 8402 return 1; 8403 } 8404 /* Check for max operation. */ 8405 if (code == GT) 8406 { 8407 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]); 8408 if (memory_operand (op0, VOIDmode)) 8409 op0 = force_reg (GET_MODE (operands[0]), op0); 8410 if (GET_MODE (operands[0]) == SFmode) 8411 emit_insn (gen_maxsf3 (operands[0], op0, op1)); 8412 else 8413 emit_insn (gen_maxdf3 (operands[0], op0, op1)); 8414 return 1; 8415 } 8416 } 8417 /* Manage condition to be sse_comparison_operator. In case we are 8418 in non-ieee mode, try to canonicalize the destination operand 8419 to be first in the comparison - this helps reload to avoid extra 8420 moves. */ 8421 if (!sse_comparison_operator (operands[1], VOIDmode) 8422 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP)) 8423 { 8424 rtx tmp = ix86_compare_op0; 8425 ix86_compare_op0 = ix86_compare_op1; 8426 ix86_compare_op1 = tmp; 8427 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])), 8428 VOIDmode, ix86_compare_op0, 8429 ix86_compare_op1); 8430 } 8431 /* Similary try to manage result to be first operand of conditional 8432 move. We also don't support the NE comparison on SSE, so try to 8433 avoid it. */ 8434 if ((rtx_equal_p (operands[0], operands[3]) 8435 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ)) 8436 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP)) 8437 { 8438 rtx tmp = operands[2]; 8439 operands[2] = operands[3]; 8440 operands[3] = tmp; 8441 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered 8442 (GET_CODE (operands[1])), 8443 VOIDmode, ix86_compare_op0, 8444 ix86_compare_op1); 8445 } 8446 if (GET_MODE (operands[0]) == SFmode) 8447 emit_insn (gen_sse_movsfcc (operands[0], operands[1], 8448 operands[2], operands[3], 8449 ix86_compare_op0, ix86_compare_op1)); 8450 else 8451 emit_insn (gen_sse_movdfcc (operands[0], operands[1], 8452 operands[2], operands[3], 8453 ix86_compare_op0, ix86_compare_op1)); 8454 return 1; 8455 } 8456 8457 /* The floating point conditional move instructions don't directly 8458 support conditions resulting from a signed integer comparison. */ 8459 8460 code = GET_CODE (operands[1]); 8461 compare_op = ix86_expand_compare (code, &second_test, &bypass_test); 8462 8463 /* The floating point conditional move instructions don't directly 8464 support signed integer comparisons. */ 8465 8466 if (!fcmov_comparison_operator (compare_op, VOIDmode)) 8467 { 8468 if (second_test != NULL || bypass_test != NULL) 8469 abort (); 8470 tmp = gen_reg_rtx (QImode); 8471 ix86_expand_setcc (code, tmp); 8472 code = NE; 8473 ix86_compare_op0 = tmp; 8474 ix86_compare_op1 = const0_rtx; 8475 compare_op = ix86_expand_compare (code, &second_test, &bypass_test); 8476 } 8477 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3])) 8478 { 8479 tmp = gen_reg_rtx (GET_MODE (operands[0])); 8480 emit_move_insn (tmp, operands[3]); 8481 operands[3] = tmp; 8482 } 8483 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2])) 8484 { 8485 tmp = gen_reg_rtx (GET_MODE (operands[0])); 8486 emit_move_insn (tmp, operands[2]); 8487 operands[2] = tmp; 8488 } 8489 8490 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 8491 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]), 8492 compare_op, 8493 operands[2], 8494 operands[3]))); 8495 if (bypass_test) 8496 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 8497 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]), 8498 bypass_test, 8499 operands[3], 8500 operands[0]))); 8501 if (second_test) 8502 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 8503 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]), 8504 second_test, 8505 operands[2], 8506 operands[0]))); 8507 8508 return 1; 8509} 8510 8511/* Split operands 0 and 1 into SImode parts. Similar to split_di, but 8512 works for floating pointer parameters and nonoffsetable memories. 8513 For pushes, it returns just stack offsets; the values will be saved 8514 in the right order. Maximally three parts are generated. */ 8515 8516static int 8517ix86_split_to_parts (operand, parts, mode) 8518 rtx operand; 8519 rtx *parts; 8520 enum machine_mode mode; 8521{ 8522 int size; 8523 8524 if (!TARGET_64BIT) 8525 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4); 8526 else 8527 size = (GET_MODE_SIZE (mode) + 4) / 8; 8528 8529 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand))) 8530 abort (); 8531 if (size < 2 || size > 3) 8532 abort (); 8533 8534 /* Optimize constant pool reference to immediates. This is used by fp moves, 8535 that force all constants to memory to allow combining. */ 8536 8537 if (GET_CODE (operand) == MEM 8538 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF 8539 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0))) 8540 operand = get_pool_constant (XEXP (operand, 0)); 8541 8542 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand)) 8543 { 8544 /* The only non-offsetable memories we handle are pushes. */ 8545 if (! push_operand (operand, VOIDmode)) 8546 abort (); 8547 8548 operand = copy_rtx (operand); 8549 PUT_MODE (operand, Pmode); 8550 parts[0] = parts[1] = parts[2] = operand; 8551 } 8552 else if (!TARGET_64BIT) 8553 { 8554 if (mode == DImode) 8555 split_di (&operand, 1, &parts[0], &parts[1]); 8556 else 8557 { 8558 if (REG_P (operand)) 8559 { 8560 if (!reload_completed) 8561 abort (); 8562 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0); 8563 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1); 8564 if (size == 3) 8565 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2); 8566 } 8567 else if (offsettable_memref_p (operand)) 8568 { 8569 operand = adjust_address (operand, SImode, 0); 8570 parts[0] = operand; 8571 parts[1] = adjust_address (operand, SImode, 4); 8572 if (size == 3) 8573 parts[2] = adjust_address (operand, SImode, 8); 8574 } 8575 else if (GET_CODE (operand) == CONST_DOUBLE) 8576 { 8577 REAL_VALUE_TYPE r; 8578 long l[4]; 8579 8580 REAL_VALUE_FROM_CONST_DOUBLE (r, operand); 8581 switch (mode) 8582 { 8583 case XFmode: 8584 case TFmode: 8585 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l); 8586 parts[2] = GEN_INT (trunc_int_for_mode (l[2], SImode)); 8587 break; 8588 case DFmode: 8589 REAL_VALUE_TO_TARGET_DOUBLE (r, l); 8590 break; 8591 default: 8592 abort (); 8593 } 8594 parts[1] = GEN_INT (trunc_int_for_mode (l[1], SImode)); 8595 parts[0] = GEN_INT (trunc_int_for_mode (l[0], SImode)); 8596 } 8597 else 8598 abort (); 8599 } 8600 } 8601 else 8602 { 8603 if (mode == TImode) 8604 split_ti (&operand, 1, &parts[0], &parts[1]); 8605 if (mode == XFmode || mode == TFmode) 8606 { 8607 if (REG_P (operand)) 8608 { 8609 if (!reload_completed) 8610 abort (); 8611 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0); 8612 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1); 8613 } 8614 else if (offsettable_memref_p (operand)) 8615 { 8616 operand = adjust_address (operand, DImode, 0); 8617 parts[0] = operand; 8618 parts[1] = adjust_address (operand, SImode, 8); 8619 } 8620 else if (GET_CODE (operand) == CONST_DOUBLE) 8621 { 8622 REAL_VALUE_TYPE r; 8623 long l[3]; 8624 8625 REAL_VALUE_FROM_CONST_DOUBLE (r, operand); 8626 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l); 8627 /* Do not use shift by 32 to avoid warning on 32bit systems. */ 8628 if (HOST_BITS_PER_WIDE_INT >= 64) 8629 parts[0] 8630 = GEN_INT (trunc_int_for_mode 8631 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1)) 8632 + ((((HOST_WIDE_INT) l[1]) << 31) << 1), 8633 DImode)); 8634 else 8635 parts[0] = immed_double_const (l[0], l[1], DImode); 8636 parts[1] = GEN_INT (trunc_int_for_mode (l[2], SImode)); 8637 } 8638 else 8639 abort (); 8640 } 8641 } 8642 8643 return size; 8644} 8645 8646/* Emit insns to perform a move or push of DI, DF, and XF values. 8647 Return false when normal moves are needed; true when all required 8648 insns have been emitted. Operands 2-4 contain the input values 8649 int the correct order; operands 5-7 contain the output values. */ 8650 8651void 8652ix86_split_long_move (operands) 8653 rtx operands[]; 8654{ 8655 rtx part[2][3]; 8656 int nparts; 8657 int push = 0; 8658 int collisions = 0; 8659 enum machine_mode mode = GET_MODE (operands[0]); 8660 8661 /* The DFmode expanders may ask us to move double. 8662 For 64bit target this is single move. By hiding the fact 8663 here we simplify i386.md splitters. */ 8664 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT) 8665 { 8666 /* Optimize constant pool reference to immediates. This is used by 8667 fp moves, that force all constants to memory to allow combining. */ 8668 8669 if (GET_CODE (operands[1]) == MEM 8670 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF 8671 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))) 8672 operands[1] = get_pool_constant (XEXP (operands[1], 0)); 8673 if (push_operand (operands[0], VOIDmode)) 8674 { 8675 operands[0] = copy_rtx (operands[0]); 8676 PUT_MODE (operands[0], Pmode); 8677 } 8678 else 8679 operands[0] = gen_lowpart (DImode, operands[0]); 8680 operands[1] = gen_lowpart (DImode, operands[1]); 8681 emit_move_insn (operands[0], operands[1]); 8682 return; 8683 } 8684 8685 /* The only non-offsettable memory we handle is push. */ 8686 if (push_operand (operands[0], VOIDmode)) 8687 push = 1; 8688 else if (GET_CODE (operands[0]) == MEM 8689 && ! offsettable_memref_p (operands[0])) 8690 abort (); 8691 8692 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0])); 8693 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0])); 8694 8695 /* When emitting push, take care for source operands on the stack. */ 8696 if (push && GET_CODE (operands[1]) == MEM 8697 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1])) 8698 { 8699 if (nparts == 3) 8700 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]), 8701 XEXP (part[1][2], 0)); 8702 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]), 8703 XEXP (part[1][1], 0)); 8704 } 8705 8706 /* We need to do copy in the right order in case an address register 8707 of the source overlaps the destination. */ 8708 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM) 8709 { 8710 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))) 8711 collisions++; 8712 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0))) 8713 collisions++; 8714 if (nparts == 3 8715 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0))) 8716 collisions++; 8717 8718 /* Collision in the middle part can be handled by reordering. */ 8719 if (collisions == 1 && nparts == 3 8720 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0))) 8721 { 8722 rtx tmp; 8723 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp; 8724 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp; 8725 } 8726 8727 /* If there are more collisions, we can't handle it by reordering. 8728 Do an lea to the last part and use only one colliding move. */ 8729 else if (collisions > 1) 8730 { 8731 collisions = 1; 8732 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1], 8733 XEXP (part[1][0], 0))); 8734 part[1][0] = change_address (part[1][0], 8735 TARGET_64BIT ? DImode : SImode, 8736 part[0][nparts - 1]); 8737 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD); 8738 if (nparts == 3) 8739 part[1][2] = adjust_address (part[1][0], VOIDmode, 8); 8740 } 8741 } 8742 8743 if (push) 8744 { 8745 if (!TARGET_64BIT) 8746 { 8747 if (nparts == 3) 8748 { 8749 /* We use only first 12 bytes of TFmode value, but for pushing we 8750 are required to adjust stack as if we were pushing real 16byte 8751 value. */ 8752 if (mode == TFmode && !TARGET_64BIT) 8753 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, 8754 GEN_INT (-4))); 8755 emit_move_insn (part[0][2], part[1][2]); 8756 } 8757 } 8758 else 8759 { 8760 /* In 64bit mode we don't have 32bit push available. In case this is 8761 register, it is OK - we will just use larger counterpart. We also 8762 retype memory - these comes from attempt to avoid REX prefix on 8763 moving of second half of TFmode value. */ 8764 if (GET_MODE (part[1][1]) == SImode) 8765 { 8766 if (GET_CODE (part[1][1]) == MEM) 8767 part[1][1] = adjust_address (part[1][1], DImode, 0); 8768 else if (REG_P (part[1][1])) 8769 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1])); 8770 else 8771 abort (); 8772 if (GET_MODE (part[1][0]) == SImode) 8773 part[1][0] = part[1][1]; 8774 } 8775 } 8776 emit_move_insn (part[0][1], part[1][1]); 8777 emit_move_insn (part[0][0], part[1][0]); 8778 return; 8779 } 8780 8781 /* Choose correct order to not overwrite the source before it is copied. */ 8782 if ((REG_P (part[0][0]) 8783 && REG_P (part[1][1]) 8784 && (REGNO (part[0][0]) == REGNO (part[1][1]) 8785 || (nparts == 3 8786 && REGNO (part[0][0]) == REGNO (part[1][2])))) 8787 || (collisions > 0 8788 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))) 8789 { 8790 if (nparts == 3) 8791 { 8792 operands[2] = part[0][2]; 8793 operands[3] = part[0][1]; 8794 operands[4] = part[0][0]; 8795 operands[5] = part[1][2]; 8796 operands[6] = part[1][1]; 8797 operands[7] = part[1][0]; 8798 } 8799 else 8800 { 8801 operands[2] = part[0][1]; 8802 operands[3] = part[0][0]; 8803 operands[5] = part[1][1]; 8804 operands[6] = part[1][0]; 8805 } 8806 } 8807 else 8808 { 8809 if (nparts == 3) 8810 { 8811 operands[2] = part[0][0]; 8812 operands[3] = part[0][1]; 8813 operands[4] = part[0][2]; 8814 operands[5] = part[1][0]; 8815 operands[6] = part[1][1]; 8816 operands[7] = part[1][2]; 8817 } 8818 else 8819 { 8820 operands[2] = part[0][0]; 8821 operands[3] = part[0][1]; 8822 operands[5] = part[1][0]; 8823 operands[6] = part[1][1]; 8824 } 8825 } 8826 emit_move_insn (operands[2], operands[5]); 8827 emit_move_insn (operands[3], operands[6]); 8828 if (nparts == 3) 8829 emit_move_insn (operands[4], operands[7]); 8830 8831 return; 8832} 8833 8834void 8835ix86_split_ashldi (operands, scratch) 8836 rtx *operands, scratch; 8837{ 8838 rtx low[2], high[2]; 8839 int count; 8840 8841 if (GET_CODE (operands[2]) == CONST_INT) 8842 { 8843 split_di (operands, 2, low, high); 8844 count = INTVAL (operands[2]) & 63; 8845 8846 if (count >= 32) 8847 { 8848 emit_move_insn (high[0], low[1]); 8849 emit_move_insn (low[0], const0_rtx); 8850 8851 if (count > 32) 8852 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32))); 8853 } 8854 else 8855 { 8856 if (!rtx_equal_p (operands[0], operands[1])) 8857 emit_move_insn (operands[0], operands[1]); 8858 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count))); 8859 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count))); 8860 } 8861 } 8862 else 8863 { 8864 if (!rtx_equal_p (operands[0], operands[1])) 8865 emit_move_insn (operands[0], operands[1]); 8866 8867 split_di (operands, 1, low, high); 8868 8869 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2])); 8870 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2])); 8871 8872 if (TARGET_CMOVE && (! no_new_pseudos || scratch)) 8873 { 8874 if (! no_new_pseudos) 8875 scratch = force_reg (SImode, const0_rtx); 8876 else 8877 emit_move_insn (scratch, const0_rtx); 8878 8879 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], 8880 scratch)); 8881 } 8882 else 8883 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2])); 8884 } 8885} 8886 8887void 8888ix86_split_ashrdi (operands, scratch) 8889 rtx *operands, scratch; 8890{ 8891 rtx low[2], high[2]; 8892 int count; 8893 8894 if (GET_CODE (operands[2]) == CONST_INT) 8895 { 8896 split_di (operands, 2, low, high); 8897 count = INTVAL (operands[2]) & 63; 8898 8899 if (count >= 32) 8900 { 8901 emit_move_insn (low[0], high[1]); 8902 8903 if (! reload_completed) 8904 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31))); 8905 else 8906 { 8907 emit_move_insn (high[0], low[0]); 8908 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31))); 8909 } 8910 8911 if (count > 32) 8912 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32))); 8913 } 8914 else 8915 { 8916 if (!rtx_equal_p (operands[0], operands[1])) 8917 emit_move_insn (operands[0], operands[1]); 8918 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count))); 8919 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count))); 8920 } 8921 } 8922 else 8923 { 8924 if (!rtx_equal_p (operands[0], operands[1])) 8925 emit_move_insn (operands[0], operands[1]); 8926 8927 split_di (operands, 1, low, high); 8928 8929 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2])); 8930 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2])); 8931 8932 if (TARGET_CMOVE && (! no_new_pseudos || scratch)) 8933 { 8934 if (! no_new_pseudos) 8935 scratch = gen_reg_rtx (SImode); 8936 emit_move_insn (scratch, high[0]); 8937 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31))); 8938 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], 8939 scratch)); 8940 } 8941 else 8942 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2])); 8943 } 8944} 8945 8946void 8947ix86_split_lshrdi (operands, scratch) 8948 rtx *operands, scratch; 8949{ 8950 rtx low[2], high[2]; 8951 int count; 8952 8953 if (GET_CODE (operands[2]) == CONST_INT) 8954 { 8955 split_di (operands, 2, low, high); 8956 count = INTVAL (operands[2]) & 63; 8957 8958 if (count >= 32) 8959 { 8960 emit_move_insn (low[0], high[1]); 8961 emit_move_insn (high[0], const0_rtx); 8962 8963 if (count > 32) 8964 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32))); 8965 } 8966 else 8967 { 8968 if (!rtx_equal_p (operands[0], operands[1])) 8969 emit_move_insn (operands[0], operands[1]); 8970 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count))); 8971 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count))); 8972 } 8973 } 8974 else 8975 { 8976 if (!rtx_equal_p (operands[0], operands[1])) 8977 emit_move_insn (operands[0], operands[1]); 8978 8979 split_di (operands, 1, low, high); 8980 8981 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2])); 8982 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2])); 8983 8984 /* Heh. By reversing the arguments, we can reuse this pattern. */ 8985 if (TARGET_CMOVE && (! no_new_pseudos || scratch)) 8986 { 8987 if (! no_new_pseudos) 8988 scratch = force_reg (SImode, const0_rtx); 8989 else 8990 emit_move_insn (scratch, const0_rtx); 8991 8992 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], 8993 scratch)); 8994 } 8995 else 8996 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2])); 8997 } 8998} 8999 9000/* Helper function for the string operations below. Dest VARIABLE whether 9001 it is aligned to VALUE bytes. If true, jump to the label. */ 9002static rtx 9003ix86_expand_aligntest (variable, value) 9004 rtx variable; 9005 int value; 9006{ 9007 rtx label = gen_label_rtx (); 9008 rtx tmpcount = gen_reg_rtx (GET_MODE (variable)); 9009 if (GET_MODE (variable) == DImode) 9010 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value))); 9011 else 9012 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value))); 9013 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable), 9014 1, label); 9015 return label; 9016} 9017 9018/* Adjust COUNTER by the VALUE. */ 9019static void 9020ix86_adjust_counter (countreg, value) 9021 rtx countreg; 9022 HOST_WIDE_INT value; 9023{ 9024 if (GET_MODE (countreg) == DImode) 9025 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value))); 9026 else 9027 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value))); 9028} 9029 9030/* Zero extend possibly SImode EXP to Pmode register. */ 9031rtx 9032ix86_zero_extend_to_Pmode (exp) 9033 rtx exp; 9034{ 9035 rtx r; 9036 if (GET_MODE (exp) == VOIDmode) 9037 return force_reg (Pmode, exp); 9038 if (GET_MODE (exp) == Pmode) 9039 return copy_to_mode_reg (Pmode, exp); 9040 r = gen_reg_rtx (Pmode); 9041 emit_insn (gen_zero_extendsidi2 (r, exp)); 9042 return r; 9043} 9044 9045/* Expand string move (memcpy) operation. Use i386 string operations when 9046 profitable. expand_clrstr contains similar code. */ 9047int 9048ix86_expand_movstr (dst, src, count_exp, align_exp) 9049 rtx dst, src, count_exp, align_exp; 9050{ 9051 rtx srcreg, destreg, countreg; 9052 enum machine_mode counter_mode; 9053 HOST_WIDE_INT align = 0; 9054 unsigned HOST_WIDE_INT count = 0; 9055 rtx insns; 9056 9057 start_sequence (); 9058 9059 if (GET_CODE (align_exp) == CONST_INT) 9060 align = INTVAL (align_exp); 9061 9062 /* This simple hack avoids all inlining code and simplifies code below. */ 9063 if (!TARGET_ALIGN_STRINGOPS) 9064 align = 64; 9065 9066 if (GET_CODE (count_exp) == CONST_INT) 9067 count = INTVAL (count_exp); 9068 9069 /* Figure out proper mode for counter. For 32bits it is always SImode, 9070 for 64bits use SImode when possible, otherwise DImode. 9071 Set count to number of bytes copied when known at compile time. */ 9072 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode 9073 || x86_64_zero_extended_value (count_exp)) 9074 counter_mode = SImode; 9075 else 9076 counter_mode = DImode; 9077 9078 if (counter_mode != SImode && counter_mode != DImode) 9079 abort (); 9080 9081 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0)); 9082 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0)); 9083 9084 emit_insn (gen_cld ()); 9085 9086 /* When optimizing for size emit simple rep ; movsb instruction for 9087 counts not divisible by 4. */ 9088 9089 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03))) 9090 { 9091 countreg = ix86_zero_extend_to_Pmode (count_exp); 9092 if (TARGET_64BIT) 9093 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg, 9094 destreg, srcreg, countreg)); 9095 else 9096 emit_insn (gen_rep_movqi (destreg, srcreg, countreg, 9097 destreg, srcreg, countreg)); 9098 } 9099 9100 /* For constant aligned (or small unaligned) copies use rep movsl 9101 followed by code copying the rest. For PentiumPro ensure 8 byte 9102 alignment to allow rep movsl acceleration. */ 9103 9104 else if (count != 0 9105 && (align >= 8 9106 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4) 9107 || optimize_size || count < (unsigned int) 64)) 9108 { 9109 int size = TARGET_64BIT && !optimize_size ? 8 : 4; 9110 if (count & ~(size - 1)) 9111 { 9112 countreg = copy_to_mode_reg (counter_mode, 9113 GEN_INT ((count >> (size == 4 ? 2 : 3)) 9114 & (TARGET_64BIT ? -1 : 0x3fffffff))); 9115 countreg = ix86_zero_extend_to_Pmode (countreg); 9116 if (size == 4) 9117 { 9118 if (TARGET_64BIT) 9119 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg, 9120 destreg, srcreg, countreg)); 9121 else 9122 emit_insn (gen_rep_movsi (destreg, srcreg, countreg, 9123 destreg, srcreg, countreg)); 9124 } 9125 else 9126 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg, 9127 destreg, srcreg, countreg)); 9128 } 9129 if (size == 8 && (count & 0x04)) 9130 emit_insn (gen_strmovsi (destreg, srcreg)); 9131 if (count & 0x02) 9132 emit_insn (gen_strmovhi (destreg, srcreg)); 9133 if (count & 0x01) 9134 emit_insn (gen_strmovqi (destreg, srcreg)); 9135 } 9136 /* The generic code based on the glibc implementation: 9137 - align destination to 4 bytes (8 byte alignment is used for PentiumPro 9138 allowing accelerated copying there) 9139 - copy the data using rep movsl 9140 - copy the rest. */ 9141 else 9142 { 9143 rtx countreg2; 9144 rtx label = NULL; 9145 9146 /* In case we don't know anything about the alignment, default to 9147 library version, since it is usually equally fast and result in 9148 shorter code. */ 9149 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD) 9150 { 9151 end_sequence (); 9152 return 0; 9153 } 9154 9155 if (TARGET_SINGLE_STRINGOP) 9156 emit_insn (gen_cld ()); 9157 9158 countreg2 = gen_reg_rtx (Pmode); 9159 countreg = copy_to_mode_reg (counter_mode, count_exp); 9160 9161 /* We don't use loops to align destination and to copy parts smaller 9162 than 4 bytes, because gcc is able to optimize such code better (in 9163 the case the destination or the count really is aligned, gcc is often 9164 able to predict the branches) and also it is friendlier to the 9165 hardware branch prediction. 9166 9167 Using loops is benefical for generic case, because we can 9168 handle small counts using the loops. Many CPUs (such as Athlon) 9169 have large REP prefix setup costs. 9170 9171 This is quite costy. Maybe we can revisit this decision later or 9172 add some customizability to this code. */ 9173 9174 if (count == 0 9175 && align < (TARGET_PENTIUMPRO && (count == 0 9176 || count >= (unsigned int) 260) 9177 ? 8 : UNITS_PER_WORD)) 9178 { 9179 label = gen_label_rtx (); 9180 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1), 9181 LEU, 0, counter_mode, 1, label); 9182 } 9183 if (align <= 1) 9184 { 9185 rtx label = ix86_expand_aligntest (destreg, 1); 9186 emit_insn (gen_strmovqi (destreg, srcreg)); 9187 ix86_adjust_counter (countreg, 1); 9188 emit_label (label); 9189 LABEL_NUSES (label) = 1; 9190 } 9191 if (align <= 2) 9192 { 9193 rtx label = ix86_expand_aligntest (destreg, 2); 9194 emit_insn (gen_strmovhi (destreg, srcreg)); 9195 ix86_adjust_counter (countreg, 2); 9196 emit_label (label); 9197 LABEL_NUSES (label) = 1; 9198 } 9199 if (align <= 4 9200 && ((TARGET_PENTIUMPRO && (count == 0 9201 || count >= (unsigned int) 260)) 9202 || TARGET_64BIT)) 9203 { 9204 rtx label = ix86_expand_aligntest (destreg, 4); 9205 emit_insn (gen_strmovsi (destreg, srcreg)); 9206 ix86_adjust_counter (countreg, 4); 9207 emit_label (label); 9208 LABEL_NUSES (label) = 1; 9209 } 9210 9211 if (!TARGET_SINGLE_STRINGOP) 9212 emit_insn (gen_cld ()); 9213 if (TARGET_64BIT) 9214 { 9215 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg), 9216 GEN_INT (3))); 9217 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2, 9218 destreg, srcreg, countreg2)); 9219 } 9220 else 9221 { 9222 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2))); 9223 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2, 9224 destreg, srcreg, countreg2)); 9225 } 9226 9227 if (label) 9228 { 9229 emit_label (label); 9230 LABEL_NUSES (label) = 1; 9231 } 9232 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4)) 9233 emit_insn (gen_strmovsi (destreg, srcreg)); 9234 if ((align <= 4 || count == 0) && TARGET_64BIT) 9235 { 9236 rtx label = ix86_expand_aligntest (countreg, 4); 9237 emit_insn (gen_strmovsi (destreg, srcreg)); 9238 emit_label (label); 9239 LABEL_NUSES (label) = 1; 9240 } 9241 if (align > 2 && count != 0 && (count & 2)) 9242 emit_insn (gen_strmovhi (destreg, srcreg)); 9243 if (align <= 2 || count == 0) 9244 { 9245 rtx label = ix86_expand_aligntest (countreg, 2); 9246 emit_insn (gen_strmovhi (destreg, srcreg)); 9247 emit_label (label); 9248 LABEL_NUSES (label) = 1; 9249 } 9250 if (align > 1 && count != 0 && (count & 1)) 9251 emit_insn (gen_strmovqi (destreg, srcreg)); 9252 if (align <= 1 || count == 0) 9253 { 9254 rtx label = ix86_expand_aligntest (countreg, 1); 9255 emit_insn (gen_strmovqi (destreg, srcreg)); 9256 emit_label (label); 9257 LABEL_NUSES (label) = 1; 9258 } 9259 } 9260 9261 insns = get_insns (); 9262 end_sequence (); 9263 9264 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg); 9265 emit_insns (insns); 9266 return 1; 9267} 9268 9269/* Expand string clear operation (bzero). Use i386 string operations when 9270 profitable. expand_movstr contains similar code. */ 9271int 9272ix86_expand_clrstr (src, count_exp, align_exp) 9273 rtx src, count_exp, align_exp; 9274{ 9275 rtx destreg, zeroreg, countreg; 9276 enum machine_mode counter_mode; 9277 HOST_WIDE_INT align = 0; 9278 unsigned HOST_WIDE_INT count = 0; 9279 9280 if (GET_CODE (align_exp) == CONST_INT) 9281 align = INTVAL (align_exp); 9282 9283 /* This simple hack avoids all inlining code and simplifies code below. */ 9284 if (!TARGET_ALIGN_STRINGOPS) 9285 align = 32; 9286 9287 if (GET_CODE (count_exp) == CONST_INT) 9288 count = INTVAL (count_exp); 9289 /* Figure out proper mode for counter. For 32bits it is always SImode, 9290 for 64bits use SImode when possible, otherwise DImode. 9291 Set count to number of bytes copied when known at compile time. */ 9292 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode 9293 || x86_64_zero_extended_value (count_exp)) 9294 counter_mode = SImode; 9295 else 9296 counter_mode = DImode; 9297 9298 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0)); 9299 9300 emit_insn (gen_cld ()); 9301 9302 /* When optimizing for size emit simple rep ; movsb instruction for 9303 counts not divisible by 4. */ 9304 9305 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03))) 9306 { 9307 countreg = ix86_zero_extend_to_Pmode (count_exp); 9308 zeroreg = copy_to_mode_reg (QImode, const0_rtx); 9309 if (TARGET_64BIT) 9310 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg, 9311 destreg, countreg)); 9312 else 9313 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg, 9314 destreg, countreg)); 9315 } 9316 else if (count != 0 9317 && (align >= 8 9318 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4) 9319 || optimize_size || count < (unsigned int) 64)) 9320 { 9321 int size = TARGET_64BIT && !optimize_size ? 8 : 4; 9322 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx); 9323 if (count & ~(size - 1)) 9324 { 9325 countreg = copy_to_mode_reg (counter_mode, 9326 GEN_INT ((count >> (size == 4 ? 2 : 3)) 9327 & (TARGET_64BIT ? -1 : 0x3fffffff))); 9328 countreg = ix86_zero_extend_to_Pmode (countreg); 9329 if (size == 4) 9330 { 9331 if (TARGET_64BIT) 9332 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg, 9333 destreg, countreg)); 9334 else 9335 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg, 9336 destreg, countreg)); 9337 } 9338 else 9339 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg, 9340 destreg, countreg)); 9341 } 9342 if (size == 8 && (count & 0x04)) 9343 emit_insn (gen_strsetsi (destreg, 9344 gen_rtx_SUBREG (SImode, zeroreg, 0))); 9345 if (count & 0x02) 9346 emit_insn (gen_strsethi (destreg, 9347 gen_rtx_SUBREG (HImode, zeroreg, 0))); 9348 if (count & 0x01) 9349 emit_insn (gen_strsetqi (destreg, 9350 gen_rtx_SUBREG (QImode, zeroreg, 0))); 9351 } 9352 else 9353 { 9354 rtx countreg2; 9355 rtx label = NULL; 9356 9357 /* In case we don't know anything about the alignment, default to 9358 library version, since it is usually equally fast and result in 9359 shorter code. */ 9360 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD) 9361 return 0; 9362 9363 if (TARGET_SINGLE_STRINGOP) 9364 emit_insn (gen_cld ()); 9365 9366 countreg2 = gen_reg_rtx (Pmode); 9367 countreg = copy_to_mode_reg (counter_mode, count_exp); 9368 zeroreg = copy_to_mode_reg (Pmode, const0_rtx); 9369 9370 if (count == 0 9371 && align < (TARGET_PENTIUMPRO && (count == 0 9372 || count >= (unsigned int) 260) 9373 ? 8 : UNITS_PER_WORD)) 9374 { 9375 label = gen_label_rtx (); 9376 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1), 9377 LEU, 0, counter_mode, 1, label); 9378 } 9379 if (align <= 1) 9380 { 9381 rtx label = ix86_expand_aligntest (destreg, 1); 9382 emit_insn (gen_strsetqi (destreg, 9383 gen_rtx_SUBREG (QImode, zeroreg, 0))); 9384 ix86_adjust_counter (countreg, 1); 9385 emit_label (label); 9386 LABEL_NUSES (label) = 1; 9387 } 9388 if (align <= 2) 9389 { 9390 rtx label = ix86_expand_aligntest (destreg, 2); 9391 emit_insn (gen_strsethi (destreg, 9392 gen_rtx_SUBREG (HImode, zeroreg, 0))); 9393 ix86_adjust_counter (countreg, 2); 9394 emit_label (label); 9395 LABEL_NUSES (label) = 1; 9396 } 9397 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0 9398 || count >= (unsigned int) 260)) 9399 { 9400 rtx label = ix86_expand_aligntest (destreg, 4); 9401 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT 9402 ? gen_rtx_SUBREG (SImode, zeroreg, 0) 9403 : zeroreg))); 9404 ix86_adjust_counter (countreg, 4); 9405 emit_label (label); 9406 LABEL_NUSES (label) = 1; 9407 } 9408 9409 if (!TARGET_SINGLE_STRINGOP) 9410 emit_insn (gen_cld ()); 9411 if (TARGET_64BIT) 9412 { 9413 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg), 9414 GEN_INT (3))); 9415 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg, 9416 destreg, countreg2)); 9417 } 9418 else 9419 { 9420 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2))); 9421 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg, 9422 destreg, countreg2)); 9423 } 9424 9425 if (label) 9426 { 9427 emit_label (label); 9428 LABEL_NUSES (label) = 1; 9429 } 9430 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4)) 9431 emit_insn (gen_strsetsi (destreg, 9432 gen_rtx_SUBREG (SImode, zeroreg, 0))); 9433 if (TARGET_64BIT && (align <= 4 || count == 0)) 9434 { 9435 rtx label = ix86_expand_aligntest (countreg, 4); 9436 emit_insn (gen_strsetsi (destreg, 9437 gen_rtx_SUBREG (SImode, zeroreg, 0))); 9438 emit_label (label); 9439 LABEL_NUSES (label) = 1; 9440 } 9441 if (align > 2 && count != 0 && (count & 2)) 9442 emit_insn (gen_strsethi (destreg, 9443 gen_rtx_SUBREG (HImode, zeroreg, 0))); 9444 if (align <= 2 || count == 0) 9445 { 9446 rtx label = ix86_expand_aligntest (countreg, 2); 9447 emit_insn (gen_strsethi (destreg, 9448 gen_rtx_SUBREG (HImode, zeroreg, 0))); 9449 emit_label (label); 9450 LABEL_NUSES (label) = 1; 9451 } 9452 if (align > 1 && count != 0 && (count & 1)) 9453 emit_insn (gen_strsetqi (destreg, 9454 gen_rtx_SUBREG (QImode, zeroreg, 0))); 9455 if (align <= 1 || count == 0) 9456 { 9457 rtx label = ix86_expand_aligntest (countreg, 1); 9458 emit_insn (gen_strsetqi (destreg, 9459 gen_rtx_SUBREG (QImode, zeroreg, 0))); 9460 emit_label (label); 9461 LABEL_NUSES (label) = 1; 9462 } 9463 } 9464 return 1; 9465} 9466/* Expand strlen. */ 9467int 9468ix86_expand_strlen (out, src, eoschar, align) 9469 rtx out, src, eoschar, align; 9470{ 9471 rtx addr, scratch1, scratch2, scratch3, scratch4; 9472 9473 /* The generic case of strlen expander is long. Avoid it's 9474 expanding unless TARGET_INLINE_ALL_STRINGOPS. */ 9475 9476 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 9477 && !TARGET_INLINE_ALL_STRINGOPS 9478 && !optimize_size 9479 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4)) 9480 return 0; 9481 9482 addr = force_reg (Pmode, XEXP (src, 0)); 9483 scratch1 = gen_reg_rtx (Pmode); 9484 9485 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 9486 && !optimize_size) 9487 { 9488 /* Well it seems that some optimizer does not combine a call like 9489 foo(strlen(bar), strlen(bar)); 9490 when the move and the subtraction is done here. It does calculate 9491 the length just once when these instructions are done inside of 9492 output_strlen_unroll(). But I think since &bar[strlen(bar)] is 9493 often used and I use one fewer register for the lifetime of 9494 output_strlen_unroll() this is better. */ 9495 9496 emit_move_insn (out, addr); 9497 9498 ix86_expand_strlensi_unroll_1 (out, align); 9499 9500 /* strlensi_unroll_1 returns the address of the zero at the end of 9501 the string, like memchr(), so compute the length by subtracting 9502 the start address. */ 9503 if (TARGET_64BIT) 9504 emit_insn (gen_subdi3 (out, out, addr)); 9505 else 9506 emit_insn (gen_subsi3 (out, out, addr)); 9507 } 9508 else 9509 { 9510 scratch2 = gen_reg_rtx (Pmode); 9511 scratch3 = gen_reg_rtx (Pmode); 9512 scratch4 = force_reg (Pmode, constm1_rtx); 9513 9514 emit_move_insn (scratch3, addr); 9515 eoschar = force_reg (QImode, eoschar); 9516 9517 emit_insn (gen_cld ()); 9518 if (TARGET_64BIT) 9519 { 9520 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar, 9521 align, scratch4, scratch3)); 9522 emit_insn (gen_one_cmpldi2 (scratch2, scratch1)); 9523 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx)); 9524 } 9525 else 9526 { 9527 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar, 9528 align, scratch4, scratch3)); 9529 emit_insn (gen_one_cmplsi2 (scratch2, scratch1)); 9530 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx)); 9531 } 9532 } 9533 return 1; 9534} 9535 9536/* Expand the appropriate insns for doing strlen if not just doing 9537 repnz; scasb 9538 9539 out = result, initialized with the start address 9540 align_rtx = alignment of the address. 9541 scratch = scratch register, initialized with the startaddress when 9542 not aligned, otherwise undefined 9543 9544 This is just the body. It needs the initialisations mentioned above and 9545 some address computing at the end. These things are done in i386.md. */ 9546 9547static void 9548ix86_expand_strlensi_unroll_1 (out, align_rtx) 9549 rtx out, align_rtx; 9550{ 9551 int align; 9552 rtx tmp; 9553 rtx align_2_label = NULL_RTX; 9554 rtx align_3_label = NULL_RTX; 9555 rtx align_4_label = gen_label_rtx (); 9556 rtx end_0_label = gen_label_rtx (); 9557 rtx mem; 9558 rtx tmpreg = gen_reg_rtx (SImode); 9559 rtx scratch = gen_reg_rtx (SImode); 9560 9561 align = 0; 9562 if (GET_CODE (align_rtx) == CONST_INT) 9563 align = INTVAL (align_rtx); 9564 9565 /* Loop to check 1..3 bytes for null to get an aligned pointer. */ 9566 9567 /* Is there a known alignment and is it less than 4? */ 9568 if (align < 4) 9569 { 9570 rtx scratch1 = gen_reg_rtx (Pmode); 9571 emit_move_insn (scratch1, out); 9572 /* Is there a known alignment and is it not 2? */ 9573 if (align != 2) 9574 { 9575 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */ 9576 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */ 9577 9578 /* Leave just the 3 lower bits. */ 9579 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3), 9580 NULL_RTX, 0, OPTAB_WIDEN); 9581 9582 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, 9583 Pmode, 1, align_4_label); 9584 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL, 9585 Pmode, 1, align_2_label); 9586 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL, 9587 Pmode, 1, align_3_label); 9588 } 9589 else 9590 { 9591 /* Since the alignment is 2, we have to check 2 or 0 bytes; 9592 check if is aligned to 4 - byte. */ 9593 9594 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2), 9595 NULL_RTX, 0, OPTAB_WIDEN); 9596 9597 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, 9598 Pmode, 1, align_4_label); 9599 } 9600 9601 mem = gen_rtx_MEM (QImode, out); 9602 9603 /* Now compare the bytes. */ 9604 9605 /* Compare the first n unaligned byte on a byte per byte basis. */ 9606 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, 9607 QImode, 1, end_0_label); 9608 9609 /* Increment the address. */ 9610 if (TARGET_64BIT) 9611 emit_insn (gen_adddi3 (out, out, const1_rtx)); 9612 else 9613 emit_insn (gen_addsi3 (out, out, const1_rtx)); 9614 9615 /* Not needed with an alignment of 2 */ 9616 if (align != 2) 9617 { 9618 emit_label (align_2_label); 9619 9620 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, 9621 end_0_label); 9622 9623 if (TARGET_64BIT) 9624 emit_insn (gen_adddi3 (out, out, const1_rtx)); 9625 else 9626 emit_insn (gen_addsi3 (out, out, const1_rtx)); 9627 9628 emit_label (align_3_label); 9629 } 9630 9631 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, 9632 end_0_label); 9633 9634 if (TARGET_64BIT) 9635 emit_insn (gen_adddi3 (out, out, const1_rtx)); 9636 else 9637 emit_insn (gen_addsi3 (out, out, const1_rtx)); 9638 } 9639 9640 /* Generate loop to check 4 bytes at a time. It is not a good idea to 9641 align this loop. It gives only huge programs, but does not help to 9642 speed up. */ 9643 emit_label (align_4_label); 9644 9645 mem = gen_rtx_MEM (SImode, out); 9646 emit_move_insn (scratch, mem); 9647 if (TARGET_64BIT) 9648 emit_insn (gen_adddi3 (out, out, GEN_INT (4))); 9649 else 9650 emit_insn (gen_addsi3 (out, out, GEN_INT (4))); 9651 9652 /* This formula yields a nonzero result iff one of the bytes is zero. 9653 This saves three branches inside loop and many cycles. */ 9654 9655 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101))); 9656 emit_insn (gen_one_cmplsi2 (scratch, scratch)); 9657 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch)); 9658 emit_insn (gen_andsi3 (tmpreg, tmpreg, 9659 GEN_INT (trunc_int_for_mode 9660 (0x80808080, SImode)))); 9661 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 9662 align_4_label); 9663 9664 if (TARGET_CMOVE) 9665 { 9666 rtx reg = gen_reg_rtx (SImode); 9667 rtx reg2 = gen_reg_rtx (Pmode); 9668 emit_move_insn (reg, tmpreg); 9669 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16))); 9670 9671 /* If zero is not in the first two bytes, move two bytes forward. */ 9672 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); 9673 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 9674 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); 9675 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, 9676 gen_rtx_IF_THEN_ELSE (SImode, tmp, 9677 reg, 9678 tmpreg))); 9679 /* Emit lea manually to avoid clobbering of flags. */ 9680 emit_insn (gen_rtx_SET (SImode, reg2, 9681 gen_rtx_PLUS (Pmode, out, GEN_INT (2)))); 9682 9683 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 9684 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); 9685 emit_insn (gen_rtx_SET (VOIDmode, out, 9686 gen_rtx_IF_THEN_ELSE (Pmode, tmp, 9687 reg2, 9688 out))); 9689 9690 } 9691 else 9692 { 9693 rtx end_2_label = gen_label_rtx (); 9694 /* Is zero in the first two bytes? */ 9695 9696 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); 9697 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 9698 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx); 9699 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 9700 gen_rtx_LABEL_REF (VOIDmode, end_2_label), 9701 pc_rtx); 9702 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); 9703 JUMP_LABEL (tmp) = end_2_label; 9704 9705 /* Not in the first two. Move two bytes forward. */ 9706 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16))); 9707 if (TARGET_64BIT) 9708 emit_insn (gen_adddi3 (out, out, GEN_INT (2))); 9709 else 9710 emit_insn (gen_addsi3 (out, out, GEN_INT (2))); 9711 9712 emit_label (end_2_label); 9713 9714 } 9715 9716 /* Avoid branch in fixing the byte. */ 9717 tmpreg = gen_lowpart (QImode, tmpreg); 9718 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg)); 9719 if (TARGET_64BIT) 9720 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3))); 9721 else 9722 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3))); 9723 9724 emit_label (end_0_label); 9725} 9726 9727/* Clear stack slot assignments remembered from previous functions. 9728 This is called from INIT_EXPANDERS once before RTL is emitted for each 9729 function. */ 9730 9731static void 9732ix86_init_machine_status (p) 9733 struct function *p; 9734{ 9735 p->machine = (struct machine_function *) 9736 xcalloc (1, sizeof (struct machine_function)); 9737} 9738 9739/* Mark machine specific bits of P for GC. */ 9740static void 9741ix86_mark_machine_status (p) 9742 struct function *p; 9743{ 9744 struct machine_function *machine = p->machine; 9745 enum machine_mode mode; 9746 int n; 9747 9748 if (! machine) 9749 return; 9750 9751 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE; 9752 mode = (enum machine_mode) ((int) mode + 1)) 9753 for (n = 0; n < MAX_386_STACK_LOCALS; n++) 9754 ggc_mark_rtx (machine->stack_locals[(int) mode][n]); 9755} 9756 9757static void 9758ix86_free_machine_status (p) 9759 struct function *p; 9760{ 9761 free (p->machine); 9762 p->machine = NULL; 9763} 9764 9765/* Return a MEM corresponding to a stack slot with mode MODE. 9766 Allocate a new slot if necessary. 9767 9768 The RTL for a function can have several slots available: N is 9769 which slot to use. */ 9770 9771rtx 9772assign_386_stack_local (mode, n) 9773 enum machine_mode mode; 9774 int n; 9775{ 9776 if (n < 0 || n >= MAX_386_STACK_LOCALS) 9777 abort (); 9778 9779 if (ix86_stack_locals[(int) mode][n] == NULL_RTX) 9780 ix86_stack_locals[(int) mode][n] 9781 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0); 9782 9783 return ix86_stack_locals[(int) mode][n]; 9784} 9785 9786/* Calculate the length of the memory address in the instruction 9787 encoding. Does not include the one-byte modrm, opcode, or prefix. */ 9788 9789static int 9790memory_address_length (addr) 9791 rtx addr; 9792{ 9793 struct ix86_address parts; 9794 rtx base, index, disp; 9795 int len; 9796 9797 if (GET_CODE (addr) == PRE_DEC 9798 || GET_CODE (addr) == POST_INC 9799 || GET_CODE (addr) == PRE_MODIFY 9800 || GET_CODE (addr) == POST_MODIFY) 9801 return 0; 9802 9803 if (! ix86_decompose_address (addr, &parts)) 9804 abort (); 9805 9806 base = parts.base; 9807 index = parts.index; 9808 disp = parts.disp; 9809 len = 0; 9810 9811 /* Register Indirect. */ 9812 if (base && !index && !disp) 9813 { 9814 /* Special cases: ebp and esp need the two-byte modrm form. */ 9815 if (addr == stack_pointer_rtx 9816 || addr == arg_pointer_rtx 9817 || addr == frame_pointer_rtx 9818 || addr == hard_frame_pointer_rtx) 9819 len = 1; 9820 } 9821 9822 /* Direct Addressing. */ 9823 else if (disp && !base && !index) 9824 len = 4; 9825 9826 else 9827 { 9828 /* Find the length of the displacement constant. */ 9829 if (disp) 9830 { 9831 if (GET_CODE (disp) == CONST_INT 9832 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')) 9833 len = 1; 9834 else 9835 len = 4; 9836 } 9837 9838 /* An index requires the two-byte modrm form. */ 9839 if (index) 9840 len += 1; 9841 } 9842 9843 return len; 9844} 9845 9846/* Compute default value for "length_immediate" attribute. When SHORTFORM is set 9847 expect that insn have 8bit immediate alternative. */ 9848int 9849ix86_attr_length_immediate_default (insn, shortform) 9850 rtx insn; 9851 int shortform; 9852{ 9853 int len = 0; 9854 int i; 9855 extract_insn_cached (insn); 9856 for (i = recog_data.n_operands - 1; i >= 0; --i) 9857 if (CONSTANT_P (recog_data.operand[i])) 9858 { 9859 if (len) 9860 abort (); 9861 if (shortform 9862 && GET_CODE (recog_data.operand[i]) == CONST_INT 9863 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K')) 9864 len = 1; 9865 else 9866 { 9867 switch (get_attr_mode (insn)) 9868 { 9869 case MODE_QI: 9870 len+=1; 9871 break; 9872 case MODE_HI: 9873 len+=2; 9874 break; 9875 case MODE_SI: 9876 len+=4; 9877 break; 9878 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */ 9879 case MODE_DI: 9880 len+=4; 9881 break; 9882 default: 9883 fatal_insn ("unknown insn mode", insn); 9884 } 9885 } 9886 } 9887 return len; 9888} 9889/* Compute default value for "length_address" attribute. */ 9890int 9891ix86_attr_length_address_default (insn) 9892 rtx insn; 9893{ 9894 int i; 9895 extract_insn_cached (insn); 9896 for (i = recog_data.n_operands - 1; i >= 0; --i) 9897 if (GET_CODE (recog_data.operand[i]) == MEM) 9898 { 9899 return memory_address_length (XEXP (recog_data.operand[i], 0)); 9900 break; 9901 } 9902 return 0; 9903} 9904 9905/* Return the maximum number of instructions a cpu can issue. */ 9906 9907static int 9908ix86_issue_rate () 9909{ 9910 switch (ix86_cpu) 9911 { 9912 case PROCESSOR_PENTIUM: 9913 case PROCESSOR_K6: 9914 return 2; 9915 9916 case PROCESSOR_PENTIUMPRO: 9917 case PROCESSOR_PENTIUM4: 9918 case PROCESSOR_ATHLON: 9919 return 3; 9920 9921 default: 9922 return 1; 9923 } 9924} 9925 9926/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set 9927 by DEP_INSN and nothing set by DEP_INSN. */ 9928 9929static int 9930ix86_flags_dependant (insn, dep_insn, insn_type) 9931 rtx insn, dep_insn; 9932 enum attr_type insn_type; 9933{ 9934 rtx set, set2; 9935 9936 /* Simplify the test for uninteresting insns. */ 9937 if (insn_type != TYPE_SETCC 9938 && insn_type != TYPE_ICMOV 9939 && insn_type != TYPE_FCMOV 9940 && insn_type != TYPE_IBR) 9941 return 0; 9942 9943 if ((set = single_set (dep_insn)) != 0) 9944 { 9945 set = SET_DEST (set); 9946 set2 = NULL_RTX; 9947 } 9948 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL 9949 && XVECLEN (PATTERN (dep_insn), 0) == 2 9950 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET 9951 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET) 9952 { 9953 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 9954 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 9955 } 9956 else 9957 return 0; 9958 9959 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG) 9960 return 0; 9961 9962 /* This test is true if the dependent insn reads the flags but 9963 not any other potentially set register. */ 9964 if (!reg_overlap_mentioned_p (set, PATTERN (insn))) 9965 return 0; 9966 9967 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn))) 9968 return 0; 9969 9970 return 1; 9971} 9972 9973/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory 9974 address with operands set by DEP_INSN. */ 9975 9976static int 9977ix86_agi_dependant (insn, dep_insn, insn_type) 9978 rtx insn, dep_insn; 9979 enum attr_type insn_type; 9980{ 9981 rtx addr; 9982 9983 if (insn_type == TYPE_LEA 9984 && TARGET_PENTIUM) 9985 { 9986 addr = PATTERN (insn); 9987 if (GET_CODE (addr) == SET) 9988 ; 9989 else if (GET_CODE (addr) == PARALLEL 9990 && GET_CODE (XVECEXP (addr, 0, 0)) == SET) 9991 addr = XVECEXP (addr, 0, 0); 9992 else 9993 abort (); 9994 addr = SET_SRC (addr); 9995 } 9996 else 9997 { 9998 int i; 9999 extract_insn_cached (insn); 10000 for (i = recog_data.n_operands - 1; i >= 0; --i) 10001 if (GET_CODE (recog_data.operand[i]) == MEM) 10002 { 10003 addr = XEXP (recog_data.operand[i], 0); 10004 goto found; 10005 } 10006 return 0; 10007 found:; 10008 } 10009 10010 return modified_in_p (addr, dep_insn); 10011} 10012 10013static int 10014ix86_adjust_cost (insn, link, dep_insn, cost) 10015 rtx insn, link, dep_insn; 10016 int cost; 10017{ 10018 enum attr_type insn_type, dep_insn_type; 10019 enum attr_memory memory, dep_memory; 10020 rtx set, set2; 10021 int dep_insn_code_number; 10022 10023 /* Anti and output depenancies have zero cost on all CPUs. */ 10024 if (REG_NOTE_KIND (link) != 0) 10025 return 0; 10026 10027 dep_insn_code_number = recog_memoized (dep_insn); 10028 10029 /* If we can't recognize the insns, we can't really do anything. */ 10030 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0) 10031 return cost; 10032 10033 insn_type = get_attr_type (insn); 10034 dep_insn_type = get_attr_type (dep_insn); 10035 10036 switch (ix86_cpu) 10037 { 10038 case PROCESSOR_PENTIUM: 10039 /* Address Generation Interlock adds a cycle of latency. */ 10040 if (ix86_agi_dependant (insn, dep_insn, insn_type)) 10041 cost += 1; 10042 10043 /* ??? Compares pair with jump/setcc. */ 10044 if (ix86_flags_dependant (insn, dep_insn, insn_type)) 10045 cost = 0; 10046 10047 /* Floating point stores require value to be ready one cycle ealier. */ 10048 if (insn_type == TYPE_FMOV 10049 && get_attr_memory (insn) == MEMORY_STORE 10050 && !ix86_agi_dependant (insn, dep_insn, insn_type)) 10051 cost += 1; 10052 break; 10053 10054 case PROCESSOR_PENTIUMPRO: 10055 memory = get_attr_memory (insn); 10056 dep_memory = get_attr_memory (dep_insn); 10057 10058 /* Since we can't represent delayed latencies of load+operation, 10059 increase the cost here for non-imov insns. */ 10060 if (dep_insn_type != TYPE_IMOV 10061 && dep_insn_type != TYPE_FMOV 10062 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)) 10063 cost += 1; 10064 10065 /* INT->FP conversion is expensive. */ 10066 if (get_attr_fp_int_src (dep_insn)) 10067 cost += 5; 10068 10069 /* There is one cycle extra latency between an FP op and a store. */ 10070 if (insn_type == TYPE_FMOV 10071 && (set = single_set (dep_insn)) != NULL_RTX 10072 && (set2 = single_set (insn)) != NULL_RTX 10073 && rtx_equal_p (SET_DEST (set), SET_SRC (set2)) 10074 && GET_CODE (SET_DEST (set2)) == MEM) 10075 cost += 1; 10076 10077 /* Show ability of reorder buffer to hide latency of load by executing 10078 in parallel with previous instruction in case 10079 previous instruction is not needed to compute the address. */ 10080 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 10081 && !ix86_agi_dependant (insn, dep_insn, insn_type)) 10082 { 10083 /* Claim moves to take one cycle, as core can issue one load 10084 at time and the next load can start cycle later. */ 10085 if (dep_insn_type == TYPE_IMOV 10086 || dep_insn_type == TYPE_FMOV) 10087 cost = 1; 10088 else if (cost > 1) 10089 cost--; 10090 } 10091 break; 10092 10093 case PROCESSOR_K6: 10094 memory = get_attr_memory (insn); 10095 dep_memory = get_attr_memory (dep_insn); 10096 /* The esp dependency is resolved before the instruction is really 10097 finished. */ 10098 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) 10099 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) 10100 return 1; 10101 10102 /* Since we can't represent delayed latencies of load+operation, 10103 increase the cost here for non-imov insns. */ 10104 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH) 10105 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1; 10106 10107 /* INT->FP conversion is expensive. */ 10108 if (get_attr_fp_int_src (dep_insn)) 10109 cost += 5; 10110 10111 /* Show ability of reorder buffer to hide latency of load by executing 10112 in parallel with previous instruction in case 10113 previous instruction is not needed to compute the address. */ 10114 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 10115 && !ix86_agi_dependant (insn, dep_insn, insn_type)) 10116 { 10117 /* Claim moves to take one cycle, as core can issue one load 10118 at time and the next load can start cycle later. */ 10119 if (dep_insn_type == TYPE_IMOV 10120 || dep_insn_type == TYPE_FMOV) 10121 cost = 1; 10122 else if (cost > 2) 10123 cost -= 2; 10124 else 10125 cost = 1; 10126 } 10127 break; 10128 10129 case PROCESSOR_ATHLON: 10130 memory = get_attr_memory (insn); 10131 dep_memory = get_attr_memory (dep_insn); 10132 10133 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH) 10134 { 10135 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV) 10136 cost += 2; 10137 else 10138 cost += 3; 10139 } 10140 /* Show ability of reorder buffer to hide latency of load by executing 10141 in parallel with previous instruction in case 10142 previous instruction is not needed to compute the address. */ 10143 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 10144 && !ix86_agi_dependant (insn, dep_insn, insn_type)) 10145 { 10146 /* Claim moves to take one cycle, as core can issue one load 10147 at time and the next load can start cycle later. */ 10148 if (dep_insn_type == TYPE_IMOV 10149 || dep_insn_type == TYPE_FMOV) 10150 cost = 0; 10151 else if (cost >= 3) 10152 cost -= 3; 10153 else 10154 cost = 0; 10155 } 10156 10157 default: 10158 break; 10159 } 10160 10161 return cost; 10162} 10163 10164static union 10165{ 10166 struct ppro_sched_data 10167 { 10168 rtx decode[3]; 10169 int issued_this_cycle; 10170 } ppro; 10171} ix86_sched_data; 10172 10173static int 10174ix86_safe_length (insn) 10175 rtx insn; 10176{ 10177 if (recog_memoized (insn) >= 0) 10178 return get_attr_length (insn); 10179 else 10180 return 128; 10181} 10182 10183static int 10184ix86_safe_length_prefix (insn) 10185 rtx insn; 10186{ 10187 if (recog_memoized (insn) >= 0) 10188 return get_attr_length (insn); 10189 else 10190 return 0; 10191} 10192 10193static enum attr_memory 10194ix86_safe_memory (insn) 10195 rtx insn; 10196{ 10197 if (recog_memoized (insn) >= 0) 10198 return get_attr_memory (insn); 10199 else 10200 return MEMORY_UNKNOWN; 10201} 10202 10203static enum attr_pent_pair 10204ix86_safe_pent_pair (insn) 10205 rtx insn; 10206{ 10207 if (recog_memoized (insn) >= 0) 10208 return get_attr_pent_pair (insn); 10209 else 10210 return PENT_PAIR_NP; 10211} 10212 10213static enum attr_ppro_uops 10214ix86_safe_ppro_uops (insn) 10215 rtx insn; 10216{ 10217 if (recog_memoized (insn) >= 0) 10218 return get_attr_ppro_uops (insn); 10219 else 10220 return PPRO_UOPS_MANY; 10221} 10222 10223static void 10224ix86_dump_ppro_packet (dump) 10225 FILE *dump; 10226{ 10227 if (ix86_sched_data.ppro.decode[0]) 10228 { 10229 fprintf (dump, "PPRO packet: %d", 10230 INSN_UID (ix86_sched_data.ppro.decode[0])); 10231 if (ix86_sched_data.ppro.decode[1]) 10232 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1])); 10233 if (ix86_sched_data.ppro.decode[2]) 10234 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2])); 10235 fputc ('\n', dump); 10236 } 10237} 10238 10239/* We're beginning a new block. Initialize data structures as necessary. */ 10240 10241static void 10242ix86_sched_init (dump, sched_verbose, veclen) 10243 FILE *dump ATTRIBUTE_UNUSED; 10244 int sched_verbose ATTRIBUTE_UNUSED; 10245 int veclen ATTRIBUTE_UNUSED; 10246{ 10247 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data)); 10248} 10249 10250/* Shift INSN to SLOT, and shift everything else down. */ 10251 10252static void 10253ix86_reorder_insn (insnp, slot) 10254 rtx *insnp, *slot; 10255{ 10256 if (insnp != slot) 10257 { 10258 rtx insn = *insnp; 10259 do 10260 insnp[0] = insnp[1]; 10261 while (++insnp != slot); 10262 *insnp = insn; 10263 } 10264} 10265 10266/* Find an instruction with given pairability and minimal amount of cycles 10267 lost by the fact that the CPU waits for both pipelines to finish before 10268 reading next instructions. Also take care that both instructions together 10269 can not exceed 7 bytes. */ 10270 10271static rtx * 10272ix86_pent_find_pair (e_ready, ready, type, first) 10273 rtx *e_ready; 10274 rtx *ready; 10275 enum attr_pent_pair type; 10276 rtx first; 10277{ 10278 int mincycles, cycles; 10279 enum attr_pent_pair tmp; 10280 enum attr_memory memory; 10281 rtx *insnp, *bestinsnp = NULL; 10282 10283 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first)) 10284 return NULL; 10285 10286 memory = ix86_safe_memory (first); 10287 cycles = result_ready_cost (first); 10288 mincycles = INT_MAX; 10289 10290 for (insnp = e_ready; insnp >= ready && mincycles; --insnp) 10291 if ((tmp = ix86_safe_pent_pair (*insnp)) == type 10292 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp)) 10293 { 10294 enum attr_memory second_memory; 10295 int secondcycles, currentcycles; 10296 10297 second_memory = ix86_safe_memory (*insnp); 10298 secondcycles = result_ready_cost (*insnp); 10299 currentcycles = abs (cycles - secondcycles); 10300 10301 if (secondcycles >= 1 && cycles >= 1) 10302 { 10303 /* Two read/modify/write instructions together takes two 10304 cycles longer. */ 10305 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH) 10306 currentcycles += 2; 10307 10308 /* Read modify/write instruction followed by read/modify 10309 takes one cycle longer. */ 10310 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD 10311 && tmp != PENT_PAIR_UV 10312 && ix86_safe_pent_pair (first) != PENT_PAIR_UV) 10313 currentcycles += 1; 10314 } 10315 if (currentcycles < mincycles) 10316 bestinsnp = insnp, mincycles = currentcycles; 10317 } 10318 10319 return bestinsnp; 10320} 10321 10322/* Subroutines of ix86_sched_reorder. */ 10323 10324static void 10325ix86_sched_reorder_pentium (ready, e_ready) 10326 rtx *ready; 10327 rtx *e_ready; 10328{ 10329 enum attr_pent_pair pair1, pair2; 10330 rtx *insnp; 10331 10332 /* This wouldn't be necessary if Haifa knew that static insn ordering 10333 is important to which pipe an insn is issued to. So we have to make 10334 some minor rearrangements. */ 10335 10336 pair1 = ix86_safe_pent_pair (*e_ready); 10337 10338 /* If the first insn is non-pairable, let it be. */ 10339 if (pair1 == PENT_PAIR_NP) 10340 return; 10341 10342 pair2 = PENT_PAIR_NP; 10343 insnp = 0; 10344 10345 /* If the first insn is UV or PV pairable, search for a PU 10346 insn to go with. */ 10347 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV) 10348 { 10349 insnp = ix86_pent_find_pair (e_ready-1, ready, 10350 PENT_PAIR_PU, *e_ready); 10351 if (insnp) 10352 pair2 = PENT_PAIR_PU; 10353 } 10354 10355 /* If the first insn is PU or UV pairable, search for a PV 10356 insn to go with. */ 10357 if (pair2 == PENT_PAIR_NP 10358 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV)) 10359 { 10360 insnp = ix86_pent_find_pair (e_ready-1, ready, 10361 PENT_PAIR_PV, *e_ready); 10362 if (insnp) 10363 pair2 = PENT_PAIR_PV; 10364 } 10365 10366 /* If the first insn is pairable, search for a UV 10367 insn to go with. */ 10368 if (pair2 == PENT_PAIR_NP) 10369 { 10370 insnp = ix86_pent_find_pair (e_ready-1, ready, 10371 PENT_PAIR_UV, *e_ready); 10372 if (insnp) 10373 pair2 = PENT_PAIR_UV; 10374 } 10375 10376 if (pair2 == PENT_PAIR_NP) 10377 return; 10378 10379 /* Found something! Decide if we need to swap the order. */ 10380 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU 10381 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV 10382 && ix86_safe_memory (*e_ready) == MEMORY_BOTH 10383 && ix86_safe_memory (*insnp) == MEMORY_LOAD)) 10384 ix86_reorder_insn (insnp, e_ready); 10385 else 10386 ix86_reorder_insn (insnp, e_ready - 1); 10387} 10388 10389static void 10390ix86_sched_reorder_ppro (ready, e_ready) 10391 rtx *ready; 10392 rtx *e_ready; 10393{ 10394 rtx decode[3]; 10395 enum attr_ppro_uops cur_uops; 10396 int issued_this_cycle; 10397 rtx *insnp; 10398 int i; 10399 10400 /* At this point .ppro.decode contains the state of the three 10401 decoders from last "cycle". That is, those insns that were 10402 actually independent. But here we're scheduling for the 10403 decoder, and we may find things that are decodable in the 10404 same cycle. */ 10405 10406 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode)); 10407 issued_this_cycle = 0; 10408 10409 insnp = e_ready; 10410 cur_uops = ix86_safe_ppro_uops (*insnp); 10411 10412 /* If the decoders are empty, and we've a complex insn at the 10413 head of the priority queue, let it issue without complaint. */ 10414 if (decode[0] == NULL) 10415 { 10416 if (cur_uops == PPRO_UOPS_MANY) 10417 { 10418 decode[0] = *insnp; 10419 goto ppro_done; 10420 } 10421 10422 /* Otherwise, search for a 2-4 uop unsn to issue. */ 10423 while (cur_uops != PPRO_UOPS_FEW) 10424 { 10425 if (insnp == ready) 10426 break; 10427 cur_uops = ix86_safe_ppro_uops (*--insnp); 10428 } 10429 10430 /* If so, move it to the head of the line. */ 10431 if (cur_uops == PPRO_UOPS_FEW) 10432 ix86_reorder_insn (insnp, e_ready); 10433 10434 /* Issue the head of the queue. */ 10435 issued_this_cycle = 1; 10436 decode[0] = *e_ready--; 10437 } 10438 10439 /* Look for simple insns to fill in the other two slots. */ 10440 for (i = 1; i < 3; ++i) 10441 if (decode[i] == NULL) 10442 { 10443 if (ready >= e_ready) 10444 goto ppro_done; 10445 10446 insnp = e_ready; 10447 cur_uops = ix86_safe_ppro_uops (*insnp); 10448 while (cur_uops != PPRO_UOPS_ONE) 10449 { 10450 if (insnp == ready) 10451 break; 10452 cur_uops = ix86_safe_ppro_uops (*--insnp); 10453 } 10454 10455 /* Found one. Move it to the head of the queue and issue it. */ 10456 if (cur_uops == PPRO_UOPS_ONE) 10457 { 10458 ix86_reorder_insn (insnp, e_ready); 10459 decode[i] = *e_ready--; 10460 issued_this_cycle++; 10461 continue; 10462 } 10463 10464 /* ??? Didn't find one. Ideally, here we would do a lazy split 10465 of 2-uop insns, issue one and queue the other. */ 10466 } 10467 10468 ppro_done: 10469 if (issued_this_cycle == 0) 10470 issued_this_cycle = 1; 10471 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle; 10472} 10473 10474/* We are about to being issuing insns for this clock cycle. 10475 Override the default sort algorithm to better slot instructions. */ 10476static int 10477ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var) 10478 FILE *dump ATTRIBUTE_UNUSED; 10479 int sched_verbose ATTRIBUTE_UNUSED; 10480 rtx *ready; 10481 int *n_readyp; 10482 int clock_var ATTRIBUTE_UNUSED; 10483{ 10484 int n_ready = *n_readyp; 10485 rtx *e_ready = ready + n_ready - 1; 10486 10487 if (n_ready < 2) 10488 goto out; 10489 10490 switch (ix86_cpu) 10491 { 10492 default: 10493 break; 10494 10495 case PROCESSOR_PENTIUM: 10496 ix86_sched_reorder_pentium (ready, e_ready); 10497 break; 10498 10499 case PROCESSOR_PENTIUMPRO: 10500 ix86_sched_reorder_ppro (ready, e_ready); 10501 break; 10502 } 10503 10504out: 10505 return ix86_issue_rate (); 10506} 10507 10508/* We are about to issue INSN. Return the number of insns left on the 10509 ready queue that can be issued this cycle. */ 10510 10511static int 10512ix86_variable_issue (dump, sched_verbose, insn, can_issue_more) 10513 FILE *dump; 10514 int sched_verbose; 10515 rtx insn; 10516 int can_issue_more; 10517{ 10518 int i; 10519 switch (ix86_cpu) 10520 { 10521 default: 10522 return can_issue_more - 1; 10523 10524 case PROCESSOR_PENTIUMPRO: 10525 { 10526 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn); 10527 10528 if (uops == PPRO_UOPS_MANY) 10529 { 10530 if (sched_verbose) 10531 ix86_dump_ppro_packet (dump); 10532 ix86_sched_data.ppro.decode[0] = insn; 10533 ix86_sched_data.ppro.decode[1] = NULL; 10534 ix86_sched_data.ppro.decode[2] = NULL; 10535 if (sched_verbose) 10536 ix86_dump_ppro_packet (dump); 10537 ix86_sched_data.ppro.decode[0] = NULL; 10538 } 10539 else if (uops == PPRO_UOPS_FEW) 10540 { 10541 if (sched_verbose) 10542 ix86_dump_ppro_packet (dump); 10543 ix86_sched_data.ppro.decode[0] = insn; 10544 ix86_sched_data.ppro.decode[1] = NULL; 10545 ix86_sched_data.ppro.decode[2] = NULL; 10546 } 10547 else 10548 { 10549 for (i = 0; i < 3; ++i) 10550 if (ix86_sched_data.ppro.decode[i] == NULL) 10551 { 10552 ix86_sched_data.ppro.decode[i] = insn; 10553 break; 10554 } 10555 if (i == 3) 10556 abort (); 10557 if (i == 2) 10558 { 10559 if (sched_verbose) 10560 ix86_dump_ppro_packet (dump); 10561 ix86_sched_data.ppro.decode[0] = NULL; 10562 ix86_sched_data.ppro.decode[1] = NULL; 10563 ix86_sched_data.ppro.decode[2] = NULL; 10564 } 10565 } 10566 } 10567 return --ix86_sched_data.ppro.issued_this_cycle; 10568 } 10569} 10570 10571/* Walk through INSNS and look for MEM references whose address is DSTREG or 10572 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as 10573 appropriate. */ 10574 10575void 10576ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg) 10577 rtx insns; 10578 rtx dstref, srcref, dstreg, srcreg; 10579{ 10580 rtx insn; 10581 10582 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn)) 10583 if (INSN_P (insn)) 10584 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref, 10585 dstreg, srcreg); 10586} 10587 10588/* Subroutine of above to actually do the updating by recursively walking 10589 the rtx. */ 10590 10591static void 10592ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg) 10593 rtx x; 10594 rtx dstref, srcref, dstreg, srcreg; 10595{ 10596 enum rtx_code code = GET_CODE (x); 10597 const char *format_ptr = GET_RTX_FORMAT (code); 10598 int i, j; 10599 10600 if (code == MEM && XEXP (x, 0) == dstreg) 10601 MEM_COPY_ATTRIBUTES (x, dstref); 10602 else if (code == MEM && XEXP (x, 0) == srcreg) 10603 MEM_COPY_ATTRIBUTES (x, srcref); 10604 10605 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++) 10606 { 10607 if (*format_ptr == 'e') 10608 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref, 10609 dstreg, srcreg); 10610 else if (*format_ptr == 'E') 10611 for (j = XVECLEN (x, i) - 1; j >= 0; j--) 10612 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref, 10613 dstreg, srcreg); 10614 } 10615} 10616 10617/* Compute the alignment given to a constant that is being placed in memory. 10618 EXP is the constant and ALIGN is the alignment that the object would 10619 ordinarily have. 10620 The value of this function is used instead of that alignment to align 10621 the object. */ 10622 10623int 10624ix86_constant_alignment (exp, align) 10625 tree exp; 10626 int align; 10627{ 10628 if (TREE_CODE (exp) == REAL_CST) 10629 { 10630 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64) 10631 return 64; 10632 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128) 10633 return 128; 10634 } 10635 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31 10636 && align < 256) 10637 return 256; 10638 10639 return align; 10640} 10641 10642/* Compute the alignment for a static variable. 10643 TYPE is the data type, and ALIGN is the alignment that 10644 the object would ordinarily have. The value of this function is used 10645 instead of that alignment to align the object. */ 10646 10647int 10648ix86_data_alignment (type, align) 10649 tree type; 10650 int align; 10651{ 10652 if (AGGREGATE_TYPE_P (type) 10653 && TYPE_SIZE (type) 10654 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 10655 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256 10656 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256) 10657 return 256; 10658 10659 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned 10660 to 16byte boundary. */ 10661 if (TARGET_64BIT) 10662 { 10663 if (AGGREGATE_TYPE_P (type) 10664 && TYPE_SIZE (type) 10665 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 10666 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128 10667 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) 10668 return 128; 10669 } 10670 10671 if (TREE_CODE (type) == ARRAY_TYPE) 10672 { 10673 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) 10674 return 64; 10675 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) 10676 return 128; 10677 } 10678 else if (TREE_CODE (type) == COMPLEX_TYPE) 10679 { 10680 10681 if (TYPE_MODE (type) == DCmode && align < 64) 10682 return 64; 10683 if (TYPE_MODE (type) == XCmode && align < 128) 10684 return 128; 10685 } 10686 else if ((TREE_CODE (type) == RECORD_TYPE 10687 || TREE_CODE (type) == UNION_TYPE 10688 || TREE_CODE (type) == QUAL_UNION_TYPE) 10689 && TYPE_FIELDS (type)) 10690 { 10691 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) 10692 return 64; 10693 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) 10694 return 128; 10695 } 10696 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE 10697 || TREE_CODE (type) == INTEGER_TYPE) 10698 { 10699 if (TYPE_MODE (type) == DFmode && align < 64) 10700 return 64; 10701 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) 10702 return 128; 10703 } 10704 10705 return align; 10706} 10707 10708/* Compute the alignment for a local variable. 10709 TYPE is the data type, and ALIGN is the alignment that 10710 the object would ordinarily have. The value of this macro is used 10711 instead of that alignment to align the object. */ 10712 10713int 10714ix86_local_alignment (type, align) 10715 tree type; 10716 int align; 10717{ 10718 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned 10719 to 16byte boundary. */ 10720 if (TARGET_64BIT) 10721 { 10722 if (AGGREGATE_TYPE_P (type) 10723 && TYPE_SIZE (type) 10724 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 10725 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16 10726 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) 10727 return 128; 10728 } 10729 if (TREE_CODE (type) == ARRAY_TYPE) 10730 { 10731 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) 10732 return 64; 10733 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) 10734 return 128; 10735 } 10736 else if (TREE_CODE (type) == COMPLEX_TYPE) 10737 { 10738 if (TYPE_MODE (type) == DCmode && align < 64) 10739 return 64; 10740 if (TYPE_MODE (type) == XCmode && align < 128) 10741 return 128; 10742 } 10743 else if ((TREE_CODE (type) == RECORD_TYPE 10744 || TREE_CODE (type) == UNION_TYPE 10745 || TREE_CODE (type) == QUAL_UNION_TYPE) 10746 && TYPE_FIELDS (type)) 10747 { 10748 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) 10749 return 64; 10750 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) 10751 return 128; 10752 } 10753 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE 10754 || TREE_CODE (type) == INTEGER_TYPE) 10755 { 10756 10757 if (TYPE_MODE (type) == DFmode && align < 64) 10758 return 64; 10759 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) 10760 return 128; 10761 } 10762 return align; 10763} 10764 10765/* Emit RTL insns to initialize the variable parts of a trampoline. 10766 FNADDR is an RTX for the address of the function's pure code. 10767 CXT is an RTX for the static chain value for the function. */ 10768void 10769x86_initialize_trampoline (tramp, fnaddr, cxt) 10770 rtx tramp, fnaddr, cxt; 10771{ 10772 if (!TARGET_64BIT) 10773 { 10774 /* Compute offset from the end of the jmp to the target function. */ 10775 rtx disp = expand_binop (SImode, sub_optab, fnaddr, 10776 plus_constant (tramp, 10), 10777 NULL_RTX, 1, OPTAB_DIRECT); 10778 emit_move_insn (gen_rtx_MEM (QImode, tramp), 10779 GEN_INT (trunc_int_for_mode (0xb9, QImode))); 10780 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt); 10781 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)), 10782 GEN_INT (trunc_int_for_mode (0xe9, QImode))); 10783 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp); 10784 } 10785 else 10786 { 10787 int offset = 0; 10788 /* Try to load address using shorter movl instead of movabs. 10789 We may want to support movq for kernel mode, but kernel does not use 10790 trampolines at the moment. */ 10791 if (x86_64_zero_extended_value (fnaddr)) 10792 { 10793 fnaddr = copy_to_mode_reg (DImode, fnaddr); 10794 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 10795 GEN_INT (trunc_int_for_mode (0xbb41, HImode))); 10796 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)), 10797 gen_lowpart (SImode, fnaddr)); 10798 offset += 6; 10799 } 10800 else 10801 { 10802 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 10803 GEN_INT (trunc_int_for_mode (0xbb49, HImode))); 10804 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)), 10805 fnaddr); 10806 offset += 10; 10807 } 10808 /* Load static chain using movabs to r10. */ 10809 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 10810 GEN_INT (trunc_int_for_mode (0xba49, HImode))); 10811 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)), 10812 cxt); 10813 offset += 10; 10814 /* Jump to the r11 */ 10815 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 10816 GEN_INT (trunc_int_for_mode (0xff49, HImode))); 10817 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)), 10818 GEN_INT (trunc_int_for_mode (0xe3, QImode))); 10819 offset += 3; 10820 if (offset > TRAMPOLINE_SIZE) 10821 abort (); 10822 } 10823} 10824 10825#define def_builtin(MASK, NAME, TYPE, CODE) \ 10826do { \ 10827 if ((MASK) & target_flags) \ 10828 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \ 10829} while (0) 10830 10831struct builtin_description 10832{ 10833 const unsigned int mask; 10834 const enum insn_code icode; 10835 const char *const name; 10836 const enum ix86_builtins code; 10837 const enum rtx_code comparison; 10838 const unsigned int flag; 10839}; 10840 10841static const struct builtin_description bdesc_comi[] = 10842{ 10843 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 }, 10844 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 }, 10845 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 }, 10846 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 }, 10847 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 }, 10848 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 }, 10849 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 }, 10850 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 }, 10851 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 }, 10852 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 }, 10853 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 }, 10854 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 } 10855}; 10856 10857static const struct builtin_description bdesc_2arg[] = 10858{ 10859 /* SSE */ 10860 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 }, 10861 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 }, 10862 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, 10863 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, 10864 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, 10865 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, 10866 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, 10867 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, 10868 10869 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, 10870 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, 10871 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, 10872 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 }, 10873 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 }, 10874 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, 10875 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 }, 10876 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 }, 10877 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 }, 10878 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 }, 10879 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 }, 10880 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 }, 10881 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, 10882 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, 10883 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, 10884 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 }, 10885 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 }, 10886 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, 10887 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 }, 10888 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 }, 10889 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 }, 10890 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 }, 10891 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 }, 10892 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 }, 10893 10894 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, 10895 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, 10896 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, 10897 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, 10898 10899 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, 10900 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, 10901 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, 10902 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 }, 10903 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, 10904 10905 /* MMX */ 10906 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 }, 10907 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 }, 10908 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 }, 10909 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 }, 10910 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 }, 10911 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 }, 10912 10913 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 }, 10914 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 }, 10915 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 }, 10916 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 }, 10917 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 }, 10918 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 }, 10919 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 }, 10920 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 }, 10921 10922 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 }, 10923 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 }, 10924 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, 10925 10926 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 }, 10927 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 }, 10928 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 }, 10929 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 }, 10930 10931 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, 10932 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, 10933 10934 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 }, 10935 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 }, 10936 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 }, 10937 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 }, 10938 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 }, 10939 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 }, 10940 10941 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, 10942 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, 10943 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, 10944 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, 10945 10946 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 }, 10947 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 }, 10948 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 }, 10949 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 }, 10950 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 }, 10951 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 }, 10952 10953 /* Special. */ 10954 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 }, 10955 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 }, 10956 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 }, 10957 10958 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, 10959 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, 10960 10961 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, 10962 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, 10963 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 }, 10964 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 }, 10965 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 }, 10966 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 }, 10967 10968 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 }, 10969 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 }, 10970 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 }, 10971 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 }, 10972 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 }, 10973 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 }, 10974 10975 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 }, 10976 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 }, 10977 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, 10978 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 }, 10979 10980 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, 10981 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 } 10982 10983}; 10984 10985static const struct builtin_description bdesc_1arg[] = 10986{ 10987 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 }, 10988 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 }, 10989 10990 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 }, 10991 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 }, 10992 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 }, 10993 10994 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, 10995 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, 10996 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, 10997 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 } 10998 10999}; 11000 11001void 11002ix86_init_builtins () 11003{ 11004 if (TARGET_MMX) 11005 ix86_init_mmx_sse_builtins (); 11006} 11007 11008/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX 11009 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX 11010 builtins. */ 11011static void 11012ix86_init_mmx_sse_builtins () 11013{ 11014 const struct builtin_description * d; 11015 size_t i; 11016 tree endlink = void_list_node; 11017 11018 tree pchar_type_node = build_pointer_type (char_type_node); 11019 tree pfloat_type_node = build_pointer_type (float_type_node); 11020 tree pv2si_type_node = build_pointer_type (V2SI_type_node); 11021 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node); 11022 11023 /* Comparisons. */ 11024 tree int_ftype_v4sf_v4sf 11025 = build_function_type (integer_type_node, 11026 tree_cons (NULL_TREE, V4SF_type_node, 11027 tree_cons (NULL_TREE, 11028 V4SF_type_node, 11029 endlink))); 11030 tree v4si_ftype_v4sf_v4sf 11031 = build_function_type (V4SI_type_node, 11032 tree_cons (NULL_TREE, V4SF_type_node, 11033 tree_cons (NULL_TREE, 11034 V4SF_type_node, 11035 endlink))); 11036 /* MMX/SSE/integer conversions. */ 11037 tree int_ftype_v4sf 11038 = build_function_type (integer_type_node, 11039 tree_cons (NULL_TREE, V4SF_type_node, 11040 endlink)); 11041 tree int_ftype_v8qi 11042 = build_function_type (integer_type_node, 11043 tree_cons (NULL_TREE, V8QI_type_node, 11044 endlink)); 11045 tree v4sf_ftype_v4sf_int 11046 = build_function_type (V4SF_type_node, 11047 tree_cons (NULL_TREE, V4SF_type_node, 11048 tree_cons (NULL_TREE, integer_type_node, 11049 endlink))); 11050 tree v4sf_ftype_v4sf_v2si 11051 = build_function_type (V4SF_type_node, 11052 tree_cons (NULL_TREE, V4SF_type_node, 11053 tree_cons (NULL_TREE, V2SI_type_node, 11054 endlink))); 11055 tree int_ftype_v4hi_int 11056 = build_function_type (integer_type_node, 11057 tree_cons (NULL_TREE, V4HI_type_node, 11058 tree_cons (NULL_TREE, integer_type_node, 11059 endlink))); 11060 tree v4hi_ftype_v4hi_int_int 11061 = build_function_type (V4HI_type_node, 11062 tree_cons (NULL_TREE, V4HI_type_node, 11063 tree_cons (NULL_TREE, integer_type_node, 11064 tree_cons (NULL_TREE, 11065 integer_type_node, 11066 endlink)))); 11067 /* Miscellaneous. */ 11068 tree v8qi_ftype_v4hi_v4hi 11069 = build_function_type (V8QI_type_node, 11070 tree_cons (NULL_TREE, V4HI_type_node, 11071 tree_cons (NULL_TREE, V4HI_type_node, 11072 endlink))); 11073 tree v4hi_ftype_v2si_v2si 11074 = build_function_type (V4HI_type_node, 11075 tree_cons (NULL_TREE, V2SI_type_node, 11076 tree_cons (NULL_TREE, V2SI_type_node, 11077 endlink))); 11078 tree v4sf_ftype_v4sf_v4sf_int 11079 = build_function_type (V4SF_type_node, 11080 tree_cons (NULL_TREE, V4SF_type_node, 11081 tree_cons (NULL_TREE, V4SF_type_node, 11082 tree_cons (NULL_TREE, 11083 integer_type_node, 11084 endlink)))); 11085 tree v4hi_ftype_v8qi_v8qi 11086 = build_function_type (V4HI_type_node, 11087 tree_cons (NULL_TREE, V8QI_type_node, 11088 tree_cons (NULL_TREE, V8QI_type_node, 11089 endlink))); 11090 tree v2si_ftype_v4hi_v4hi 11091 = build_function_type (V2SI_type_node, 11092 tree_cons (NULL_TREE, V4HI_type_node, 11093 tree_cons (NULL_TREE, V4HI_type_node, 11094 endlink))); 11095 tree v4hi_ftype_v4hi_int 11096 = build_function_type (V4HI_type_node, 11097 tree_cons (NULL_TREE, V4HI_type_node, 11098 tree_cons (NULL_TREE, integer_type_node, 11099 endlink))); 11100 tree v4hi_ftype_v4hi_di 11101 = build_function_type (V4HI_type_node, 11102 tree_cons (NULL_TREE, V4HI_type_node, 11103 tree_cons (NULL_TREE, 11104 long_long_integer_type_node, 11105 endlink))); 11106 tree v2si_ftype_v2si_di 11107 = build_function_type (V2SI_type_node, 11108 tree_cons (NULL_TREE, V2SI_type_node, 11109 tree_cons (NULL_TREE, 11110 long_long_integer_type_node, 11111 endlink))); 11112 tree void_ftype_void 11113 = build_function_type (void_type_node, endlink); 11114 tree void_ftype_unsigned 11115 = build_function_type (void_type_node, 11116 tree_cons (NULL_TREE, unsigned_type_node, 11117 endlink)); 11118 tree unsigned_ftype_void 11119 = build_function_type (unsigned_type_node, endlink); 11120 tree di_ftype_void 11121 = build_function_type (long_long_unsigned_type_node, endlink); 11122 tree v4sf_ftype_void 11123 = build_function_type (V4SF_type_node, endlink); 11124 tree v2si_ftype_v4sf 11125 = build_function_type (V2SI_type_node, 11126 tree_cons (NULL_TREE, V4SF_type_node, 11127 endlink)); 11128 /* Loads/stores. */ 11129 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node, 11130 tree_cons (NULL_TREE, V8QI_type_node, 11131 tree_cons (NULL_TREE, 11132 pchar_type_node, 11133 endlink))); 11134 tree void_ftype_v8qi_v8qi_pchar 11135 = build_function_type (void_type_node, maskmovq_args); 11136 tree v4sf_ftype_pfloat 11137 = build_function_type (V4SF_type_node, 11138 tree_cons (NULL_TREE, pfloat_type_node, 11139 endlink)); 11140 /* @@@ the type is bogus */ 11141 tree v4sf_ftype_v4sf_pv2si 11142 = build_function_type (V4SF_type_node, 11143 tree_cons (NULL_TREE, V4SF_type_node, 11144 tree_cons (NULL_TREE, pv2si_type_node, 11145 endlink))); 11146 tree void_ftype_pv2si_v4sf 11147 = build_function_type (void_type_node, 11148 tree_cons (NULL_TREE, pv2si_type_node, 11149 tree_cons (NULL_TREE, V4SF_type_node, 11150 endlink))); 11151 tree void_ftype_pfloat_v4sf 11152 = build_function_type (void_type_node, 11153 tree_cons (NULL_TREE, pfloat_type_node, 11154 tree_cons (NULL_TREE, V4SF_type_node, 11155 endlink))); 11156 tree void_ftype_pdi_di 11157 = build_function_type (void_type_node, 11158 tree_cons (NULL_TREE, pdi_type_node, 11159 tree_cons (NULL_TREE, 11160 long_long_unsigned_type_node, 11161 endlink))); 11162 /* Normal vector unops. */ 11163 tree v4sf_ftype_v4sf 11164 = build_function_type (V4SF_type_node, 11165 tree_cons (NULL_TREE, V4SF_type_node, 11166 endlink)); 11167 11168 /* Normal vector binops. */ 11169 tree v4sf_ftype_v4sf_v4sf 11170 = build_function_type (V4SF_type_node, 11171 tree_cons (NULL_TREE, V4SF_type_node, 11172 tree_cons (NULL_TREE, V4SF_type_node, 11173 endlink))); 11174 tree v8qi_ftype_v8qi_v8qi 11175 = build_function_type (V8QI_type_node, 11176 tree_cons (NULL_TREE, V8QI_type_node, 11177 tree_cons (NULL_TREE, V8QI_type_node, 11178 endlink))); 11179 tree v4hi_ftype_v4hi_v4hi 11180 = build_function_type (V4HI_type_node, 11181 tree_cons (NULL_TREE, V4HI_type_node, 11182 tree_cons (NULL_TREE, V4HI_type_node, 11183 endlink))); 11184 tree v2si_ftype_v2si_v2si 11185 = build_function_type (V2SI_type_node, 11186 tree_cons (NULL_TREE, V2SI_type_node, 11187 tree_cons (NULL_TREE, V2SI_type_node, 11188 endlink))); 11189 tree di_ftype_di_di 11190 = build_function_type (long_long_unsigned_type_node, 11191 tree_cons (NULL_TREE, long_long_unsigned_type_node, 11192 tree_cons (NULL_TREE, 11193 long_long_unsigned_type_node, 11194 endlink))); 11195 11196 tree v2si_ftype_v2sf 11197 = build_function_type (V2SI_type_node, 11198 tree_cons (NULL_TREE, V2SF_type_node, 11199 endlink)); 11200 tree v2sf_ftype_v2si 11201 = build_function_type (V2SF_type_node, 11202 tree_cons (NULL_TREE, V2SI_type_node, 11203 endlink)); 11204 tree v2si_ftype_v2si 11205 = build_function_type (V2SI_type_node, 11206 tree_cons (NULL_TREE, V2SI_type_node, 11207 endlink)); 11208 tree v2sf_ftype_v2sf 11209 = build_function_type (V2SF_type_node, 11210 tree_cons (NULL_TREE, V2SF_type_node, 11211 endlink)); 11212 tree v2sf_ftype_v2sf_v2sf 11213 = build_function_type (V2SF_type_node, 11214 tree_cons (NULL_TREE, V2SF_type_node, 11215 tree_cons (NULL_TREE, 11216 V2SF_type_node, 11217 endlink))); 11218 tree v2si_ftype_v2sf_v2sf 11219 = build_function_type (V2SI_type_node, 11220 tree_cons (NULL_TREE, V2SF_type_node, 11221 tree_cons (NULL_TREE, 11222 V2SF_type_node, 11223 endlink))); 11224 11225 /* Add all builtins that are more or less simple operations on two 11226 operands. */ 11227 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++) 11228 { 11229 /* Use one of the operands; the target can have a different mode for 11230 mask-generating compares. */ 11231 enum machine_mode mode; 11232 tree type; 11233 11234 if (d->name == 0) 11235 continue; 11236 mode = insn_data[d->icode].operand[1].mode; 11237 11238 switch (mode) 11239 { 11240 case V4SFmode: 11241 type = v4sf_ftype_v4sf_v4sf; 11242 break; 11243 case V8QImode: 11244 type = v8qi_ftype_v8qi_v8qi; 11245 break; 11246 case V4HImode: 11247 type = v4hi_ftype_v4hi_v4hi; 11248 break; 11249 case V2SImode: 11250 type = v2si_ftype_v2si_v2si; 11251 break; 11252 case DImode: 11253 type = di_ftype_di_di; 11254 break; 11255 11256 default: 11257 abort (); 11258 } 11259 11260 /* Override for comparisons. */ 11261 if (d->icode == CODE_FOR_maskcmpv4sf3 11262 || d->icode == CODE_FOR_maskncmpv4sf3 11263 || d->icode == CODE_FOR_vmmaskcmpv4sf3 11264 || d->icode == CODE_FOR_vmmaskncmpv4sf3) 11265 type = v4si_ftype_v4sf_v4sf; 11266 11267 def_builtin (d->mask, d->name, type, d->code); 11268 } 11269 11270 /* Add the remaining MMX insns with somewhat more complicated types. */ 11271 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO); 11272 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); 11273 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); 11274 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); 11275 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW); 11276 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD); 11277 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ); 11278 11279 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW); 11280 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD); 11281 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ); 11282 11283 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW); 11284 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD); 11285 11286 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW); 11287 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD); 11288 11289 /* comi/ucomi insns. */ 11290 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++) 11291 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code); 11292 11293 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB); 11294 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW); 11295 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB); 11296 11297 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); 11298 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); 11299 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); 11300 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); 11301 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); 11302 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); 11303 11304 def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS); 11305 def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS); 11306 def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS); 11307 def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS); 11308 11309 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW); 11310 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW); 11311 11312 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); 11313 11314 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS); 11315 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS); 11316 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS); 11317 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS); 11318 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); 11319 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS); 11320 11321 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS); 11322 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS); 11323 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS); 11324 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); 11325 11326 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS); 11327 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); 11328 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); 11329 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); 11330 11331 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); 11332 11333 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); 11334 11335 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); 11336 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); 11337 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS); 11338 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); 11339 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS); 11340 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); 11341 11342 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); 11343 11344 /* Original 3DNow! */ 11345 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS); 11346 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB); 11347 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID); 11348 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC); 11349 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD); 11350 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ); 11351 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE); 11352 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT); 11353 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX); 11354 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN); 11355 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL); 11356 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP); 11357 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1); 11358 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2); 11359 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT); 11360 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1); 11361 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB); 11362 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR); 11363 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD); 11364 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW); 11365 11366 /* 3DNow! extension as used in the Athlon CPU. */ 11367 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW); 11368 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC); 11369 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC); 11370 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW); 11371 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF); 11372 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI); 11373 11374 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO); 11375} 11376 11377/* Errors in the source file can cause expand_expr to return const0_rtx 11378 where we expect a vector. To avoid crashing, use one of the vector 11379 clear instructions. */ 11380static rtx 11381safe_vector_operand (x, mode) 11382 rtx x; 11383 enum machine_mode mode; 11384{ 11385 if (x != const0_rtx) 11386 return x; 11387 x = gen_reg_rtx (mode); 11388 11389 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode)) 11390 emit_insn (gen_mmx_clrdi (mode == DImode ? x 11391 : gen_rtx_SUBREG (DImode, x, 0))); 11392 else 11393 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x 11394 : gen_rtx_SUBREG (V4SFmode, x, 0))); 11395 return x; 11396} 11397 11398/* Subroutine of ix86_expand_builtin to take care of binop insns. */ 11399 11400static rtx 11401ix86_expand_binop_builtin (icode, arglist, target) 11402 enum insn_code icode; 11403 tree arglist; 11404 rtx target; 11405{ 11406 rtx pat; 11407 tree arg0 = TREE_VALUE (arglist); 11408 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11409 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11410 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11411 enum machine_mode tmode = insn_data[icode].operand[0].mode; 11412 enum machine_mode mode0 = insn_data[icode].operand[1].mode; 11413 enum machine_mode mode1 = insn_data[icode].operand[2].mode; 11414 11415 if (VECTOR_MODE_P (mode0)) 11416 op0 = safe_vector_operand (op0, mode0); 11417 if (VECTOR_MODE_P (mode1)) 11418 op1 = safe_vector_operand (op1, mode1); 11419 11420 if (! target 11421 || GET_MODE (target) != tmode 11422 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 11423 target = gen_reg_rtx (tmode); 11424 11425 /* In case the insn wants input operands in modes different from 11426 the result, abort. */ 11427 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1) 11428 abort (); 11429 11430 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 11431 op0 = copy_to_mode_reg (mode0, op0); 11432 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) 11433 op1 = copy_to_mode_reg (mode1, op1); 11434 11435 /* In the commutative cases, both op0 and op1 are nonimmediate_operand, 11436 yet one of the two must not be a memory. This is normally enforced 11437 by expanders, but we didn't bother to create one here. */ 11438 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM) 11439 op0 = copy_to_mode_reg (mode0, op0); 11440 11441 pat = GEN_FCN (icode) (target, op0, op1); 11442 if (! pat) 11443 return 0; 11444 emit_insn (pat); 11445 return target; 11446} 11447 11448/* In type_for_mode we restrict the ability to create TImode types 11449 to hosts with 64-bit H_W_I. So we've defined the SSE logicals 11450 to have a V4SFmode signature. Convert them in-place to TImode. */ 11451 11452static rtx 11453ix86_expand_timode_binop_builtin (icode, arglist, target) 11454 enum insn_code icode; 11455 tree arglist; 11456 rtx target; 11457{ 11458 rtx pat; 11459 tree arg0 = TREE_VALUE (arglist); 11460 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11461 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11462 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11463 11464 op0 = gen_lowpart (TImode, op0); 11465 op1 = gen_lowpart (TImode, op1); 11466 target = gen_reg_rtx (TImode); 11467 11468 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode)) 11469 op0 = copy_to_mode_reg (TImode, op0); 11470 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode)) 11471 op1 = copy_to_mode_reg (TImode, op1); 11472 11473 /* In the commutative cases, both op0 and op1 are nonimmediate_operand, 11474 yet one of the two must not be a memory. This is normally enforced 11475 by expanders, but we didn't bother to create one here. */ 11476 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM) 11477 op0 = copy_to_mode_reg (TImode, op0); 11478 11479 pat = GEN_FCN (icode) (target, op0, op1); 11480 if (! pat) 11481 return 0; 11482 emit_insn (pat); 11483 11484 return gen_lowpart (V4SFmode, target); 11485} 11486 11487/* Subroutine of ix86_expand_builtin to take care of stores. */ 11488 11489static rtx 11490ix86_expand_store_builtin (icode, arglist) 11491 enum insn_code icode; 11492 tree arglist; 11493{ 11494 rtx pat; 11495 tree arg0 = TREE_VALUE (arglist); 11496 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11497 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11498 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11499 enum machine_mode mode0 = insn_data[icode].operand[0].mode; 11500 enum machine_mode mode1 = insn_data[icode].operand[1].mode; 11501 11502 if (VECTOR_MODE_P (mode1)) 11503 op1 = safe_vector_operand (op1, mode1); 11504 11505 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); 11506 11507 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) 11508 op1 = copy_to_mode_reg (mode1, op1); 11509 11510 pat = GEN_FCN (icode) (op0, op1); 11511 if (pat) 11512 emit_insn (pat); 11513 return 0; 11514} 11515 11516/* Subroutine of ix86_expand_builtin to take care of unop insns. */ 11517 11518static rtx 11519ix86_expand_unop_builtin (icode, arglist, target, do_load) 11520 enum insn_code icode; 11521 tree arglist; 11522 rtx target; 11523 int do_load; 11524{ 11525 rtx pat; 11526 tree arg0 = TREE_VALUE (arglist); 11527 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11528 enum machine_mode tmode = insn_data[icode].operand[0].mode; 11529 enum machine_mode mode0 = insn_data[icode].operand[1].mode; 11530 11531 if (! target 11532 || GET_MODE (target) != tmode 11533 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 11534 target = gen_reg_rtx (tmode); 11535 if (do_load) 11536 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); 11537 else 11538 { 11539 if (VECTOR_MODE_P (mode0)) 11540 op0 = safe_vector_operand (op0, mode0); 11541 11542 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 11543 op0 = copy_to_mode_reg (mode0, op0); 11544 } 11545 11546 pat = GEN_FCN (icode) (target, op0); 11547 if (! pat) 11548 return 0; 11549 emit_insn (pat); 11550 return target; 11551} 11552 11553/* Subroutine of ix86_expand_builtin to take care of three special unop insns: 11554 sqrtss, rsqrtss, rcpss. */ 11555 11556static rtx 11557ix86_expand_unop1_builtin (icode, arglist, target) 11558 enum insn_code icode; 11559 tree arglist; 11560 rtx target; 11561{ 11562 rtx pat; 11563 tree arg0 = TREE_VALUE (arglist); 11564 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11565 enum machine_mode tmode = insn_data[icode].operand[0].mode; 11566 enum machine_mode mode0 = insn_data[icode].operand[1].mode; 11567 11568 if (! target 11569 || GET_MODE (target) != tmode 11570 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 11571 target = gen_reg_rtx (tmode); 11572 11573 if (VECTOR_MODE_P (mode0)) 11574 op0 = safe_vector_operand (op0, mode0); 11575 11576 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 11577 op0 = copy_to_mode_reg (mode0, op0); 11578 11579 op1 = op0; 11580 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0)) 11581 op1 = copy_to_mode_reg (mode0, op1); 11582 11583 pat = GEN_FCN (icode) (target, op0, op1); 11584 if (! pat) 11585 return 0; 11586 emit_insn (pat); 11587 return target; 11588} 11589 11590/* Subroutine of ix86_expand_builtin to take care of comparison insns. */ 11591 11592static rtx 11593ix86_expand_sse_compare (d, arglist, target) 11594 const struct builtin_description *d; 11595 tree arglist; 11596 rtx target; 11597{ 11598 rtx pat; 11599 tree arg0 = TREE_VALUE (arglist); 11600 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11601 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11602 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11603 rtx op2; 11604 enum machine_mode tmode = insn_data[d->icode].operand[0].mode; 11605 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode; 11606 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode; 11607 enum rtx_code comparison = d->comparison; 11608 11609 if (VECTOR_MODE_P (mode0)) 11610 op0 = safe_vector_operand (op0, mode0); 11611 if (VECTOR_MODE_P (mode1)) 11612 op1 = safe_vector_operand (op1, mode1); 11613 11614 /* Swap operands if we have a comparison that isn't available in 11615 hardware. */ 11616 if (d->flag) 11617 { 11618 rtx tmp = gen_reg_rtx (mode1); 11619 emit_move_insn (tmp, op1); 11620 op1 = op0; 11621 op0 = tmp; 11622 } 11623 11624 if (! target 11625 || GET_MODE (target) != tmode 11626 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode)) 11627 target = gen_reg_rtx (tmode); 11628 11629 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0)) 11630 op0 = copy_to_mode_reg (mode0, op0); 11631 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1)) 11632 op1 = copy_to_mode_reg (mode1, op1); 11633 11634 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); 11635 pat = GEN_FCN (d->icode) (target, op0, op1, op2); 11636 if (! pat) 11637 return 0; 11638 emit_insn (pat); 11639 return target; 11640} 11641 11642/* Subroutine of ix86_expand_builtin to take care of comi insns. */ 11643 11644static rtx 11645ix86_expand_sse_comi (d, arglist, target) 11646 const struct builtin_description *d; 11647 tree arglist; 11648 rtx target; 11649{ 11650 rtx pat; 11651 tree arg0 = TREE_VALUE (arglist); 11652 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11653 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11654 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11655 rtx op2; 11656 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; 11657 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; 11658 enum rtx_code comparison = d->comparison; 11659 11660 if (VECTOR_MODE_P (mode0)) 11661 op0 = safe_vector_operand (op0, mode0); 11662 if (VECTOR_MODE_P (mode1)) 11663 op1 = safe_vector_operand (op1, mode1); 11664 11665 /* Swap operands if we have a comparison that isn't available in 11666 hardware. */ 11667 if (d->flag) 11668 { 11669 rtx tmp = op1; 11670 op1 = op0; 11671 op0 = tmp; 11672 } 11673 11674 target = gen_reg_rtx (SImode); 11675 emit_move_insn (target, const0_rtx); 11676 target = gen_rtx_SUBREG (QImode, target, 0); 11677 11678 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0)) 11679 op0 = copy_to_mode_reg (mode0, op0); 11680 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1)) 11681 op1 = copy_to_mode_reg (mode1, op1); 11682 11683 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); 11684 pat = GEN_FCN (d->icode) (op0, op1, op2); 11685 if (! pat) 11686 return 0; 11687 emit_insn (pat); 11688 emit_insn (gen_rtx_SET (VOIDmode, 11689 gen_rtx_STRICT_LOW_PART (VOIDmode, target), 11690 gen_rtx_fmt_ee (comparison, QImode, 11691 gen_rtx_REG (CCmode, FLAGS_REG), 11692 const0_rtx))); 11693 11694 return SUBREG_REG (target); 11695} 11696 11697/* Expand an expression EXP that calls a built-in function, 11698 with result going to TARGET if that's convenient 11699 (and in mode MODE if that's convenient). 11700 SUBTARGET may be used as the target for computing one of EXP's operands. 11701 IGNORE is nonzero if the value is to be ignored. */ 11702 11703rtx 11704ix86_expand_builtin (exp, target, subtarget, mode, ignore) 11705 tree exp; 11706 rtx target; 11707 rtx subtarget ATTRIBUTE_UNUSED; 11708 enum machine_mode mode ATTRIBUTE_UNUSED; 11709 int ignore ATTRIBUTE_UNUSED; 11710{ 11711 const struct builtin_description *d; 11712 size_t i; 11713 enum insn_code icode; 11714 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); 11715 tree arglist = TREE_OPERAND (exp, 1); 11716 tree arg0, arg1, arg2; 11717 rtx op0, op1, op2, pat; 11718 enum machine_mode tmode, mode0, mode1, mode2; 11719 unsigned int fcode = DECL_FUNCTION_CODE (fndecl); 11720 11721 switch (fcode) 11722 { 11723 case IX86_BUILTIN_EMMS: 11724 emit_insn (gen_emms ()); 11725 return 0; 11726 11727 case IX86_BUILTIN_SFENCE: 11728 emit_insn (gen_sfence ()); 11729 return 0; 11730 11731 case IX86_BUILTIN_PEXTRW: 11732 icode = CODE_FOR_mmx_pextrw; 11733 arg0 = TREE_VALUE (arglist); 11734 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11735 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11736 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11737 tmode = insn_data[icode].operand[0].mode; 11738 mode0 = insn_data[icode].operand[1].mode; 11739 mode1 = insn_data[icode].operand[2].mode; 11740 11741 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 11742 op0 = copy_to_mode_reg (mode0, op0); 11743 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) 11744 { 11745 /* @@@ better error message */ 11746 error ("selector must be an immediate"); 11747 return gen_reg_rtx (tmode); 11748 } 11749 if (target == 0 11750 || GET_MODE (target) != tmode 11751 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 11752 target = gen_reg_rtx (tmode); 11753 pat = GEN_FCN (icode) (target, op0, op1); 11754 if (! pat) 11755 return 0; 11756 emit_insn (pat); 11757 return target; 11758 11759 case IX86_BUILTIN_PINSRW: 11760 icode = CODE_FOR_mmx_pinsrw; 11761 arg0 = TREE_VALUE (arglist); 11762 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11763 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); 11764 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11765 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11766 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); 11767 tmode = insn_data[icode].operand[0].mode; 11768 mode0 = insn_data[icode].operand[1].mode; 11769 mode1 = insn_data[icode].operand[2].mode; 11770 mode2 = insn_data[icode].operand[3].mode; 11771 11772 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 11773 op0 = copy_to_mode_reg (mode0, op0); 11774 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) 11775 op1 = copy_to_mode_reg (mode1, op1); 11776 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) 11777 { 11778 /* @@@ better error message */ 11779 error ("selector must be an immediate"); 11780 return const0_rtx; 11781 } 11782 if (target == 0 11783 || GET_MODE (target) != tmode 11784 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 11785 target = gen_reg_rtx (tmode); 11786 pat = GEN_FCN (icode) (target, op0, op1, op2); 11787 if (! pat) 11788 return 0; 11789 emit_insn (pat); 11790 return target; 11791 11792 case IX86_BUILTIN_MASKMOVQ: 11793 icode = TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq; 11794 /* Note the arg order is different from the operand order. */ 11795 arg1 = TREE_VALUE (arglist); 11796 arg2 = TREE_VALUE (TREE_CHAIN (arglist)); 11797 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); 11798 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11799 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11800 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); 11801 mode0 = insn_data[icode].operand[0].mode; 11802 mode1 = insn_data[icode].operand[1].mode; 11803 mode2 = insn_data[icode].operand[2].mode; 11804 11805 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) 11806 op0 = copy_to_mode_reg (mode0, op0); 11807 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) 11808 op1 = copy_to_mode_reg (mode1, op1); 11809 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2)) 11810 op2 = copy_to_mode_reg (mode2, op2); 11811 pat = GEN_FCN (icode) (op0, op1, op2); 11812 if (! pat) 11813 return 0; 11814 emit_insn (pat); 11815 return 0; 11816 11817 case IX86_BUILTIN_SQRTSS: 11818 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target); 11819 case IX86_BUILTIN_RSQRTSS: 11820 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target); 11821 case IX86_BUILTIN_RCPSS: 11822 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target); 11823 11824 case IX86_BUILTIN_ANDPS: 11825 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3, 11826 arglist, target); 11827 case IX86_BUILTIN_ANDNPS: 11828 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3, 11829 arglist, target); 11830 case IX86_BUILTIN_ORPS: 11831 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3, 11832 arglist, target); 11833 case IX86_BUILTIN_XORPS: 11834 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3, 11835 arglist, target); 11836 11837 case IX86_BUILTIN_LOADAPS: 11838 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1); 11839 11840 case IX86_BUILTIN_LOADUPS: 11841 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1); 11842 11843 case IX86_BUILTIN_STOREAPS: 11844 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist); 11845 case IX86_BUILTIN_STOREUPS: 11846 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist); 11847 11848 case IX86_BUILTIN_LOADSS: 11849 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1); 11850 11851 case IX86_BUILTIN_STORESS: 11852 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist); 11853 11854 case IX86_BUILTIN_LOADHPS: 11855 case IX86_BUILTIN_LOADLPS: 11856 icode = (fcode == IX86_BUILTIN_LOADHPS 11857 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps); 11858 arg0 = TREE_VALUE (arglist); 11859 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11860 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11861 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11862 tmode = insn_data[icode].operand[0].mode; 11863 mode0 = insn_data[icode].operand[1].mode; 11864 mode1 = insn_data[icode].operand[2].mode; 11865 11866 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 11867 op0 = copy_to_mode_reg (mode0, op0); 11868 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1)); 11869 if (target == 0 11870 || GET_MODE (target) != tmode 11871 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 11872 target = gen_reg_rtx (tmode); 11873 pat = GEN_FCN (icode) (target, op0, op1); 11874 if (! pat) 11875 return 0; 11876 emit_insn (pat); 11877 return target; 11878 11879 case IX86_BUILTIN_STOREHPS: 11880 case IX86_BUILTIN_STORELPS: 11881 icode = (fcode == IX86_BUILTIN_STOREHPS 11882 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps); 11883 arg0 = TREE_VALUE (arglist); 11884 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11885 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11886 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11887 mode0 = insn_data[icode].operand[1].mode; 11888 mode1 = insn_data[icode].operand[2].mode; 11889 11890 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); 11891 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) 11892 op1 = copy_to_mode_reg (mode1, op1); 11893 11894 pat = GEN_FCN (icode) (op0, op0, op1); 11895 if (! pat) 11896 return 0; 11897 emit_insn (pat); 11898 return 0; 11899 11900 case IX86_BUILTIN_MOVNTPS: 11901 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist); 11902 case IX86_BUILTIN_MOVNTQ: 11903 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist); 11904 11905 case IX86_BUILTIN_LDMXCSR: 11906 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0); 11907 target = assign_386_stack_local (SImode, 0); 11908 emit_move_insn (target, op0); 11909 emit_insn (gen_ldmxcsr (target)); 11910 return 0; 11911 11912 case IX86_BUILTIN_STMXCSR: 11913 target = assign_386_stack_local (SImode, 0); 11914 emit_insn (gen_stmxcsr (target)); 11915 return copy_to_mode_reg (SImode, target); 11916 11917 case IX86_BUILTIN_SHUFPS: 11918 icode = CODE_FOR_sse_shufps; 11919 arg0 = TREE_VALUE (arglist); 11920 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11921 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); 11922 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11923 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11924 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); 11925 tmode = insn_data[icode].operand[0].mode; 11926 mode0 = insn_data[icode].operand[1].mode; 11927 mode1 = insn_data[icode].operand[2].mode; 11928 mode2 = insn_data[icode].operand[3].mode; 11929 11930 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 11931 op0 = copy_to_mode_reg (mode0, op0); 11932 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) 11933 op1 = copy_to_mode_reg (mode1, op1); 11934 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) 11935 { 11936 /* @@@ better error message */ 11937 error ("mask must be an immediate"); 11938 return gen_reg_rtx (tmode); 11939 } 11940 if (target == 0 11941 || GET_MODE (target) != tmode 11942 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 11943 target = gen_reg_rtx (tmode); 11944 pat = GEN_FCN (icode) (target, op0, op1, op2); 11945 if (! pat) 11946 return 0; 11947 emit_insn (pat); 11948 return target; 11949 11950 case IX86_BUILTIN_PSHUFW: 11951 icode = CODE_FOR_mmx_pshufw; 11952 arg0 = TREE_VALUE (arglist); 11953 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11954 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11955 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11956 tmode = insn_data[icode].operand[0].mode; 11957 mode1 = insn_data[icode].operand[1].mode; 11958 mode2 = insn_data[icode].operand[2].mode; 11959 11960 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) 11961 op0 = copy_to_mode_reg (mode1, op0); 11962 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) 11963 { 11964 /* @@@ better error message */ 11965 error ("mask must be an immediate"); 11966 return const0_rtx; 11967 } 11968 if (target == 0 11969 || GET_MODE (target) != tmode 11970 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 11971 target = gen_reg_rtx (tmode); 11972 pat = GEN_FCN (icode) (target, op0, op1); 11973 if (! pat) 11974 return 0; 11975 emit_insn (pat); 11976 return target; 11977 11978 case IX86_BUILTIN_FEMMS: 11979 emit_insn (gen_femms ()); 11980 return NULL_RTX; 11981 11982 case IX86_BUILTIN_PAVGUSB: 11983 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target); 11984 11985 case IX86_BUILTIN_PF2ID: 11986 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0); 11987 11988 case IX86_BUILTIN_PFACC: 11989 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target); 11990 11991 case IX86_BUILTIN_PFADD: 11992 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target); 11993 11994 case IX86_BUILTIN_PFCMPEQ: 11995 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target); 11996 11997 case IX86_BUILTIN_PFCMPGE: 11998 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target); 11999 12000 case IX86_BUILTIN_PFCMPGT: 12001 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target); 12002 12003 case IX86_BUILTIN_PFMAX: 12004 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target); 12005 12006 case IX86_BUILTIN_PFMIN: 12007 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target); 12008 12009 case IX86_BUILTIN_PFMUL: 12010 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target); 12011 12012 case IX86_BUILTIN_PFRCP: 12013 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0); 12014 12015 case IX86_BUILTIN_PFRCPIT1: 12016 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target); 12017 12018 case IX86_BUILTIN_PFRCPIT2: 12019 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target); 12020 12021 case IX86_BUILTIN_PFRSQIT1: 12022 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target); 12023 12024 case IX86_BUILTIN_PFRSQRT: 12025 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0); 12026 12027 case IX86_BUILTIN_PFSUB: 12028 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target); 12029 12030 case IX86_BUILTIN_PFSUBR: 12031 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target); 12032 12033 case IX86_BUILTIN_PI2FD: 12034 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0); 12035 12036 case IX86_BUILTIN_PMULHRW: 12037 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target); 12038 12039 case IX86_BUILTIN_PF2IW: 12040 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0); 12041 12042 case IX86_BUILTIN_PFNACC: 12043 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target); 12044 12045 case IX86_BUILTIN_PFPNACC: 12046 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target); 12047 12048 case IX86_BUILTIN_PI2FW: 12049 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0); 12050 12051 case IX86_BUILTIN_PSWAPDSI: 12052 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0); 12053 12054 case IX86_BUILTIN_PSWAPDSF: 12055 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0); 12056 12057 case IX86_BUILTIN_SSE_ZERO: 12058 target = gen_reg_rtx (V4SFmode); 12059 emit_insn (gen_sse_clrv4sf (target)); 12060 return target; 12061 12062 case IX86_BUILTIN_MMX_ZERO: 12063 target = gen_reg_rtx (DImode); 12064 emit_insn (gen_mmx_clrdi (target)); 12065 return target; 12066 12067 default: 12068 break; 12069 } 12070 12071 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++) 12072 if (d->code == fcode) 12073 { 12074 /* Compares are treated specially. */ 12075 if (d->icode == CODE_FOR_maskcmpv4sf3 12076 || d->icode == CODE_FOR_vmmaskcmpv4sf3 12077 || d->icode == CODE_FOR_maskncmpv4sf3 12078 || d->icode == CODE_FOR_vmmaskncmpv4sf3) 12079 return ix86_expand_sse_compare (d, arglist, target); 12080 12081 return ix86_expand_binop_builtin (d->icode, arglist, target); 12082 } 12083 12084 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++) 12085 if (d->code == fcode) 12086 return ix86_expand_unop_builtin (d->icode, arglist, target, 0); 12087 12088 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++) 12089 if (d->code == fcode) 12090 return ix86_expand_sse_comi (d, arglist, target); 12091 12092 /* @@@ Should really do something sensible here. */ 12093 return 0; 12094} 12095 12096/* Store OPERAND to the memory after reload is completed. This means 12097 that we can't easily use assign_stack_local. */ 12098rtx 12099ix86_force_to_memory (mode, operand) 12100 enum machine_mode mode; 12101 rtx operand; 12102{ 12103 rtx result; 12104 if (!reload_completed) 12105 abort (); 12106 if (TARGET_64BIT && TARGET_RED_ZONE) 12107 { 12108 result = gen_rtx_MEM (mode, 12109 gen_rtx_PLUS (Pmode, 12110 stack_pointer_rtx, 12111 GEN_INT (-RED_ZONE_SIZE))); 12112 emit_move_insn (result, operand); 12113 } 12114 else if (TARGET_64BIT && !TARGET_RED_ZONE) 12115 { 12116 switch (mode) 12117 { 12118 case HImode: 12119 case SImode: 12120 operand = gen_lowpart (DImode, operand); 12121 /* FALLTHRU */ 12122 case DImode: 12123 emit_insn ( 12124 gen_rtx_SET (VOIDmode, 12125 gen_rtx_MEM (DImode, 12126 gen_rtx_PRE_DEC (DImode, 12127 stack_pointer_rtx)), 12128 operand)); 12129 break; 12130 default: 12131 abort (); 12132 } 12133 result = gen_rtx_MEM (mode, stack_pointer_rtx); 12134 } 12135 else 12136 { 12137 switch (mode) 12138 { 12139 case DImode: 12140 { 12141 rtx operands[2]; 12142 split_di (&operand, 1, operands, operands + 1); 12143 emit_insn ( 12144 gen_rtx_SET (VOIDmode, 12145 gen_rtx_MEM (SImode, 12146 gen_rtx_PRE_DEC (Pmode, 12147 stack_pointer_rtx)), 12148 operands[1])); 12149 emit_insn ( 12150 gen_rtx_SET (VOIDmode, 12151 gen_rtx_MEM (SImode, 12152 gen_rtx_PRE_DEC (Pmode, 12153 stack_pointer_rtx)), 12154 operands[0])); 12155 } 12156 break; 12157 case HImode: 12158 /* It is better to store HImodes as SImodes. */ 12159 if (!TARGET_PARTIAL_REG_STALL) 12160 operand = gen_lowpart (SImode, operand); 12161 /* FALLTHRU */ 12162 case SImode: 12163 emit_insn ( 12164 gen_rtx_SET (VOIDmode, 12165 gen_rtx_MEM (GET_MODE (operand), 12166 gen_rtx_PRE_DEC (SImode, 12167 stack_pointer_rtx)), 12168 operand)); 12169 break; 12170 default: 12171 abort (); 12172 } 12173 result = gen_rtx_MEM (mode, stack_pointer_rtx); 12174 } 12175 return result; 12176} 12177 12178/* Free operand from the memory. */ 12179void 12180ix86_free_from_memory (mode) 12181 enum machine_mode mode; 12182{ 12183 if (!TARGET_64BIT || !TARGET_RED_ZONE) 12184 { 12185 int size; 12186 12187 if (mode == DImode || TARGET_64BIT) 12188 size = 8; 12189 else if (mode == HImode && TARGET_PARTIAL_REG_STALL) 12190 size = 2; 12191 else 12192 size = 4; 12193 /* Use LEA to deallocate stack space. In peephole2 it will be converted 12194 to pop or add instruction if registers are available. */ 12195 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, 12196 gen_rtx_PLUS (Pmode, stack_pointer_rtx, 12197 GEN_INT (size)))); 12198 } 12199} 12200 12201/* Put float CONST_DOUBLE in the constant pool instead of fp regs. 12202 QImode must go into class Q_REGS. 12203 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and 12204 movdf to do mem-to-mem moves through integer regs. */ 12205enum reg_class 12206ix86_preferred_reload_class (x, class) 12207 rtx x; 12208 enum reg_class class; 12209{ 12210 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode) 12211 { 12212 /* SSE can't load any constant directly yet. */ 12213 if (SSE_CLASS_P (class)) 12214 return NO_REGS; 12215 /* Floats can load 0 and 1. */ 12216 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x)) 12217 { 12218 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */ 12219 if (MAYBE_SSE_CLASS_P (class)) 12220 return (reg_class_subset_p (class, GENERAL_REGS) 12221 ? GENERAL_REGS : FLOAT_REGS); 12222 else 12223 return class; 12224 } 12225 /* General regs can load everything. */ 12226 if (reg_class_subset_p (class, GENERAL_REGS)) 12227 return GENERAL_REGS; 12228 /* In case we haven't resolved FLOAT or SSE yet, give up. */ 12229 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)) 12230 return NO_REGS; 12231 } 12232 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x)) 12233 return NO_REGS; 12234 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS)) 12235 return Q_REGS; 12236 return class; 12237} 12238 12239/* If we are copying between general and FP registers, we need a memory 12240 location. The same is true for SSE and MMX registers. 12241 12242 The macro can't work reliably when one of the CLASSES is class containing 12243 registers from multiple units (SSE, MMX, integer). We avoid this by never 12244 combining those units in single alternative in the machine description. 12245 Ensure that this constraint holds to avoid unexpected surprises. 12246 12247 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not 12248 enforce these sanity checks. */ 12249int 12250ix86_secondary_memory_needed (class1, class2, mode, strict) 12251 enum reg_class class1, class2; 12252 enum machine_mode mode; 12253 int strict; 12254{ 12255 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1) 12256 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2) 12257 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) 12258 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2) 12259 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1) 12260 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)) 12261 { 12262 if (strict) 12263 abort (); 12264 else 12265 return 1; 12266 } 12267 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2) 12268 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2) 12269 && (mode) != SImode) 12270 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2) 12271 && (mode) != SImode)); 12272} 12273/* Return the cost of moving data from a register in class CLASS1 to 12274 one in class CLASS2. 12275 12276 It is not required that the cost always equal 2 when FROM is the same as TO; 12277 on some machines it is expensive to move between registers if they are not 12278 general registers. */ 12279int 12280ix86_register_move_cost (mode, class1, class2) 12281 enum machine_mode mode; 12282 enum reg_class class1, class2; 12283{ 12284 /* In case we require secondary memory, compute cost of the store followed 12285 by load. In case of copying from general_purpose_register we may emit 12286 multiple stores followed by single load causing memory size mismatch 12287 stall. Count this as arbitarily high cost of 20. */ 12288 if (ix86_secondary_memory_needed (class1, class2, mode, 0)) 12289 { 12290 int add_cost = 0; 12291 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode)) 12292 add_cost = 20; 12293 return (MEMORY_MOVE_COST (mode, class1, 0) 12294 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost); 12295 } 12296 /* Moves between SSE/MMX and integer unit are expensive. */ 12297 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2) 12298 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) 12299 return ix86_cost->mmxsse_to_integer; 12300 if (MAYBE_FLOAT_CLASS_P (class1)) 12301 return ix86_cost->fp_move; 12302 if (MAYBE_SSE_CLASS_P (class1)) 12303 return ix86_cost->sse_move; 12304 if (MAYBE_MMX_CLASS_P (class1)) 12305 return ix86_cost->mmx_move; 12306 return 2; 12307} 12308 12309/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */ 12310int 12311ix86_hard_regno_mode_ok (regno, mode) 12312 int regno; 12313 enum machine_mode mode; 12314{ 12315 /* Flags and only flags can only hold CCmode values. */ 12316 if (CC_REGNO_P (regno)) 12317 return GET_MODE_CLASS (mode) == MODE_CC; 12318 if (GET_MODE_CLASS (mode) == MODE_CC 12319 || GET_MODE_CLASS (mode) == MODE_RANDOM 12320 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) 12321 return 0; 12322 if (FP_REGNO_P (regno)) 12323 return VALID_FP_MODE_P (mode); 12324 if (SSE_REGNO_P (regno)) 12325 return VALID_SSE_REG_MODE (mode); 12326 if (MMX_REGNO_P (regno)) 12327 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode); 12328 /* We handle both integer and floats in the general purpose registers. 12329 In future we should be able to handle vector modes as well. */ 12330 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode)) 12331 return 0; 12332 /* Take care for QImode values - they can be in non-QI regs, but then 12333 they do cause partial register stalls. */ 12334 if (regno < 4 || mode != QImode || TARGET_64BIT) 12335 return 1; 12336 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL; 12337} 12338 12339/* Return the cost of moving data of mode M between a 12340 register and memory. A value of 2 is the default; this cost is 12341 relative to those in `REGISTER_MOVE_COST'. 12342 12343 If moving between registers and memory is more expensive than 12344 between two registers, you should define this macro to express the 12345 relative cost. 12346 12347 Model also increased moving costs of QImode registers in non 12348 Q_REGS classes. 12349 */ 12350int 12351ix86_memory_move_cost (mode, class, in) 12352 enum machine_mode mode; 12353 enum reg_class class; 12354 int in; 12355{ 12356 if (FLOAT_CLASS_P (class)) 12357 { 12358 int index; 12359 switch (mode) 12360 { 12361 case SFmode: 12362 index = 0; 12363 break; 12364 case DFmode: 12365 index = 1; 12366 break; 12367 case XFmode: 12368 case TFmode: 12369 index = 2; 12370 break; 12371 default: 12372 return 100; 12373 } 12374 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index]; 12375 } 12376 if (SSE_CLASS_P (class)) 12377 { 12378 int index; 12379 switch (GET_MODE_SIZE (mode)) 12380 { 12381 case 4: 12382 index = 0; 12383 break; 12384 case 8: 12385 index = 1; 12386 break; 12387 case 16: 12388 index = 2; 12389 break; 12390 default: 12391 return 100; 12392 } 12393 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index]; 12394 } 12395 if (MMX_CLASS_P (class)) 12396 { 12397 int index; 12398 switch (GET_MODE_SIZE (mode)) 12399 { 12400 case 4: 12401 index = 0; 12402 break; 12403 case 8: 12404 index = 1; 12405 break; 12406 default: 12407 return 100; 12408 } 12409 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index]; 12410 } 12411 switch (GET_MODE_SIZE (mode)) 12412 { 12413 case 1: 12414 if (in) 12415 return (Q_CLASS_P (class) ? ix86_cost->int_load[0] 12416 : ix86_cost->movzbl_load); 12417 else 12418 return (Q_CLASS_P (class) ? ix86_cost->int_store[0] 12419 : ix86_cost->int_store[0] + 4); 12420 break; 12421 case 2: 12422 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1]; 12423 default: 12424 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */ 12425 if (mode == TFmode) 12426 mode = XFmode; 12427 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2]) 12428 * (int) GET_MODE_SIZE (mode) / 4); 12429 } 12430} 12431 12432#ifdef DO_GLOBAL_CTORS_BODY 12433static void 12434ix86_svr3_asm_out_constructor (symbol, priority) 12435 rtx symbol; 12436 int priority ATTRIBUTE_UNUSED; 12437{ 12438 init_section (); 12439 fputs ("\tpushl $", asm_out_file); 12440 assemble_name (asm_out_file, XSTR (symbol, 0)); 12441 fputc ('\n', asm_out_file); 12442} 12443#endif 12444 12445/* Order the registers for register allocator. */ 12446 12447void 12448x86_order_regs_for_local_alloc () 12449{ 12450 int pos = 0; 12451 int i; 12452 12453 /* First allocate the local general purpose registers. */ 12454 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 12455 if (GENERAL_REGNO_P (i) && call_used_regs[i]) 12456 reg_alloc_order [pos++] = i; 12457 12458 /* Global general purpose registers. */ 12459 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 12460 if (GENERAL_REGNO_P (i) && !call_used_regs[i]) 12461 reg_alloc_order [pos++] = i; 12462 12463 /* x87 registers come first in case we are doing FP math 12464 using them. */ 12465 if (!TARGET_SSE_MATH) 12466 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) 12467 reg_alloc_order [pos++] = i; 12468 12469 /* SSE registers. */ 12470 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) 12471 reg_alloc_order [pos++] = i; 12472 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) 12473 reg_alloc_order [pos++] = i; 12474 12475 /* x87 registerts. */ 12476 if (TARGET_SSE_MATH) 12477 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) 12478 reg_alloc_order [pos++] = i; 12479 12480 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) 12481 reg_alloc_order [pos++] = i; 12482 12483 /* Initialize the rest of array as we do not allocate some registers 12484 at all. */ 12485 while (pos < FIRST_PSEUDO_REGISTER) 12486 reg_alloc_order [pos++] = 0; 12487} 12488