i386.c revision 90284
1/* Subroutines used for code generation on IA-32. 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 3 2002 Free Software Foundation, Inc. 4 5This file is part of GNU CC. 6 7GNU CC is free software; you can redistribute it and/or modify 8it under the terms of the GNU General Public License as published by 9the Free Software Foundation; either version 2, or (at your option) 10any later version. 11 12GNU CC is distributed in the hope that it will be useful, 13but WITHOUT ANY WARRANTY; without even the implied warranty of 14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15GNU General Public License for more details. 16 17You should have received a copy of the GNU General Public License 18along with GNU CC; see the file COPYING. If not, write to 19the Free Software Foundation, 59 Temple Place - Suite 330, 20Boston, MA 02111-1307, USA. */ 21 22 23/* $FreeBSD: head/contrib/gcc/config/i386/i386.c 90284 2002-02-06 05:01:29Z obrien $ */ 24 25 26#include "config.h" 27#include "system.h" 28#include "rtl.h" 29#include "tree.h" 30#include "tm_p.h" 31#include "regs.h" 32#include "hard-reg-set.h" 33#include "real.h" 34#include "insn-config.h" 35#include "conditions.h" 36#include "output.h" 37#include "insn-attr.h" 38#include "flags.h" 39#include "except.h" 40#include "function.h" 41#include "recog.h" 42#include "expr.h" 43#include "optabs.h" 44#include "toplev.h" 45#include "basic-block.h" 46#include "ggc.h" 47#include "target.h" 48#include "target-def.h" 49 50#ifndef CHECK_STACK_LIMIT 51#define CHECK_STACK_LIMIT (-1) 52#endif 53 54#warning NEED TO REVISIT "PIC_REG_USED" AND -mprofiler-epilogue SUPPORT 55#if 0 56#define PIC_REG_USED \ 57 (flag_pic && (current_function_uses_pic_offset_table \ 58 || current_function_uses_const_pool \ 59 || profile_flag || profile_block_flag)) 60#endif 61 62/* Processor costs (relative to an add) */ 63static const 64struct processor_costs size_cost = { /* costs for tunning for size */ 65 2, /* cost of an add 
instruction */ 66 3, /* cost of a lea instruction */ 67 2, /* variable shift costs */ 68 3, /* constant shift costs */ 69 3, /* cost of starting a multiply */ 70 0, /* cost of multiply per each bit set */ 71 3, /* cost of a divide/mod */ 72 3, /* cost of movsx */ 73 3, /* cost of movzx */ 74 0, /* "large" insn */ 75 2, /* MOVE_RATIO */ 76 2, /* cost for loading QImode using movzbl */ 77 {2, 2, 2}, /* cost of loading integer registers 78 in QImode, HImode and SImode. 79 Relative to reg-reg move (2). */ 80 {2, 2, 2}, /* cost of storing integer registers */ 81 2, /* cost of reg,reg fld/fst */ 82 {2, 2, 2}, /* cost of loading fp registers 83 in SFmode, DFmode and XFmode */ 84 {2, 2, 2}, /* cost of loading integer registers */ 85 3, /* cost of moving MMX register */ 86 {3, 3}, /* cost of loading MMX registers 87 in SImode and DImode */ 88 {3, 3}, /* cost of storing MMX registers 89 in SImode and DImode */ 90 3, /* cost of moving SSE register */ 91 {3, 3, 3}, /* cost of loading SSE registers 92 in SImode, DImode and TImode */ 93 {3, 3, 3}, /* cost of storing SSE registers 94 in SImode, DImode and TImode */ 95 3, /* MMX or SSE register to integer */ 96 0, /* size of prefetch block */ 97 0, /* number of parallel prefetches */ 98}; 99/* Processor costs (relative to an add) */ 100static const 101struct processor_costs i386_cost = { /* 386 specific costs */ 102 1, /* cost of an add instruction */ 103 1, /* cost of a lea instruction */ 104 3, /* variable shift costs */ 105 2, /* constant shift costs */ 106 6, /* cost of starting a multiply */ 107 1, /* cost of multiply per each bit set */ 108 23, /* cost of a divide/mod */ 109 3, /* cost of movsx */ 110 2, /* cost of movzx */ 111 15, /* "large" insn */ 112 3, /* MOVE_RATIO */ 113 4, /* cost for loading QImode using movzbl */ 114 {2, 4, 2}, /* cost of loading integer registers 115 in QImode, HImode and SImode. 116 Relative to reg-reg move (2). 
*/ 117 {2, 4, 2}, /* cost of storing integer registers */ 118 2, /* cost of reg,reg fld/fst */ 119 {8, 8, 8}, /* cost of loading fp registers 120 in SFmode, DFmode and XFmode */ 121 {8, 8, 8}, /* cost of loading integer registers */ 122 2, /* cost of moving MMX register */ 123 {4, 8}, /* cost of loading MMX registers 124 in SImode and DImode */ 125 {4, 8}, /* cost of storing MMX registers 126 in SImode and DImode */ 127 2, /* cost of moving SSE register */ 128 {4, 8, 16}, /* cost of loading SSE registers 129 in SImode, DImode and TImode */ 130 {4, 8, 16}, /* cost of storing SSE registers 131 in SImode, DImode and TImode */ 132 3, /* MMX or SSE register to integer */ 133 0, /* size of prefetch block */ 134 0, /* number of parallel prefetches */ 135}; 136 137static const 138struct processor_costs i486_cost = { /* 486 specific costs */ 139 1, /* cost of an add instruction */ 140 1, /* cost of a lea instruction */ 141 3, /* variable shift costs */ 142 2, /* constant shift costs */ 143 12, /* cost of starting a multiply */ 144 1, /* cost of multiply per each bit set */ 145 40, /* cost of a divide/mod */ 146 3, /* cost of movsx */ 147 2, /* cost of movzx */ 148 15, /* "large" insn */ 149 3, /* MOVE_RATIO */ 150 4, /* cost for loading QImode using movzbl */ 151 {2, 4, 2}, /* cost of loading integer registers 152 in QImode, HImode and SImode. 153 Relative to reg-reg move (2). 
*/ 154 {2, 4, 2}, /* cost of storing integer registers */ 155 2, /* cost of reg,reg fld/fst */ 156 {8, 8, 8}, /* cost of loading fp registers 157 in SFmode, DFmode and XFmode */ 158 {8, 8, 8}, /* cost of loading integer registers */ 159 2, /* cost of moving MMX register */ 160 {4, 8}, /* cost of loading MMX registers 161 in SImode and DImode */ 162 {4, 8}, /* cost of storing MMX registers 163 in SImode and DImode */ 164 2, /* cost of moving SSE register */ 165 {4, 8, 16}, /* cost of loading SSE registers 166 in SImode, DImode and TImode */ 167 {4, 8, 16}, /* cost of storing SSE registers 168 in SImode, DImode and TImode */ 169 3, /* MMX or SSE register to integer */ 170 0, /* size of prefetch block */ 171 0, /* number of parallel prefetches */ 172}; 173 174static const 175struct processor_costs pentium_cost = { 176 1, /* cost of an add instruction */ 177 1, /* cost of a lea instruction */ 178 4, /* variable shift costs */ 179 1, /* constant shift costs */ 180 11, /* cost of starting a multiply */ 181 0, /* cost of multiply per each bit set */ 182 25, /* cost of a divide/mod */ 183 3, /* cost of movsx */ 184 2, /* cost of movzx */ 185 8, /* "large" insn */ 186 6, /* MOVE_RATIO */ 187 6, /* cost for loading QImode using movzbl */ 188 {2, 4, 2}, /* cost of loading integer registers 189 in QImode, HImode and SImode. 190 Relative to reg-reg move (2). 
*/ 191 {2, 4, 2}, /* cost of storing integer registers */ 192 2, /* cost of reg,reg fld/fst */ 193 {2, 2, 6}, /* cost of loading fp registers 194 in SFmode, DFmode and XFmode */ 195 {4, 4, 6}, /* cost of loading integer registers */ 196 8, /* cost of moving MMX register */ 197 {8, 8}, /* cost of loading MMX registers 198 in SImode and DImode */ 199 {8, 8}, /* cost of storing MMX registers 200 in SImode and DImode */ 201 2, /* cost of moving SSE register */ 202 {4, 8, 16}, /* cost of loading SSE registers 203 in SImode, DImode and TImode */ 204 {4, 8, 16}, /* cost of storing SSE registers 205 in SImode, DImode and TImode */ 206 3, /* MMX or SSE register to integer */ 207 0, /* size of prefetch block */ 208 0, /* number of parallel prefetches */ 209}; 210 211static const 212struct processor_costs pentiumpro_cost = { 213 1, /* cost of an add instruction */ 214 1, /* cost of a lea instruction */ 215 1, /* variable shift costs */ 216 1, /* constant shift costs */ 217 4, /* cost of starting a multiply */ 218 0, /* cost of multiply per each bit set */ 219 17, /* cost of a divide/mod */ 220 1, /* cost of movsx */ 221 1, /* cost of movzx */ 222 8, /* "large" insn */ 223 6, /* MOVE_RATIO */ 224 2, /* cost for loading QImode using movzbl */ 225 {4, 4, 4}, /* cost of loading integer registers 226 in QImode, HImode and SImode. 227 Relative to reg-reg move (2). 
*/ 228 {2, 2, 2}, /* cost of storing integer registers */ 229 2, /* cost of reg,reg fld/fst */ 230 {2, 2, 6}, /* cost of loading fp registers 231 in SFmode, DFmode and XFmode */ 232 {4, 4, 6}, /* cost of loading integer registers */ 233 2, /* cost of moving MMX register */ 234 {2, 2}, /* cost of loading MMX registers 235 in SImode and DImode */ 236 {2, 2}, /* cost of storing MMX registers 237 in SImode and DImode */ 238 2, /* cost of moving SSE register */ 239 {2, 2, 8}, /* cost of loading SSE registers 240 in SImode, DImode and TImode */ 241 {2, 2, 8}, /* cost of storing SSE registers 242 in SImode, DImode and TImode */ 243 3, /* MMX or SSE register to integer */ 244 32, /* size of prefetch block */ 245 6, /* number of parallel prefetches */ 246}; 247 248static const 249struct processor_costs k6_cost = { 250 1, /* cost of an add instruction */ 251 2, /* cost of a lea instruction */ 252 1, /* variable shift costs */ 253 1, /* constant shift costs */ 254 3, /* cost of starting a multiply */ 255 0, /* cost of multiply per each bit set */ 256 18, /* cost of a divide/mod */ 257 2, /* cost of movsx */ 258 2, /* cost of movzx */ 259 8, /* "large" insn */ 260 4, /* MOVE_RATIO */ 261 3, /* cost for loading QImode using movzbl */ 262 {4, 5, 4}, /* cost of loading integer registers 263 in QImode, HImode and SImode. 264 Relative to reg-reg move (2). 
*/ 265 {2, 3, 2}, /* cost of storing integer registers */ 266 4, /* cost of reg,reg fld/fst */ 267 {6, 6, 6}, /* cost of loading fp registers 268 in SFmode, DFmode and XFmode */ 269 {4, 4, 4}, /* cost of loading integer registers */ 270 2, /* cost of moving MMX register */ 271 {2, 2}, /* cost of loading MMX registers 272 in SImode and DImode */ 273 {2, 2}, /* cost of storing MMX registers 274 in SImode and DImode */ 275 2, /* cost of moving SSE register */ 276 {2, 2, 8}, /* cost of loading SSE registers 277 in SImode, DImode and TImode */ 278 {2, 2, 8}, /* cost of storing SSE registers 279 in SImode, DImode and TImode */ 280 6, /* MMX or SSE register to integer */ 281 32, /* size of prefetch block */ 282 1, /* number of parallel prefetches */ 283}; 284 285static const 286struct processor_costs athlon_cost = { 287 1, /* cost of an add instruction */ 288 2, /* cost of a lea instruction */ 289 1, /* variable shift costs */ 290 1, /* constant shift costs */ 291 5, /* cost of starting a multiply */ 292 0, /* cost of multiply per each bit set */ 293 42, /* cost of a divide/mod */ 294 1, /* cost of movsx */ 295 1, /* cost of movzx */ 296 8, /* "large" insn */ 297 9, /* MOVE_RATIO */ 298 4, /* cost for loading QImode using movzbl */ 299 {4, 5, 4}, /* cost of loading integer registers 300 in QImode, HImode and SImode. 301 Relative to reg-reg move (2). 
*/ 302 {2, 3, 2}, /* cost of storing integer registers */ 303 4, /* cost of reg,reg fld/fst */ 304 {6, 6, 20}, /* cost of loading fp registers 305 in SFmode, DFmode and XFmode */ 306 {4, 4, 16}, /* cost of loading integer registers */ 307 2, /* cost of moving MMX register */ 308 {2, 2}, /* cost of loading MMX registers 309 in SImode and DImode */ 310 {2, 2}, /* cost of storing MMX registers 311 in SImode and DImode */ 312 2, /* cost of moving SSE register */ 313 {2, 2, 8}, /* cost of loading SSE registers 314 in SImode, DImode and TImode */ 315 {2, 2, 8}, /* cost of storing SSE registers 316 in SImode, DImode and TImode */ 317 6, /* MMX or SSE register to integer */ 318 64, /* size of prefetch block */ 319 6, /* number of parallel prefetches */ 320}; 321 322static const 323struct processor_costs pentium4_cost = { 324 1, /* cost of an add instruction */ 325 1, /* cost of a lea instruction */ 326 8, /* variable shift costs */ 327 8, /* constant shift costs */ 328 30, /* cost of starting a multiply */ 329 0, /* cost of multiply per each bit set */ 330 112, /* cost of a divide/mod */ 331 1, /* cost of movsx */ 332 1, /* cost of movzx */ 333 16, /* "large" insn */ 334 6, /* MOVE_RATIO */ 335 2, /* cost for loading QImode using movzbl */ 336 {4, 5, 4}, /* cost of loading integer registers 337 in QImode, HImode and SImode. 338 Relative to reg-reg move (2). 
*/ 339 {2, 3, 2}, /* cost of storing integer registers */ 340 2, /* cost of reg,reg fld/fst */ 341 {2, 2, 6}, /* cost of loading fp registers 342 in SFmode, DFmode and XFmode */ 343 {4, 4, 6}, /* cost of loading integer registers */ 344 2, /* cost of moving MMX register */ 345 {2, 2}, /* cost of loading MMX registers 346 in SImode and DImode */ 347 {2, 2}, /* cost of storing MMX registers 348 in SImode and DImode */ 349 12, /* cost of moving SSE register */ 350 {12, 12, 12}, /* cost of loading SSE registers 351 in SImode, DImode and TImode */ 352 {2, 2, 8}, /* cost of storing SSE registers 353 in SImode, DImode and TImode */ 354 10, /* MMX or SSE register to integer */ 355 64, /* size of prefetch block */ 356 6, /* number of parallel prefetches */ 357}; 358 359const struct processor_costs *ix86_cost = &pentium_cost; 360 361/* Processor feature/optimization bitmasks. */ 362#define m_386 (1<<PROCESSOR_I386) 363#define m_486 (1<<PROCESSOR_I486) 364#define m_PENT (1<<PROCESSOR_PENTIUM) 365#define m_PPRO (1<<PROCESSOR_PENTIUMPRO) 366#define m_K6 (1<<PROCESSOR_K6) 367#define m_ATHLON (1<<PROCESSOR_ATHLON) 368#define m_PENT4 (1<<PROCESSOR_PENTIUM4) 369 370const int x86_use_leave = m_386 | m_K6 | m_ATHLON; 371const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4; 372const int x86_zero_extend_with_and = m_486 | m_PENT; 373const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */; 374const int x86_double_with_add = ~m_386; 375const int x86_use_bit_test = m_386; 376const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6; 377const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4; 378const int x86_3dnow_a = m_ATHLON; 379const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4; 380const int x86_branch_hints = m_PENT4; 381const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4; 382const int x86_partial_reg_stall = m_PPRO; 383const int x86_use_loop = m_K6; 384const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT); 385const int x86_use_mov0 = m_K6; 
386const int x86_use_cltd = ~(m_PENT | m_K6); 387const int x86_read_modify_write = ~m_PENT; 388const int x86_read_modify = ~(m_PENT | m_PPRO); 389const int x86_split_long_moves = m_PPRO; 390const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486; 391const int x86_single_stringop = m_386 | m_PENT4; 392const int x86_qimode_math = ~(0); 393const int x86_promote_qi_regs = 0; 394const int x86_himode_math = ~(m_PPRO); 395const int x86_promote_hi_regs = m_PPRO; 396const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4; 397const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4; 398const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4; 399const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4; 400const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4); 401const int x86_partial_reg_dependency = m_ATHLON | m_PENT4; 402const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4; 403const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO; 404const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO; 405const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO; 406const int x86_decompose_lea = m_PENT4; 407 408/* In case the avreage insn count for single function invocation is 409 lower than this constant, emit fast (but longer) prologue and 410 epilogue code. */ 411#define FAST_PROLOGUE_INSN_COUNT 30 412/* Set by prologue expander and used by epilogue expander to determine 413 the style used. */ 414static int use_fast_prologue_epilogue; 415 416#define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx)) 417 418static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */ 419static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */ 420static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */ 421 422/* Array of the smallest class containing reg number REGNO, indexed by 423 REGNO. 
Used by REGNO_REG_CLASS in i386.h. */ 424 425enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] = 426{ 427 /* ax, dx, cx, bx */ 428 AREG, DREG, CREG, BREG, 429 /* si, di, bp, sp */ 430 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS, 431 /* FP registers */ 432 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS, 433 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, 434 /* arg pointer */ 435 NON_Q_REGS, 436 /* flags, fpsr, dirflag, frame */ 437 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS, 438 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 439 SSE_REGS, SSE_REGS, 440 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, 441 MMX_REGS, MMX_REGS, 442 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 443 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 444 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 445 SSE_REGS, SSE_REGS, 446}; 447 448/* The "default" register map used in 32bit mode. */ 449 450int const dbx_register_map[FIRST_PSEUDO_REGISTER] = 451{ 452 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */ 453 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */ 454 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 455 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */ 456 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */ 457 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ 458 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ 459}; 460 461static int const x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/, 462 1 /*RDX*/, 2 /*RCX*/, 463 FIRST_REX_INT_REG /*R8 */, 464 FIRST_REX_INT_REG + 1 /*R9 */}; 465static int const x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDI*/, 5, 4}; 466 467/* The "default" register map used in 64bit mode. 
*/ 468int const dbx64_register_map[FIRST_PSEUDO_REGISTER] = 469{ 470 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */ 471 33, 34, 35, 36, 37, 38, 39, 40 /* fp regs */ 472 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 473 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */ 474 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */ 475 8,9,10,11,12,13,14,15, /* extended integer registers */ 476 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */ 477}; 478 479/* Define the register numbers to be used in Dwarf debugging information. 480 The SVR4 reference port C compiler uses the following register numbers 481 in its Dwarf output code: 482 0 for %eax (gcc regno = 0) 483 1 for %ecx (gcc regno = 2) 484 2 for %edx (gcc regno = 1) 485 3 for %ebx (gcc regno = 3) 486 4 for %esp (gcc regno = 7) 487 5 for %ebp (gcc regno = 6) 488 6 for %esi (gcc regno = 4) 489 7 for %edi (gcc regno = 5) 490 The following three DWARF register numbers are never generated by 491 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4 492 believes these numbers have these meanings. 493 8 for %eip (no gcc equivalent) 494 9 for %eflags (gcc regno = 17) 495 10 for %trapno (no gcc equivalent) 496 It is not at all clear how we should number the FP stack registers 497 for the x86 architecture. If the version of SDB on x86/svr4 were 498 a bit less brain dead with respect to floating-point then we would 499 have a precedent to follow with respect to DWARF register numbers 500 for x86 FP registers, but the SDB on x86/svr4 is so completely 501 broken with respect to FP registers that it is hardly worth thinking 502 of it as something to strive for compatibility with. 503 The version of x86/svr4 SDB I have at the moment does (partially) 504 seem to believe that DWARF register number 11 is associated with 505 the x86 register %st(0), but that's about all. 
Higher DWARF 506 register numbers don't seem to be associated with anything in 507 particular, and even for DWARF regno 11, SDB only seems to under- 508 stand that it should say that a variable lives in %st(0) (when 509 asked via an `=' command) if we said it was in DWARF regno 11, 510 but SDB still prints garbage when asked for the value of the 511 variable in question (via a `/' command). 512 (Also note that the labels SDB prints for various FP stack regs 513 when doing an `x' command are all wrong.) 514 Note that these problems generally don't affect the native SVR4 515 C compiler because it doesn't allow the use of -O with -g and 516 because when it is *not* optimizing, it allocates a memory 517 location for each floating-point variable, and the memory 518 location is what gets described in the DWARF AT_location 519 attribute for the variable in question. 520 Regardless of the severe mental illness of the x86/svr4 SDB, we 521 do something sensible here and we use the following DWARF 522 register numbers. Note that these are all stack-top-relative 523 numbers. 524 11 for %st(0) (gcc regno = 8) 525 12 for %st(1) (gcc regno = 9) 526 13 for %st(2) (gcc regno = 10) 527 14 for %st(3) (gcc regno = 11) 528 15 for %st(4) (gcc regno = 12) 529 16 for %st(5) (gcc regno = 13) 530 17 for %st(6) (gcc regno = 14) 531 18 for %st(7) (gcc regno = 15) 532*/ 533int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = 534{ 535 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */ 536 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */ 537 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 538 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */ 539 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */ 540 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */ 541 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */ 542}; 543 544/* Test and compare insns in i386.md store the information needed to 545 generate branch and scc insns here. 
*/ 546 547rtx ix86_compare_op0 = NULL_RTX; 548rtx ix86_compare_op1 = NULL_RTX; 549 550#define MAX_386_STACK_LOCALS 3 551/* Size of the register save area. */ 552#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16) 553 554/* Define the structure for the machine field in struct function. */ 555struct machine_function 556{ 557 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS]; 558 int save_varrargs_registers; 559 int accesses_prev_frame; 560}; 561 562#define ix86_stack_locals (cfun->machine->stack_locals) 563#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers) 564 565/* Structure describing stack frame layout. 566 Stack grows downward: 567 568 [arguments] 569 <- ARG_POINTER 570 saved pc 571 572 saved frame pointer if frame_pointer_needed 573 <- HARD_FRAME_POINTER 574 [saved regs] 575 576 [padding1] \ 577 ) 578 [va_arg registers] ( 579 > to_allocate <- FRAME_POINTER 580 [frame] ( 581 ) 582 [padding2] / 583 */ 584struct ix86_frame 585{ 586 int nregs; 587 int padding1; 588 int va_arg_size; 589 HOST_WIDE_INT frame; 590 int padding2; 591 int outgoing_arguments_size; 592 int red_zone_size; 593 594 HOST_WIDE_INT to_allocate; 595 /* The offsets relative to ARG_POINTER. */ 596 HOST_WIDE_INT frame_pointer_offset; 597 HOST_WIDE_INT hard_frame_pointer_offset; 598 HOST_WIDE_INT stack_pointer_offset; 599}; 600 601/* Used to enable/disable debugging features. */ 602const char *ix86_debug_arg_string, *ix86_debug_addr_string; 603/* Code model option as passed by user. */ 604const char *ix86_cmodel_string; 605/* Parsed value. */ 606enum cmodel ix86_cmodel; 607/* Asm dialect. */ 608const char *ix86_asm_string; 609enum asm_dialect ix86_asm_dialect = ASM_ATT; 610 611/* which cpu are we scheduling for */ 612enum processor_type ix86_cpu; 613 614/* which unit we are generating floating point math for */ 615enum fpmath_unit ix86_fpmath; 616 617/* which instruction set architecture to use. 
*/ 618int ix86_arch; 619 620/* Strings to hold which cpu and instruction set architecture to use. */ 621const char *ix86_cpu_string; /* for -mcpu=<xxx> */ 622const char *ix86_arch_string; /* for -march=<xxx> */ 623const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */ 624 625/* # of registers to use to pass arguments. */ 626const char *ix86_regparm_string; 627 628/* true if sse prefetch instruction is not NOOP. */ 629int x86_prefetch_sse; 630 631/* ix86_regparm_string as a number */ 632int ix86_regparm; 633 634/* Alignment to use for loops and jumps: */ 635 636/* Power of two alignment for loops. */ 637const char *ix86_align_loops_string; 638 639/* Power of two alignment for non-loop jumps. */ 640const char *ix86_align_jumps_string; 641 642/* Power of two alignment for stack boundary in bytes. */ 643const char *ix86_preferred_stack_boundary_string; 644 645/* Preferred alignment for stack boundary in bits. */ 646int ix86_preferred_stack_boundary; 647 648/* Values 1-5: see jump.c */ 649int ix86_branch_cost; 650const char *ix86_branch_cost_string; 651 652/* Power of two alignment for functions. */ 653const char *ix86_align_funcs_string; 654 655/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. 
*/ 656static char internal_label_prefix[16]; 657static int internal_label_prefix_len; 658 659static int local_symbolic_operand PARAMS ((rtx, enum machine_mode)); 660static void output_pic_addr_const PARAMS ((FILE *, rtx, int)); 661static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode, 662 int, int, FILE *)); 663static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx)); 664static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code, 665 rtx *, rtx *)); 666static rtx gen_push PARAMS ((rtx)); 667static int memory_address_length PARAMS ((rtx addr)); 668static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type)); 669static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type)); 670static int ix86_safe_length PARAMS ((rtx)); 671static enum attr_memory ix86_safe_memory PARAMS ((rtx)); 672static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx)); 673static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx)); 674static void ix86_dump_ppro_packet PARAMS ((FILE *)); 675static void ix86_reorder_insn PARAMS ((rtx *, rtx *)); 676static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair, 677 rtx)); 678static void ix86_init_machine_status PARAMS ((struct function *)); 679static void ix86_mark_machine_status PARAMS ((struct function *)); 680static void ix86_free_machine_status PARAMS ((struct function *)); 681static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode)); 682static int ix86_safe_length_prefix PARAMS ((rtx)); 683static int ix86_nsaved_regs PARAMS ((void)); 684static void ix86_emit_save_regs PARAMS ((void)); 685static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT)); 686static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int)); 687static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx)); 688static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *)); 689static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *)); 690static 
HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void)); 691static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT)); 692static rtx ix86_expand_aligntest PARAMS ((rtx, int)); 693static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx)); 694static int ix86_issue_rate PARAMS ((void)); 695static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int)); 696static void ix86_sched_init PARAMS ((FILE *, int, int)); 697static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int)); 698static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int)); 699static void ix86_init_mmx_sse_builtins PARAMS ((void)); 700 701struct ix86_address 702{ 703 rtx base, index, disp; 704 HOST_WIDE_INT scale; 705}; 706 707static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *)); 708 709struct builtin_description; 710static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *, 711 tree, rtx)); 712static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *, 713 tree, rtx)); 714static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx)); 715static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int)); 716static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx)); 717static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code, 718 tree, rtx)); 719static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree)); 720static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode)); 721static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code)); 722static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code, 723 enum rtx_code *, 724 enum rtx_code *, 725 enum rtx_code *)); 726static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx, 727 rtx *, rtx *)); 728static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code)); 729static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code)); 730static int ix86_fp_comparison_sahf_cost PARAMS 
((enum rtx_code code)); 731static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code)); 732static int ix86_save_reg PARAMS ((int, int)); 733static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *)); 734static int ix86_comp_type_attributes PARAMS ((tree, tree)); 735const struct attribute_spec ix86_attribute_table[]; 736static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *)); 737static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *)); 738 739#ifdef DO_GLOBAL_CTORS_BODY 740static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int)); 741#endif 742 743/* Register class used for passing given 64bit part of the argument. 744 These represent classes as documented by the PS ABI, with the exception 745 of SSESF, SSEDF classes, that are basically SSE class, just gcc will 746 use SF or DFmode move instead of DImode to avoid reformating penalties. 747 748 Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves 749 whenever possible (upper half does contain padding). 750 */ 751enum x86_64_reg_class 752 { 753 X86_64_NO_CLASS, 754 X86_64_INTEGER_CLASS, 755 X86_64_INTEGERSI_CLASS, 756 X86_64_SSE_CLASS, 757 X86_64_SSESF_CLASS, 758 X86_64_SSEDF_CLASS, 759 X86_64_SSEUP_CLASS, 760 X86_64_X87_CLASS, 761 X86_64_X87UP_CLASS, 762 X86_64_MEMORY_CLASS 763 }; 764static const char * const x86_64_reg_class_name[] = 765 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"}; 766 767#define MAX_CLASSES 4 768static int classify_argument PARAMS ((enum machine_mode, tree, 769 enum x86_64_reg_class [MAX_CLASSES], 770 int)); 771static int examine_argument PARAMS ((enum machine_mode, tree, int, int *, 772 int *)); 773static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int, 774 const int *, int)); 775static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class, 776 enum x86_64_reg_class)); 777 778/* Initialize the GCC target structure. 
*/ 779#undef TARGET_ATTRIBUTE_TABLE 780#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table 781#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES 782# undef TARGET_MERGE_DECL_ATTRIBUTES 783# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes 784#endif 785 786#undef TARGET_COMP_TYPE_ATTRIBUTES 787#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes 788 789#undef TARGET_INIT_BUILTINS 790#define TARGET_INIT_BUILTINS ix86_init_builtins 791 792#undef TARGET_EXPAND_BUILTIN 793#define TARGET_EXPAND_BUILTIN ix86_expand_builtin 794 795#if defined (OSF_OS) || defined (TARGET_OSF1ELF) 796 static void ix86_osf_output_function_prologue PARAMS ((FILE *, 797 HOST_WIDE_INT)); 798# undef TARGET_ASM_FUNCTION_PROLOGUE 799# define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue 800#endif 801 802#undef TARGET_ASM_OPEN_PAREN 803#define TARGET_ASM_OPEN_PAREN "" 804#undef TARGET_ASM_CLOSE_PAREN 805#define TARGET_ASM_CLOSE_PAREN "" 806 807#undef TARGET_ASM_ALIGNED_HI_OP 808#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT 809#undef TARGET_ASM_ALIGNED_SI_OP 810#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG 811#ifdef ASM_QUAD 812#undef TARGET_ASM_ALIGNED_DI_OP 813#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD 814#endif 815 816#undef TARGET_ASM_UNALIGNED_HI_OP 817#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP 818#undef TARGET_ASM_UNALIGNED_SI_OP 819#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP 820#undef TARGET_ASM_UNALIGNED_DI_OP 821#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP 822 823#undef TARGET_SCHED_ADJUST_COST 824#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost 825#undef TARGET_SCHED_ISSUE_RATE 826#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate 827#undef TARGET_SCHED_VARIABLE_ISSUE 828#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue 829#undef TARGET_SCHED_INIT 830#define TARGET_SCHED_INIT ix86_sched_init 831#undef TARGET_SCHED_REORDER 832#define TARGET_SCHED_REORDER ix86_sched_reorder 833 834struct gcc_target targetm = 
TARGET_INITIALIZER; 835 836/* Sometimes certain combinations of command options do not make 837 sense on a particular target machine. You can define a macro 838 `OVERRIDE_OPTIONS' to take account of this. This macro, if 839 defined, is executed once just after all the command options have 840 been parsed. 841 842 Don't use this macro to turn on various extra optimizations for 843 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ 844 845void 846override_options () 847{ 848 int i; 849 /* Comes from final.c -- no real reason to change it. */ 850#define MAX_CODE_ALIGN 16 851 852 static struct ptt 853 { 854 const struct processor_costs *cost; /* Processor costs */ 855 const int target_enable; /* Target flags to enable. */ 856 const int target_disable; /* Target flags to disable. */ 857 const int align_loop; /* Default alignments. */ 858 const int align_loop_max_skip; 859 const int align_jump; 860 const int align_jump_max_skip; 861 const int align_func; 862 const int branch_cost; 863 } 864 const processor_target_table[PROCESSOR_max] = 865 { 866 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1}, 867 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1}, 868 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1}, 869 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1}, 870 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1}, 871 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1}, 872 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1} 873 }; 874 875 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES; 876 static struct pta 877 { 878 const char *const name; /* processor name or nickname. 
*/ 879 const enum processor_type processor; 880 const enum pta_flags 881 { 882 PTA_SSE = 1, 883 PTA_SSE2 = 2, 884 PTA_MMX = 4, 885 PTA_PREFETCH_SSE = 8, 886 PTA_3DNOW = 16, 887 PTA_3DNOW_A = 64 888 } flags; 889 } 890 const processor_alias_table[] = 891 { 892 {"i386", PROCESSOR_I386, 0}, 893 {"i486", PROCESSOR_I486, 0}, 894 {"i586", PROCESSOR_PENTIUM, 0}, 895 {"pentium", PROCESSOR_PENTIUM, 0}, 896 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX}, 897 {"i686", PROCESSOR_PENTIUMPRO, 0}, 898 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0}, 899 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX}, 900 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, 901 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | 902 PTA_MMX | PTA_PREFETCH_SSE}, 903 {"k6", PROCESSOR_K6, PTA_MMX}, 904 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 905 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 906 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 907 | PTA_3DNOW_A}, 908 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE 909 | PTA_3DNOW | PTA_3DNOW_A}, 910 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 911 | PTA_3DNOW_A | PTA_SSE}, 912 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 913 | PTA_3DNOW_A | PTA_SSE}, 914 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 915 | PTA_3DNOW_A | PTA_SSE}, 916 }; 917 918 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta); 919 920#ifdef SUBTARGET_OVERRIDE_OPTIONS 921 SUBTARGET_OVERRIDE_OPTIONS; 922#endif 923 924 if (!ix86_cpu_string && ix86_arch_string) 925 ix86_cpu_string = ix86_arch_string; 926 if (!ix86_cpu_string) 927 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT]; 928 if (!ix86_arch_string) 929 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386"; 930 931 if (ix86_cmodel_string != 0) 932 { 933 if (!strcmp (ix86_cmodel_string, "small")) 934 ix86_cmodel = flag_pic ? 
CM_SMALL_PIC : CM_SMALL; 935 else if (flag_pic) 936 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string); 937 else if (!strcmp (ix86_cmodel_string, "32")) 938 ix86_cmodel = CM_32; 939 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic) 940 ix86_cmodel = CM_KERNEL; 941 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic) 942 ix86_cmodel = CM_MEDIUM; 943 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic) 944 ix86_cmodel = CM_LARGE; 945 else 946 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string); 947 } 948 else 949 { 950 ix86_cmodel = CM_32; 951 if (TARGET_64BIT) 952 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 953 } 954 if (ix86_asm_string != 0) 955 { 956 if (!strcmp (ix86_asm_string, "intel")) 957 ix86_asm_dialect = ASM_INTEL; 958 else if (!strcmp (ix86_asm_string, "att")) 959 ix86_asm_dialect = ASM_ATT; 960 else 961 error ("bad value (%s) for -masm= switch", ix86_asm_string); 962 } 963 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32)) 964 error ("code model `%s' not supported in the %s bit mode", 965 ix86_cmodel_string, TARGET_64BIT ? "64" : "32"); 966 if (ix86_cmodel == CM_LARGE) 967 sorry ("code model `large' not supported yet"); 968 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0)) 969 sorry ("%i-bit mode not compiled in", 970 (target_flags & MASK_64BIT) ? 64 : 32); 971 972 for (i = 0; i < pta_size; i++) 973 if (! strcmp (ix86_arch_string, processor_alias_table[i].name)) 974 { 975 ix86_arch = processor_alias_table[i].processor; 976 /* Default cpu tuning to the architecture. 
*/ 977 ix86_cpu = ix86_arch; 978 if (processor_alias_table[i].flags & PTA_MMX 979 && !(target_flags & MASK_MMX_SET)) 980 target_flags |= MASK_MMX; 981 if (processor_alias_table[i].flags & PTA_3DNOW 982 && !(target_flags & MASK_3DNOW_SET)) 983 target_flags |= MASK_3DNOW; 984 if (processor_alias_table[i].flags & PTA_3DNOW_A 985 && !(target_flags & MASK_3DNOW_A_SET)) 986 target_flags |= MASK_3DNOW_A; 987 if (processor_alias_table[i].flags & PTA_SSE 988 && !(target_flags & MASK_SSE_SET)) 989 target_flags |= MASK_SSE; 990 if (processor_alias_table[i].flags & PTA_SSE2 991 && !(target_flags & MASK_SSE2_SET)) 992 target_flags |= MASK_SSE2; 993 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) 994 x86_prefetch_sse = true; 995 break; 996 } 997 998 if (i == pta_size) 999 error ("bad value (%s) for -march= switch", ix86_arch_string); 1000 1001 for (i = 0; i < pta_size; i++) 1002 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name)) 1003 { 1004 ix86_cpu = processor_alias_table[i].processor; 1005 break; 1006 } 1007 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) 1008 x86_prefetch_sse = true; 1009 if (i == pta_size) 1010 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string); 1011 1012 if (optimize_size) 1013 ix86_cost = &size_cost; 1014 else 1015 ix86_cost = processor_target_table[ix86_cpu].cost; 1016 target_flags |= processor_target_table[ix86_cpu].target_enable; 1017 target_flags &= ~processor_target_table[ix86_cpu].target_disable; 1018 1019 /* Arrange to set up i386_stack_locals for all functions. */ 1020 init_machine_status = ix86_init_machine_status; 1021 mark_machine_status = ix86_mark_machine_status; 1022 free_machine_status = ix86_free_machine_status; 1023 1024 /* Validate -mregparm= value. 
*/ 1025 if (ix86_regparm_string) 1026 { 1027 i = atoi (ix86_regparm_string); 1028 if (i < 0 || i > REGPARM_MAX) 1029 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX); 1030 else 1031 ix86_regparm = i; 1032 } 1033 else 1034 if (TARGET_64BIT) 1035 ix86_regparm = REGPARM_MAX; 1036 1037 /* If the user has provided any of the -malign-* options, 1038 warn and use that value only if -falign-* is not set. 1039 Remove this code in GCC 3.2 or later. */ 1040 if (ix86_align_loops_string) 1041 { 1042 warning ("-malign-loops is obsolete, use -falign-loops"); 1043 if (align_loops == 0) 1044 { 1045 i = atoi (ix86_align_loops_string); 1046 if (i < 0 || i > MAX_CODE_ALIGN) 1047 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1048 else 1049 align_loops = 1 << i; 1050 } 1051 } 1052 1053 if (ix86_align_jumps_string) 1054 { 1055 warning ("-malign-jumps is obsolete, use -falign-jumps"); 1056 if (align_jumps == 0) 1057 { 1058 i = atoi (ix86_align_jumps_string); 1059 if (i < 0 || i > MAX_CODE_ALIGN) 1060 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1061 else 1062 align_jumps = 1 << i; 1063 } 1064 } 1065 1066 if (ix86_align_funcs_string) 1067 { 1068 warning ("-malign-functions is obsolete, use -falign-functions"); 1069 if (align_functions == 0) 1070 { 1071 i = atoi (ix86_align_funcs_string); 1072 if (i < 0 || i > MAX_CODE_ALIGN) 1073 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1074 else 1075 align_functions = 1 << i; 1076 } 1077 } 1078 1079 /* Default align_* from the processor table. 
*/ 1080 if (align_loops == 0) 1081 { 1082 align_loops = processor_target_table[ix86_cpu].align_loop; 1083 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip; 1084 } 1085 if (align_jumps == 0) 1086 { 1087 align_jumps = processor_target_table[ix86_cpu].align_jump; 1088 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip; 1089 } 1090 if (align_functions == 0) 1091 { 1092 align_functions = processor_target_table[ix86_cpu].align_func; 1093 } 1094 1095 /* Validate -mpreferred-stack-boundary= value, or provide default. 1096 The default of 128 bits is for Pentium III's SSE __m128, but we 1097 don't want additional code to keep the stack aligned when 1098 optimizing for code size. */ 1099 ix86_preferred_stack_boundary = (optimize_size 1100 ? TARGET_64BIT ? 64 : 32 1101 : 128); 1102 if (ix86_preferred_stack_boundary_string) 1103 { 1104 i = atoi (ix86_preferred_stack_boundary_string); 1105 if (i < (TARGET_64BIT ? 3 : 2) || i > 12) 1106 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i, 1107 TARGET_64BIT ? 3 : 2); 1108 else 1109 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT; 1110 } 1111 1112 /* Validate -mbranch-cost= value, or provide default. */ 1113 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost; 1114 if (ix86_branch_cost_string) 1115 { 1116 i = atoi (ix86_branch_cost_string); 1117 if (i < 0 || i > 5) 1118 error ("-mbranch-cost=%d is not between 0 and 5", i); 1119 else 1120 ix86_branch_cost = i; 1121 } 1122 1123 /* Keep nonleaf frame pointers. */ 1124 if (TARGET_OMIT_LEAF_FRAME_POINTER) 1125 flag_omit_frame_pointer = 1; 1126 1127 /* If we're doing fast math, we don't care about comparison order 1128 wrt NaNs. This lets us use a shorter comparison sequence. 
*/ 1129 if (flag_unsafe_math_optimizations) 1130 target_flags &= ~MASK_IEEE_FP; 1131 1132 if (TARGET_64BIT) 1133 { 1134 if (TARGET_ALIGN_DOUBLE) 1135 error ("-malign-double makes no sense in the 64bit mode"); 1136 if (TARGET_RTD) 1137 error ("-mrtd calling convention not supported in the 64bit mode"); 1138 /* Enable by default the SSE and MMX builtins. */ 1139 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE); 1140 ix86_fpmath = FPMATH_SSE; 1141 } 1142 else 1143 ix86_fpmath = FPMATH_387; 1144 1145 if (ix86_fpmath_string != 0) 1146 { 1147 if (! strcmp (ix86_fpmath_string, "387")) 1148 ix86_fpmath = FPMATH_387; 1149 else if (! strcmp (ix86_fpmath_string, "sse")) 1150 { 1151 if (!TARGET_SSE) 1152 { 1153 warning ("SSE instruction set disabled, using 387 arithmetics"); 1154 ix86_fpmath = FPMATH_387; 1155 } 1156 else 1157 ix86_fpmath = FPMATH_SSE; 1158 } 1159 else if (! strcmp (ix86_fpmath_string, "387,sse") 1160 || ! strcmp (ix86_fpmath_string, "sse,387")) 1161 { 1162 if (!TARGET_SSE) 1163 { 1164 warning ("SSE instruction set disabled, using 387 arithmetics"); 1165 ix86_fpmath = FPMATH_387; 1166 } 1167 else if (!TARGET_80387) 1168 { 1169 warning ("387 instruction set disabled, using SSE arithmetics"); 1170 ix86_fpmath = FPMATH_SSE; 1171 } 1172 else 1173 ix86_fpmath = FPMATH_SSE | FPMATH_387; 1174 } 1175 else 1176 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string); 1177 } 1178 1179 /* It makes no sense to ask for just SSE builtins, so MMX is also turned 1180 on by -msse. */ 1181 if (TARGET_SSE) 1182 { 1183 target_flags |= MASK_MMX; 1184 x86_prefetch_sse = true; 1185 } 1186 1187 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */ 1188 if (TARGET_3DNOW) 1189 { 1190 target_flags |= MASK_MMX; 1191 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX 1192 extensions it adds. 
*/ 1193 if (x86_3dnow_a & (1 << ix86_arch)) 1194 target_flags |= MASK_3DNOW_A; 1195 } 1196 if ((x86_accumulate_outgoing_args & CPUMASK) 1197 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET) 1198 && !optimize_size) 1199 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 1200 1201 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ 1202 { 1203 char *p; 1204 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); 1205 p = strchr (internal_label_prefix, 'X'); 1206 internal_label_prefix_len = p - internal_label_prefix; 1207 *p = '\0'; 1208 } 1209} 1210 1211void 1212optimization_options (level, size) 1213 int level; 1214 int size ATTRIBUTE_UNUSED; 1215{ 1216 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to 1217 make the problem with not enough registers even worse. */ 1218#ifdef INSN_SCHEDULING 1219 if (level > 1) 1220 flag_schedule_insns = 0; 1221#endif 1222 if (TARGET_64BIT && optimize >= 1) 1223 flag_omit_frame_pointer = 1; 1224 if (TARGET_64BIT) 1225 { 1226 flag_pcc_struct_return = 0; 1227 flag_asynchronous_unwind_tables = 1; 1228 } 1229} 1230 1231/* Table of valid machine attributes. */ 1232const struct attribute_spec ix86_attribute_table[] = 1233{ 1234 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ 1235 /* Stdcall attribute says callee is responsible for popping arguments 1236 if they are not variable. */ 1237 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute }, 1238 /* Cdecl attribute says the callee is a normal C declaration */ 1239 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute }, 1240 /* Regparm attribute specifies how many integer arguments are to be 1241 passed in registers. 
*/ 1242 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute }, 1243#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES 1244 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute }, 1245 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute }, 1246 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute }, 1247#endif 1248 { NULL, 0, 0, false, false, false, NULL } 1249}; 1250 1251/* Handle a "cdecl" or "stdcall" attribute; 1252 arguments as in struct attribute_spec.handler. */ 1253static tree 1254ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs) 1255 tree *node; 1256 tree name; 1257 tree args ATTRIBUTE_UNUSED; 1258 int flags ATTRIBUTE_UNUSED; 1259 bool *no_add_attrs; 1260{ 1261 if (TREE_CODE (*node) != FUNCTION_TYPE 1262 && TREE_CODE (*node) != METHOD_TYPE 1263 && TREE_CODE (*node) != FIELD_DECL 1264 && TREE_CODE (*node) != TYPE_DECL) 1265 { 1266 warning ("`%s' attribute only applies to functions", 1267 IDENTIFIER_POINTER (name)); 1268 *no_add_attrs = true; 1269 } 1270 1271 if (TARGET_64BIT) 1272 { 1273 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name)); 1274 *no_add_attrs = true; 1275 } 1276 1277 return NULL_TREE; 1278} 1279 1280/* Handle a "regparm" attribute; 1281 arguments as in struct attribute_spec.handler. 
*/ 1282static tree 1283ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs) 1284 tree *node; 1285 tree name; 1286 tree args; 1287 int flags ATTRIBUTE_UNUSED; 1288 bool *no_add_attrs; 1289{ 1290 if (TREE_CODE (*node) != FUNCTION_TYPE 1291 && TREE_CODE (*node) != METHOD_TYPE 1292 && TREE_CODE (*node) != FIELD_DECL 1293 && TREE_CODE (*node) != TYPE_DECL) 1294 { 1295 warning ("`%s' attribute only applies to functions", 1296 IDENTIFIER_POINTER (name)); 1297 *no_add_attrs = true; 1298 } 1299 else 1300 { 1301 tree cst; 1302 1303 cst = TREE_VALUE (args); 1304 if (TREE_CODE (cst) != INTEGER_CST) 1305 { 1306 warning ("`%s' attribute requires an integer constant argument", 1307 IDENTIFIER_POINTER (name)); 1308 *no_add_attrs = true; 1309 } 1310 else if (compare_tree_int (cst, REGPARM_MAX) > 0) 1311 { 1312 warning ("argument to `%s' attribute larger than %d", 1313 IDENTIFIER_POINTER (name), REGPARM_MAX); 1314 *no_add_attrs = true; 1315 } 1316 } 1317 1318 return NULL_TREE; 1319} 1320 1321#if defined (OSF_OS) || defined (TARGET_OSF1ELF) 1322 1323/* Generate the assembly code for function entry. FILE is a stdio 1324 stream to output the code to. SIZE is an int: how many units of 1325 temporary storage to allocate. 1326 1327 Refer to the array `regs_ever_live' to determine which registers to 1328 save; `regs_ever_live[I]' is nonzero if register number I is ever 1329 used in the function. This function is responsible for knowing 1330 which registers should not be saved even if used. 1331 1332 We override it here to allow for the new profiling code to go before 1333 the prologue and the old mcount code to go after the prologue (and 1334 after %ebx has been set up for ELF shared library support). 
*/ 1335 1336static void 1337ix86_osf_output_function_prologue (file, size) 1338 FILE *file; 1339 HOST_WIDE_INT size; 1340{ 1341 const char *prefix = ""; 1342 const char *const lprefix = LPREFIX; 1343 int labelno = profile_label_no; 1344 1345#ifdef OSF_OS 1346 1347 if (TARGET_UNDERSCORES) 1348 prefix = "_"; 1349 1350 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE) 1351 { 1352 if (!flag_pic && !HALF_PIC_P ()) 1353 { 1354 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno); 1355 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix); 1356 } 1357 1358 else if (HALF_PIC_P ()) 1359 { 1360 rtx symref; 1361 1362 HALF_PIC_EXTERNAL ("_mcount_ptr"); 1363 symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode, 1364 "_mcount_ptr")); 1365 1366 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno); 1367 fprintf (file, "\tmovl %s%s,%%eax\n", prefix, 1368 XSTR (symref, 0)); 1369 fprintf (file, "\tcall *(%%eax)\n"); 1370 } 1371 1372 else 1373 { 1374 static int call_no = 0; 1375 1376 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no); 1377 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no); 1378 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n", 1379 lprefix, call_no++); 1380 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n", 1381 lprefix, labelno); 1382 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n", 1383 prefix); 1384 fprintf (file, "\tcall *(%%eax)\n"); 1385 } 1386 } 1387 1388#else /* !OSF_OS */ 1389 1390 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE) 1391 { 1392 if (!flag_pic) 1393 { 1394 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno); 1395 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix); 1396 } 1397 1398 else 1399 { 1400 static int call_no = 0; 1401 1402 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no); 1403 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no); 1404 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n", 1405 lprefix, call_no++); 1406 fprintf (file, "\tleal 
%sP%d@GOTOFF(%%eax),%%edx\n", 1407 lprefix, labelno); 1408 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n", 1409 prefix); 1410 fprintf (file, "\tcall *(%%eax)\n"); 1411 } 1412 } 1413#endif /* !OSF_OS */ 1414 1415 function_prologue (file, size); 1416} 1417 1418#endif /* OSF_OS || TARGET_OSF1ELF */ 1419 1420/* Return 0 if the attributes for two types are incompatible, 1 if they 1421 are compatible, and 2 if they are nearly compatible (which causes a 1422 warning to be generated). */ 1423 1424static int 1425ix86_comp_type_attributes (type1, type2) 1426 tree type1; 1427 tree type2; 1428{ 1429 /* Check for mismatch of non-default calling convention. */ 1430 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall"; 1431 1432 if (TREE_CODE (type1) != FUNCTION_TYPE) 1433 return 1; 1434 1435 /* Check for mismatched return types (cdecl vs stdcall). */ 1436 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1)) 1437 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2))) 1438 return 0; 1439 return 1; 1440} 1441 1442/* Value is the number of bytes of arguments automatically 1443 popped when returning from a subroutine call. 1444 FUNDECL is the declaration node of the function (as a tree), 1445 FUNTYPE is the data type of the function (as a tree), 1446 or for a library call it is an identifier node for the subroutine name. 1447 SIZE is the number of bytes of arguments passed on the stack. 1448 1449 On the 80386, the RTD insn may be used to pop them if the number 1450 of args is fixed, but if the number is variable then the caller 1451 must pop them all. RTD can't be used for library calls now 1452 because the library is compiled with the Unix compiler. 1453 Use of RTD is a selectable option, since it is incompatible with 1454 standard Unix calling sequences. If the option is not selected, 1455 the caller must always pop the args. 1456 1457 The attribute stdcall is equivalent to RTD on a per module basis. 
*/ 1458 1459int 1460ix86_return_pops_args (fundecl, funtype, size) 1461 tree fundecl; 1462 tree funtype; 1463 int size; 1464{ 1465 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE); 1466 1467 /* Cdecl functions override -mrtd, and never pop the stack. */ 1468 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) { 1469 1470 /* Stdcall functions will pop the stack if not variable args. */ 1471 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))) 1472 rtd = 1; 1473 1474 if (rtd 1475 && (TYPE_ARG_TYPES (funtype) == NULL_TREE 1476 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype))) 1477 == void_type_node))) 1478 return size; 1479 } 1480 1481 /* Lose any fake structure return argument. */ 1482 if (aggregate_value_p (TREE_TYPE (funtype)) 1483 && !TARGET_64BIT) 1484 return GET_MODE_SIZE (Pmode); 1485 1486 return 0; 1487} 1488 1489/* Argument support functions. */ 1490 1491/* Return true when register may be used to pass function parameters. */ 1492bool 1493ix86_function_arg_regno_p (regno) 1494 int regno; 1495{ 1496 int i; 1497 if (!TARGET_64BIT) 1498 return (regno < REGPARM_MAX 1499 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno])); 1500 if (SSE_REGNO_P (regno) && TARGET_SSE) 1501 return true; 1502 /* RAX is used as hidden argument to va_arg functions. */ 1503 if (!regno) 1504 return true; 1505 for (i = 0; i < REGPARM_MAX; i++) 1506 if (regno == x86_64_int_parameter_registers[i]) 1507 return true; 1508 return false; 1509} 1510 1511/* Initialize a variable CUM of type CUMULATIVE_ARGS 1512 for a call to a function whose data type is FNTYPE. 1513 For a library call, FNTYPE is 0. 
*/ 1514 1515void 1516init_cumulative_args (cum, fntype, libname) 1517 CUMULATIVE_ARGS *cum; /* Argument info to initialize */ 1518 tree fntype; /* tree ptr for function decl */ 1519 rtx libname; /* SYMBOL_REF of library name or 0 */ 1520{ 1521 static CUMULATIVE_ARGS zero_cum; 1522 tree param, next_param; 1523 1524 if (TARGET_DEBUG_ARG) 1525 { 1526 fprintf (stderr, "\ninit_cumulative_args ("); 1527 if (fntype) 1528 fprintf (stderr, "fntype code = %s, ret code = %s", 1529 tree_code_name[(int) TREE_CODE (fntype)], 1530 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]); 1531 else 1532 fprintf (stderr, "no fntype"); 1533 1534 if (libname) 1535 fprintf (stderr, ", libname = %s", XSTR (libname, 0)); 1536 } 1537 1538 *cum = zero_cum; 1539 1540 /* Set up the number of registers to use for passing arguments. */ 1541 cum->nregs = ix86_regparm; 1542 cum->sse_nregs = SSE_REGPARM_MAX; 1543 if (fntype && !TARGET_64BIT) 1544 { 1545 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype)); 1546 1547 if (attr) 1548 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); 1549 } 1550 cum->maybe_vaarg = false; 1551 1552 /* Determine if this function has variable arguments. This is 1553 indicated by the last argument being 'void_type_mode' if there 1554 are no variable arguments. If there are variable arguments, then 1555 we won't pass anything in registers */ 1556 1557 if (cum->nregs) 1558 { 1559 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0; 1560 param != 0; param = next_param) 1561 { 1562 next_param = TREE_CHAIN (param); 1563 if (next_param == 0 && TREE_VALUE (param) != void_type_node) 1564 { 1565 if (!TARGET_64BIT) 1566 cum->nregs = 0; 1567 cum->maybe_vaarg = true; 1568 } 1569 } 1570 } 1571 if ((!fntype && !libname) 1572 || (fntype && !TYPE_ARG_TYPES (fntype))) 1573 cum->maybe_vaarg = 1; 1574 1575 if (TARGET_DEBUG_ARG) 1576 fprintf (stderr, ", nregs=%d )\n", cum->nregs); 1577 1578 return; 1579} 1580 1581/* x86-64 register passing impleemntation. 
See x86-64 ABI for details. Goal 1582 of this code is to classify each 8bytes of incoming argument by the register 1583 class and assign registers accordingly. */ 1584 1585/* Return the union class of CLASS1 and CLASS2. 1586 See the x86-64 PS ABI for details. */ 1587 1588static enum x86_64_reg_class 1589merge_classes (class1, class2) 1590 enum x86_64_reg_class class1, class2; 1591{ 1592 /* Rule #1: If both classes are equal, this is the resulting class. */ 1593 if (class1 == class2) 1594 return class1; 1595 1596 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is 1597 the other class. */ 1598 if (class1 == X86_64_NO_CLASS) 1599 return class2; 1600 if (class2 == X86_64_NO_CLASS) 1601 return class1; 1602 1603 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ 1604 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) 1605 return X86_64_MEMORY_CLASS; 1606 1607 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ 1608 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) 1609 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) 1610 return X86_64_INTEGERSI_CLASS; 1611 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS 1612 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) 1613 return X86_64_INTEGER_CLASS; 1614 1615 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */ 1616 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS 1617 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS) 1618 return X86_64_MEMORY_CLASS; 1619 1620 /* Rule #6: Otherwise class SSE is used. */ 1621 return X86_64_SSE_CLASS; 1622} 1623 1624/* Classify the argument of type TYPE and mode MODE. 1625 CLASSES will be filled by the register class used to pass each word 1626 of the operand. The number of words is returned. In case the parameter 1627 should be passed in memory, 0 is returned. 
As a special case for zero 1628 sized containers, classes[0] will be NO_CLASS and 1 is returned. 1629 1630 BIT_OFFSET is used internally for handling records and specifies offset 1631 of the offset in bits modulo 256 to avoid overflow cases. 1632 1633 See the x86-64 PS ABI for details. 1634*/ 1635 1636static int 1637classify_argument (mode, type, classes, bit_offset) 1638 enum machine_mode mode; 1639 tree type; 1640 enum x86_64_reg_class classes[MAX_CLASSES]; 1641 int bit_offset; 1642{ 1643 int bytes = 1644 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 1645 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 1646 1647 if (type && AGGREGATE_TYPE_P (type)) 1648 { 1649 int i; 1650 tree field; 1651 enum x86_64_reg_class subclasses[MAX_CLASSES]; 1652 1653 /* On x86-64 we pass structures larger than 16 bytes on the stack. */ 1654 if (bytes > 16) 1655 return 0; 1656 1657 for (i = 0; i < words; i++) 1658 classes[i] = X86_64_NO_CLASS; 1659 1660 /* Zero sized arrays or structures are NO_CLASS. We return 0 to 1661 signalize memory class, so handle it as special case. */ 1662 if (!words) 1663 { 1664 classes[0] = X86_64_NO_CLASS; 1665 return 1; 1666 } 1667 1668 /* Classify each field of record and merge classes. */ 1669 if (TREE_CODE (type) == RECORD_TYPE) 1670 { 1671 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 1672 { 1673 if (TREE_CODE (field) == FIELD_DECL) 1674 { 1675 int num; 1676 1677 /* Bitfields are always classified as integer. Handle them 1678 early, since later code would consider them to be 1679 misaligned integers. 
*/ 1680 if (DECL_BIT_FIELD (field)) 1681 { 1682 for (i = int_bit_position (field) / 8 / 8; 1683 i < (int_bit_position (field) 1684 + tree_low_cst (DECL_SIZE (field), 0) 1685 + 63) / 8 / 8; i++) 1686 classes[i] = 1687 merge_classes (X86_64_INTEGER_CLASS, 1688 classes[i]); 1689 } 1690 else 1691 { 1692 num = classify_argument (TYPE_MODE (TREE_TYPE (field)), 1693 TREE_TYPE (field), subclasses, 1694 (int_bit_position (field) 1695 + bit_offset) % 256); 1696 if (!num) 1697 return 0; 1698 for (i = 0; i < num; i++) 1699 { 1700 int pos = 1701 (int_bit_position (field) + bit_offset) / 8 / 8; 1702 classes[i + pos] = 1703 merge_classes (subclasses[i], classes[i + pos]); 1704 } 1705 } 1706 } 1707 } 1708 } 1709 /* Arrays are handled as small records. */ 1710 else if (TREE_CODE (type) == ARRAY_TYPE) 1711 { 1712 int num; 1713 num = classify_argument (TYPE_MODE (TREE_TYPE (type)), 1714 TREE_TYPE (type), subclasses, bit_offset); 1715 if (!num) 1716 return 0; 1717 1718 /* The partial classes are now full classes. */ 1719 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) 1720 subclasses[0] = X86_64_SSE_CLASS; 1721 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4) 1722 subclasses[0] = X86_64_INTEGER_CLASS; 1723 1724 for (i = 0; i < words; i++) 1725 classes[i] = subclasses[i % num]; 1726 } 1727 /* Unions are similar to RECORD_TYPE but offset is always 0. */ 1728 else if (TREE_CODE (type) == UNION_TYPE) 1729 { 1730 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 1731 { 1732 if (TREE_CODE (field) == FIELD_DECL) 1733 { 1734 int num; 1735 num = classify_argument (TYPE_MODE (TREE_TYPE (field)), 1736 TREE_TYPE (field), subclasses, 1737 bit_offset); 1738 if (!num) 1739 return 0; 1740 for (i = 0; i < num; i++) 1741 classes[i] = merge_classes (subclasses[i], classes[i]); 1742 } 1743 } 1744 } 1745 else 1746 abort (); 1747 1748 /* Final merger cleanup. 
*/ 1749 for (i = 0; i < words; i++) 1750 { 1751 /* If one class is MEMORY, everything should be passed in 1752 memory. */ 1753 if (classes[i] == X86_64_MEMORY_CLASS) 1754 return 0; 1755 1756 /* The X86_64_SSEUP_CLASS should be always preceded by 1757 X86_64_SSE_CLASS. */ 1758 if (classes[i] == X86_64_SSEUP_CLASS 1759 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS)) 1760 classes[i] = X86_64_SSE_CLASS; 1761 1762 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */ 1763 if (classes[i] == X86_64_X87UP_CLASS 1764 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS)) 1765 classes[i] = X86_64_SSE_CLASS; 1766 } 1767 return words; 1768 } 1769 1770 /* Compute alignment needed. We align all types to natural boundaries with 1771 exception of XFmode that is aligned to 64bits. */ 1772 if (mode != VOIDmode && mode != BLKmode) 1773 { 1774 int mode_alignment = GET_MODE_BITSIZE (mode); 1775 1776 if (mode == XFmode) 1777 mode_alignment = 128; 1778 else if (mode == XCmode) 1779 mode_alignment = 256; 1780 /* Misaligned fields are always returned in memory. */ 1781 if (bit_offset % mode_alignment) 1782 return 0; 1783 } 1784 1785 /* Classification of atomic types. 
*/ 1786 switch (mode) 1787 { 1788 case DImode: 1789 case SImode: 1790 case HImode: 1791 case QImode: 1792 case CSImode: 1793 case CHImode: 1794 case CQImode: 1795 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) 1796 classes[0] = X86_64_INTEGERSI_CLASS; 1797 else 1798 classes[0] = X86_64_INTEGER_CLASS; 1799 return 1; 1800 case CDImode: 1801 case TImode: 1802 classes[0] = classes[1] = X86_64_INTEGER_CLASS; 1803 return 2; 1804 case CTImode: 1805 classes[0] = classes[1] = X86_64_INTEGER_CLASS; 1806 classes[2] = classes[3] = X86_64_INTEGER_CLASS; 1807 return 4; 1808 case SFmode: 1809 if (!(bit_offset % 64)) 1810 classes[0] = X86_64_SSESF_CLASS; 1811 else 1812 classes[0] = X86_64_SSE_CLASS; 1813 return 1; 1814 case DFmode: 1815 classes[0] = X86_64_SSEDF_CLASS; 1816 return 1; 1817 case TFmode: 1818 classes[0] = X86_64_X87_CLASS; 1819 classes[1] = X86_64_X87UP_CLASS; 1820 return 2; 1821 case TCmode: 1822 classes[0] = X86_64_X87_CLASS; 1823 classes[1] = X86_64_X87UP_CLASS; 1824 classes[2] = X86_64_X87_CLASS; 1825 classes[3] = X86_64_X87UP_CLASS; 1826 return 4; 1827 case DCmode: 1828 classes[0] = X86_64_SSEDF_CLASS; 1829 classes[1] = X86_64_SSEDF_CLASS; 1830 return 2; 1831 case SCmode: 1832 classes[0] = X86_64_SSE_CLASS; 1833 return 1; 1834 case BLKmode: 1835 return 0; 1836 default: 1837 abort (); 1838 } 1839} 1840 1841/* Examine the argument and return set number of register required in each 1842 class. Return 0 iff parameter should be passed in memory. 
*/ 1843static int 1844examine_argument (mode, type, in_return, int_nregs, sse_nregs) 1845 enum machine_mode mode; 1846 tree type; 1847 int *int_nregs, *sse_nregs; 1848 int in_return; 1849{ 1850 enum x86_64_reg_class class[MAX_CLASSES]; 1851 int n = classify_argument (mode, type, class, 0); 1852 1853 *int_nregs = 0; 1854 *sse_nregs = 0; 1855 if (!n) 1856 return 0; 1857 for (n--; n >= 0; n--) 1858 switch (class[n]) 1859 { 1860 case X86_64_INTEGER_CLASS: 1861 case X86_64_INTEGERSI_CLASS: 1862 (*int_nregs)++; 1863 break; 1864 case X86_64_SSE_CLASS: 1865 case X86_64_SSESF_CLASS: 1866 case X86_64_SSEDF_CLASS: 1867 (*sse_nregs)++; 1868 break; 1869 case X86_64_NO_CLASS: 1870 case X86_64_SSEUP_CLASS: 1871 break; 1872 case X86_64_X87_CLASS: 1873 case X86_64_X87UP_CLASS: 1874 if (!in_return) 1875 return 0; 1876 break; 1877 case X86_64_MEMORY_CLASS: 1878 abort (); 1879 } 1880 return 1; 1881} 1882/* Construct container for the argument used by GCC interface. See 1883 FUNCTION_ARG for the detailed description. */ 1884static rtx 1885construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno) 1886 enum machine_mode mode; 1887 tree type; 1888 int in_return; 1889 int nintregs, nsseregs; 1890 const int * intreg; 1891 int sse_regno; 1892{ 1893 enum machine_mode tmpmode; 1894 int bytes = 1895 (mode == BLKmode) ? 
int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 1896 enum x86_64_reg_class class[MAX_CLASSES]; 1897 int n; 1898 int i; 1899 int nexps = 0; 1900 int needed_sseregs, needed_intregs; 1901 rtx exp[MAX_CLASSES]; 1902 rtx ret; 1903 1904 n = classify_argument (mode, type, class, 0); 1905 if (TARGET_DEBUG_ARG) 1906 { 1907 if (!n) 1908 fprintf (stderr, "Memory class\n"); 1909 else 1910 { 1911 fprintf (stderr, "Classes:"); 1912 for (i = 0; i < n; i++) 1913 { 1914 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]); 1915 } 1916 fprintf (stderr, "\n"); 1917 } 1918 } 1919 if (!n) 1920 return NULL; 1921 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs)) 1922 return NULL; 1923 if (needed_intregs > nintregs || needed_sseregs > nsseregs) 1924 return NULL; 1925 1926 /* First construct simple cases. Avoid SCmode, since we want to use 1927 single register to pass this type. */ 1928 if (n == 1 && mode != SCmode) 1929 switch (class[0]) 1930 { 1931 case X86_64_INTEGER_CLASS: 1932 case X86_64_INTEGERSI_CLASS: 1933 return gen_rtx_REG (mode, intreg[0]); 1934 case X86_64_SSE_CLASS: 1935 case X86_64_SSESF_CLASS: 1936 case X86_64_SSEDF_CLASS: 1937 return gen_rtx_REG (mode, SSE_REGNO (sse_regno)); 1938 case X86_64_X87_CLASS: 1939 return gen_rtx_REG (mode, FIRST_STACK_REG); 1940 case X86_64_NO_CLASS: 1941 /* Zero sized array, struct or class. 
*/ 1942 return NULL; 1943 default: 1944 abort (); 1945 } 1946 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS) 1947 return gen_rtx_REG (TImode, SSE_REGNO (sse_regno)); 1948 if (n == 2 1949 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS) 1950 return gen_rtx_REG (TFmode, FIRST_STACK_REG); 1951 if (n == 2 && class[0] == X86_64_INTEGER_CLASS 1952 && class[1] == X86_64_INTEGER_CLASS 1953 && (mode == CDImode || mode == TImode) 1954 && intreg[0] + 1 == intreg[1]) 1955 return gen_rtx_REG (mode, intreg[0]); 1956 if (n == 4 1957 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS 1958 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS) 1959 return gen_rtx_REG (TCmode, FIRST_STACK_REG); 1960 1961 /* Otherwise figure out the entries of the PARALLEL. */ 1962 for (i = 0; i < n; i++) 1963 { 1964 switch (class[i]) 1965 { 1966 case X86_64_NO_CLASS: 1967 break; 1968 case X86_64_INTEGER_CLASS: 1969 case X86_64_INTEGERSI_CLASS: 1970 /* Merge TImodes on aligned occassions here too. */ 1971 if (i * 8 + 8 > bytes) 1972 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0); 1973 else if (class[i] == X86_64_INTEGERSI_CLASS) 1974 tmpmode = SImode; 1975 else 1976 tmpmode = DImode; 1977 /* We've requested 24 bytes we don't have mode for. Use DImode. 
*/ 1978 if (tmpmode == BLKmode) 1979 tmpmode = DImode; 1980 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, 1981 gen_rtx_REG (tmpmode, *intreg), 1982 GEN_INT (i*8)); 1983 intreg++; 1984 break; 1985 case X86_64_SSESF_CLASS: 1986 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, 1987 gen_rtx_REG (SFmode, 1988 SSE_REGNO (sse_regno)), 1989 GEN_INT (i*8)); 1990 sse_regno++; 1991 break; 1992 case X86_64_SSEDF_CLASS: 1993 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, 1994 gen_rtx_REG (DFmode, 1995 SSE_REGNO (sse_regno)), 1996 GEN_INT (i*8)); 1997 sse_regno++; 1998 break; 1999 case X86_64_SSE_CLASS: 2000 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS) 2001 tmpmode = TImode, i++; 2002 else 2003 tmpmode = DImode; 2004 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, 2005 gen_rtx_REG (tmpmode, 2006 SSE_REGNO (sse_regno)), 2007 GEN_INT (i*8)); 2008 sse_regno++; 2009 break; 2010 default: 2011 abort (); 2012 } 2013 } 2014 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps)); 2015 for (i = 0; i < nexps; i++) 2016 XVECEXP (ret, 0, i) = exp [i]; 2017 return ret; 2018} 2019 2020/* Update the data in CUM to advance over an argument 2021 of mode MODE and data type TYPE. 2022 (TYPE is null for libcalls where that information may not be available.) */ 2023 2024void 2025function_arg_advance (cum, mode, type, named) 2026 CUMULATIVE_ARGS *cum; /* current arg information */ 2027 enum machine_mode mode; /* current arg mode */ 2028 tree type; /* type of the argument or 0 if lib support */ 2029 int named; /* whether or not the argument was named */ 2030{ 2031 int bytes = 2032 (mode == BLKmode) ? 
int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 2033 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 2034 2035 if (TARGET_DEBUG_ARG) 2036 fprintf (stderr, 2037 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n", 2038 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named); 2039 if (TARGET_64BIT) 2040 { 2041 int int_nregs, sse_nregs; 2042 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)) 2043 cum->words += words; 2044 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) 2045 { 2046 cum->nregs -= int_nregs; 2047 cum->sse_nregs -= sse_nregs; 2048 cum->regno += int_nregs; 2049 cum->sse_regno += sse_nregs; 2050 } 2051 else 2052 cum->words += words; 2053 } 2054 else 2055 { 2056 if (TARGET_SSE && mode == TImode) 2057 { 2058 cum->sse_words += words; 2059 cum->sse_nregs -= 1; 2060 cum->sse_regno += 1; 2061 if (cum->sse_nregs <= 0) 2062 { 2063 cum->sse_nregs = 0; 2064 cum->sse_regno = 0; 2065 } 2066 } 2067 else 2068 { 2069 cum->words += words; 2070 cum->nregs -= words; 2071 cum->regno += words; 2072 2073 if (cum->nregs <= 0) 2074 { 2075 cum->nregs = 0; 2076 cum->regno = 0; 2077 } 2078 } 2079 } 2080 return; 2081} 2082 2083/* Define where to put the arguments to a function. 2084 Value is zero to push the argument on the stack, 2085 or a hard register in which to store the argument. 2086 2087 MODE is the argument's machine mode. 2088 TYPE is the data type of the argument (as a tree). 2089 This is null for libcalls where that information may 2090 not be available. 2091 CUM is a variable of type CUMULATIVE_ARGS which gives info about 2092 the preceding args and about the function being called. 2093 NAMED is nonzero if this argument is a named parameter 2094 (otherwise it is an extra parameter matching an ellipsis). 
*/ 2095 2096rtx 2097function_arg (cum, mode, type, named) 2098 CUMULATIVE_ARGS *cum; /* current arg information */ 2099 enum machine_mode mode; /* current arg mode */ 2100 tree type; /* type of the argument or 0 if lib support */ 2101 int named; /* != 0 for normal args, == 0 for ... args */ 2102{ 2103 rtx ret = NULL_RTX; 2104 int bytes = 2105 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 2106 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 2107 2108 /* Handle an hidden AL argument containing number of registers for varargs 2109 x86-64 functions. For i386 ABI just return constm1_rtx to avoid 2110 any AL settings. */ 2111 if (mode == VOIDmode) 2112 { 2113 if (TARGET_64BIT) 2114 return GEN_INT (cum->maybe_vaarg 2115 ? (cum->sse_nregs < 0 2116 ? SSE_REGPARM_MAX 2117 : cum->sse_regno) 2118 : -1); 2119 else 2120 return constm1_rtx; 2121 } 2122 if (TARGET_64BIT) 2123 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs, 2124 &x86_64_int_parameter_registers [cum->regno], 2125 cum->sse_regno); 2126 else 2127 switch (mode) 2128 { 2129 /* For now, pass fp/complex values on the stack. */ 2130 default: 2131 break; 2132 2133 case BLKmode: 2134 case DImode: 2135 case SImode: 2136 case HImode: 2137 case QImode: 2138 if (words <= cum->nregs) 2139 ret = gen_rtx_REG (mode, cum->regno); 2140 break; 2141 case TImode: 2142 if (cum->sse_nregs) 2143 ret = gen_rtx_REG (mode, cum->sse_regno); 2144 break; 2145 } 2146 2147 if (TARGET_DEBUG_ARG) 2148 { 2149 fprintf (stderr, 2150 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d", 2151 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named); 2152 2153 if (ret) 2154 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]); 2155 else 2156 fprintf (stderr, ", stack"); 2157 2158 fprintf (stderr, " )\n"); 2159 } 2160 2161 return ret; 2162} 2163 2164/* Gives the alignment boundary, in bits, of an argument with the specified mode 2165 and type. 
*/ 2166 2167int 2168ix86_function_arg_boundary (mode, type) 2169 enum machine_mode mode; 2170 tree type; 2171{ 2172 int align; 2173 if (!TARGET_64BIT) 2174 return PARM_BOUNDARY; 2175 if (type) 2176 align = TYPE_ALIGN (type); 2177 else 2178 align = GET_MODE_ALIGNMENT (mode); 2179 if (align < PARM_BOUNDARY) 2180 align = PARM_BOUNDARY; 2181 if (align > 128) 2182 align = 128; 2183 return align; 2184} 2185 2186/* Return true if N is a possible register number of function value. */ 2187bool 2188ix86_function_value_regno_p (regno) 2189 int regno; 2190{ 2191 if (!TARGET_64BIT) 2192 { 2193 return ((regno) == 0 2194 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387) 2195 || ((regno) == FIRST_SSE_REG && TARGET_SSE)); 2196 } 2197 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG 2198 || ((regno) == FIRST_SSE_REG && TARGET_SSE) 2199 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)); 2200} 2201 2202/* Define how to find the value returned by a function. 2203 VALTYPE is the data type of the value (as a tree). 2204 If the precise function being called is known, FUNC is its FUNCTION_DECL; 2205 otherwise, FUNC is 0. */ 2206rtx 2207ix86_function_value (valtype) 2208 tree valtype; 2209{ 2210 if (TARGET_64BIT) 2211 { 2212 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1, 2213 REGPARM_MAX, SSE_REGPARM_MAX, 2214 x86_64_int_return_registers, 0); 2215 /* For zero sized structures, construct_continer return NULL, but we need 2216 to keep rest of compiler happy by returning meaningfull value. */ 2217 if (!ret) 2218 ret = gen_rtx_REG (TYPE_MODE (valtype), 0); 2219 return ret; 2220 } 2221 else 2222 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype))); 2223} 2224 2225/* Return false iff type is returned in memory. 
*/ 2226int 2227ix86_return_in_memory (type) 2228 tree type; 2229{ 2230 int needed_intregs, needed_sseregs; 2231 if (TARGET_64BIT) 2232 { 2233 return !examine_argument (TYPE_MODE (type), type, 1, 2234 &needed_intregs, &needed_sseregs); 2235 } 2236 else 2237 { 2238 if (TYPE_MODE (type) == BLKmode 2239 || (VECTOR_MODE_P (TYPE_MODE (type)) 2240 && int_size_in_bytes (type) == 8) 2241 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode 2242 && TYPE_MODE (type) != TFmode 2243 && !VECTOR_MODE_P (TYPE_MODE (type)))) 2244 return 1; 2245 return 0; 2246 } 2247} 2248 2249/* Define how to find the value returned by a library function 2250 assuming the value has mode MODE. */ 2251rtx 2252ix86_libcall_value (mode) 2253 enum machine_mode mode; 2254{ 2255 if (TARGET_64BIT) 2256 { 2257 switch (mode) 2258 { 2259 case SFmode: 2260 case SCmode: 2261 case DFmode: 2262 case DCmode: 2263 return gen_rtx_REG (mode, FIRST_SSE_REG); 2264 case TFmode: 2265 case TCmode: 2266 return gen_rtx_REG (mode, FIRST_FLOAT_REG); 2267 default: 2268 return gen_rtx_REG (mode, 0); 2269 } 2270 } 2271 else 2272 return gen_rtx_REG (mode, VALUE_REGNO (mode)); 2273} 2274 2275/* Create the va_list data type. */ 2276 2277tree 2278ix86_build_va_list () 2279{ 2280 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl; 2281 2282 /* For i386 we use plain pointer to argument area. 
*/ 2283 if (!TARGET_64BIT) 2284 return build_pointer_type (char_type_node); 2285 2286 record = make_lang_type (RECORD_TYPE); 2287 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record); 2288 2289 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"), 2290 unsigned_type_node); 2291 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"), 2292 unsigned_type_node); 2293 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"), 2294 ptr_type_node); 2295 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"), 2296 ptr_type_node); 2297 2298 DECL_FIELD_CONTEXT (f_gpr) = record; 2299 DECL_FIELD_CONTEXT (f_fpr) = record; 2300 DECL_FIELD_CONTEXT (f_ovf) = record; 2301 DECL_FIELD_CONTEXT (f_sav) = record; 2302 2303 TREE_CHAIN (record) = type_decl; 2304 TYPE_NAME (record) = type_decl; 2305 TYPE_FIELDS (record) = f_gpr; 2306 TREE_CHAIN (f_gpr) = f_fpr; 2307 TREE_CHAIN (f_fpr) = f_ovf; 2308 TREE_CHAIN (f_ovf) = f_sav; 2309 2310 layout_type (record); 2311 2312 /* The correct type is an array type of one element. */ 2313 return build_array_type (record, build_index_type (size_zero_node)); 2314} 2315 2316/* Perform any needed actions needed for a function that is receiving a 2317 variable number of arguments. 2318 2319 CUM is as above. 2320 2321 MODE and TYPE are the mode and type of the current parameter. 2322 2323 PRETEND_SIZE is a variable that should be set to the amount of stack 2324 that must be pushed by the prolog to pretend that our caller pushed 2325 it. 2326 2327 Normally, this macro will push all remaining incoming registers on the 2328 stack and set PRETEND_SIZE to the length of the registers pushed. 
*/

void
ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int *pretend_size ATTRIBUTE_UNUSED;
     int no_rtl;

{
  CUMULATIVE_ARGS next_cum;
  rtx save_area = NULL_RTX, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  int set;
  tree fntype;
  int stdarg_p;
  int i;

  /* Only the x86-64 register-save ABI needs work here.  */
  if (!TARGET_64BIT)
    return;

  /* Indicate to allocate space on the stack for varargs save area.  */
  ix86_save_varrargs_registers = 1;

  fntype = TREE_TYPE (current_function_decl);
  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		  != void_type_node));

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p)
    function_arg_advance (&next_cum, mode, type, 1);

  if (!no_rtl)
    save_area = frame_pointer_rtx;

  set = get_varargs_alias_set ();

  /* Save the unnamed integer argument registers into the first
     REGPARM_MAX word-sized slots of the save area.  */
  for (i = next_cum.regno; i < ix86_regparm; i++)
    {
      mem = gen_rtx_MEM (Pmode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
					x86_64_int_parameter_registers[i]));
    }

  if (next_cum.sse_nregs)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function.  We use
	 sse_prologue_save insn template that produces computed jump across
	 SSE saves.  We need some preparation work to get this working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute address to jump to :
	 label - 5*eax + nnamed_sse_arguments*5.
	 NOTE(review): the code below scales by 4, not 5, as the older
	 comment said -- confirm against the sse_prologue_save insn's
	 per-register save size in i386.md.  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      gen_rtx_MULT (Pmode, nsse_reg,
					    GEN_INT (4))));
      if (next_cum.sse_regno)
	emit_move_insn
	  (nsse_reg,
	   gen_rtx_CONST (DImode,
			  gen_rtx_PLUS (DImode,
					label_ref,
					GEN_INT (next_cum.sse_regno * 4))));
      else
	emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute address of memory block we save into.  We always use pointer
	 pointing 127 bytes after first byte to store - this is needed to keep
	 instruction size limited by 4 bytes.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      plus_constant (save_area,
					     8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
					GEN_INT (next_cum.sse_regno), label));
    }

}

/* Implement va_start.  */

void
ix86_va_start (stdarg_p, valist, nextarg)
     int stdarg_p;
     tree valist;
     rtx nextarg;
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      std_expand_builtin_va_start (stdarg_p, valist, nextarg);
      return;
    }

  /* The four __va_list_tag fields, in layout order (see
     ix86_build_va_list).  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
	     (int) words, (int) n_gpr, (int) n_fpr);

  /* gp_offset: byte offset of the first unused GP register slot.  */
  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
	     build_int_2 (n_gpr * 8, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* fp_offset: SSE slots are 16 bytes each and follow the
     8*REGPARM_MAX bytes of GP slots.  */
  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
	     build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the overflow area.  */
  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
  if (words != 0)
    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
	       build_int_2 (words * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the register save area.
     Prologue of the function save it right above stack frame.  */
  t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
  t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}

/* Implement va_arg.  */
rtx
ix86_va_arg (valist, type)
     tree valist, type;
{
  static int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  rtx lab_false, lab_over = NULL_RTX;
  rtx addr_rtx, r;
  rtx container;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      return std_expand_builtin_va_arg (valist, type);
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  container = construct_container (TYPE_MODE (type), type, 0,
				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
  /*
   * Pull the value out of the saved registers ...
   */

  addr_rtx = gen_reg_rtx (Pmode);

  if (container)
    {
      rtx int_addr_rtx, sse_addr_rtx;
      int needed_intregs, needed_sseregs;
      int need_temp;

      lab_over = gen_label_rtx ();
      lab_false = gen_label_rtx ();

      examine_argument (TYPE_MODE (type), type, 0,
			&needed_intregs, &needed_sseregs);


      need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
		   || TYPE_ALIGN (type) > 128);

      /* In case we are passing structure, verify that it is consecutive block
	 on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr_rtx = addr_rtx;
	  sse_addr_rtx = addr_rtx;
	}
      else
	{
	  int_addr_rtx = gen_reg_rtx (Pmode);
	  sse_addr_rtx = gen_reg_rtx (Pmode);
	}
      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  emit_cmp_and_jump_insns (expand_expr
				   (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
				   GEN_INT ((REGPARM_MAX - needed_intregs +
					     1) * 8), GE, const1_rtx, SImode,
				   1, lab_false);
	}
      if (needed_sseregs)
	{
	  emit_cmp_and_jump_insns (expand_expr
				   (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
				   GEN_INT ((SSE_REGPARM_MAX -
					     needed_sseregs + 1) * 16 +
					    REGPARM_MAX * 8), GE, const1_rtx,
				   SImode, 1, lab_false);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
	  r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
	  if (r != int_addr_rtx)
	    emit_move_insn (int_addr_rtx, r);
	}
      if (needed_sseregs)
	{
	  t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
	  r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
	  if (r != sse_addr_rtx)
	    emit_move_insn (sse_addr_rtx, r);
	}
      if (need_temp)
	{
	  int i;
	  rtx mem;

	  /* Never use the memory itself, as it has the alias set.  */
	  addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
	  mem = gen_rtx_MEM (BLKmode, addr_rtx);
	  set_mem_alias_set (mem, get_varargs_alias_set ());
	  set_mem_align (mem, BITS_PER_UNIT);

	  /* Copy each piece of the argument out of the register save
	     area into the temporary at its container offset.  */
	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      rtx src_addr;
	      rtx src_mem;
	      int src_offset;
	      rtx dest_mem;

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr_rtx;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr_rtx;
		  src_offset = REGNO (reg) * 8;
		}
	      src_mem = gen_rtx_MEM (mode, src_addr);
	      set_mem_alias_set (src_mem, get_varargs_alias_set ());
	      src_mem = adjust_address (src_mem, mode, src_offset);
	      dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
	      emit_move_insn (dest_mem, src_mem);
	    }
	}

      /* Consume the register slots we just read.  */
      if (needed_intregs)
	{
	  t =
	    build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		   build_int_2 (needed_intregs * 8, 0));
	  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
	}
      if (needed_sseregs)
	{
	  t =
	    build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		   build_int_2 (needed_sseregs * 16, 0));
	  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
	}

      emit_jump_insn (gen_jump (lab_over));
      emit_barrier ();
      emit_label (lab_false);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
    }
  t = save_expr (t);

  r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
  if (r != addr_rtx)
    emit_move_insn (addr_rtx, r);

  /* Advance overflow_arg_area past the argument.  */
  t =
    build (PLUS_EXPR, TREE_TYPE (t), t,
	   build_int_2 (rsize * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (container)
    emit_label (lab_over);

  return addr_rtx;
}

/* Return nonzero if OP is general operand representable on x86_64.  */

int
x86_64_general_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!TARGET_64BIT)
    return general_operand (op, mode);
  if (nonimmediate_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op);
}

/* Return nonzero if OP is general operand representable on x86_64
   as either sign extended or zero extended constant.  */

int
x86_64_szext_general_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!TARGET_64BIT)
    return general_operand (op, mode);
  if (nonimmediate_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
}

/* Return nonzero if OP is nonmemory operand representable on x86_64.
*/ 2748 2749int 2750x86_64_nonmemory_operand (op, mode) 2751 rtx op; 2752 enum machine_mode mode; 2753{ 2754 if (!TARGET_64BIT) 2755 return nonmemory_operand (op, mode); 2756 if (register_operand (op, mode)) 2757 return 1; 2758 return x86_64_sign_extended_value (op); 2759} 2760 2761/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */ 2762 2763int 2764x86_64_movabs_operand (op, mode) 2765 rtx op; 2766 enum machine_mode mode; 2767{ 2768 if (!TARGET_64BIT || !flag_pic) 2769 return nonmemory_operand (op, mode); 2770 if (register_operand (op, mode) || x86_64_sign_extended_value (op)) 2771 return 1; 2772 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op)) 2773 return 1; 2774 return 0; 2775} 2776 2777/* Return nonzero if OP is nonmemory operand representable on x86_64. */ 2778 2779int 2780x86_64_szext_nonmemory_operand (op, mode) 2781 rtx op; 2782 enum machine_mode mode; 2783{ 2784 if (!TARGET_64BIT) 2785 return nonmemory_operand (op, mode); 2786 if (register_operand (op, mode)) 2787 return 1; 2788 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op); 2789} 2790 2791/* Return nonzero if OP is immediate operand representable on x86_64. */ 2792 2793int 2794x86_64_immediate_operand (op, mode) 2795 rtx op; 2796 enum machine_mode mode; 2797{ 2798 if (!TARGET_64BIT) 2799 return immediate_operand (op, mode); 2800 return x86_64_sign_extended_value (op); 2801} 2802 2803/* Return nonzero if OP is immediate operand representable on x86_64. */ 2804 2805int 2806x86_64_zext_immediate_operand (op, mode) 2807 rtx op; 2808 enum machine_mode mode ATTRIBUTE_UNUSED; 2809{ 2810 return x86_64_zero_extended_value (op); 2811} 2812 2813/* Return nonzero if OP is (const_int 1), else return zero. 
*/ 2814 2815int 2816const_int_1_operand (op, mode) 2817 rtx op; 2818 enum machine_mode mode ATTRIBUTE_UNUSED; 2819{ 2820 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1); 2821} 2822 2823/* Returns 1 if OP is either a symbol reference or a sum of a symbol 2824 reference and a constant. */ 2825 2826int 2827symbolic_operand (op, mode) 2828 register rtx op; 2829 enum machine_mode mode ATTRIBUTE_UNUSED; 2830{ 2831 switch (GET_CODE (op)) 2832 { 2833 case SYMBOL_REF: 2834 case LABEL_REF: 2835 return 1; 2836 2837 case CONST: 2838 op = XEXP (op, 0); 2839 if (GET_CODE (op) == SYMBOL_REF 2840 || GET_CODE (op) == LABEL_REF 2841 || (GET_CODE (op) == UNSPEC 2842 && (XINT (op, 1) == 6 2843 || XINT (op, 1) == 7 2844 || XINT (op, 1) == 15))) 2845 return 1; 2846 if (GET_CODE (op) != PLUS 2847 || GET_CODE (XEXP (op, 1)) != CONST_INT) 2848 return 0; 2849 2850 op = XEXP (op, 0); 2851 if (GET_CODE (op) == SYMBOL_REF 2852 || GET_CODE (op) == LABEL_REF) 2853 return 1; 2854 /* Only @GOTOFF gets offsets. */ 2855 if (GET_CODE (op) != UNSPEC 2856 || XINT (op, 1) != 7) 2857 return 0; 2858 2859 op = XVECEXP (op, 0, 0); 2860 if (GET_CODE (op) == SYMBOL_REF 2861 || GET_CODE (op) == LABEL_REF) 2862 return 1; 2863 return 0; 2864 2865 default: 2866 return 0; 2867 } 2868} 2869 2870/* Return true if the operand contains a @GOT or @GOTOFF reference. */ 2871 2872int 2873pic_symbolic_operand (op, mode) 2874 register rtx op; 2875 enum machine_mode mode ATTRIBUTE_UNUSED; 2876{ 2877 if (GET_CODE (op) != CONST) 2878 return 0; 2879 op = XEXP (op, 0); 2880 if (TARGET_64BIT) 2881 { 2882 if (GET_CODE (XEXP (op, 0)) == UNSPEC) 2883 return 1; 2884 } 2885 else 2886 { 2887 if (GET_CODE (op) == UNSPEC) 2888 return 1; 2889 if (GET_CODE (op) != PLUS 2890 || GET_CODE (XEXP (op, 1)) != CONST_INT) 2891 return 0; 2892 op = XEXP (op, 0); 2893 if (GET_CODE (op) == UNSPEC) 2894 return 1; 2895 } 2896 return 0; 2897} 2898 2899/* Return true if OP is a symbolic operand that resolves locally. 
*/ 2900 2901static int 2902local_symbolic_operand (op, mode) 2903 rtx op; 2904 enum machine_mode mode ATTRIBUTE_UNUSED; 2905{ 2906 if (GET_CODE (op) == LABEL_REF) 2907 return 1; 2908 2909 if (GET_CODE (op) == CONST 2910 && GET_CODE (XEXP (op, 0)) == PLUS 2911 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT) 2912 op = XEXP (XEXP (op, 0), 0); 2913 2914 if (GET_CODE (op) != SYMBOL_REF) 2915 return 0; 2916 2917 /* These we've been told are local by varasm and encode_section_info 2918 respectively. */ 2919 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op)) 2920 return 1; 2921 2922 /* There is, however, a not insubstantial body of code in the rest of 2923 the compiler that assumes it can just stick the results of 2924 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */ 2925 /* ??? This is a hack. Should update the body of the compiler to 2926 always create a DECL an invoke ENCODE_SECTION_INFO. */ 2927 if (strncmp (XSTR (op, 0), internal_label_prefix, 2928 internal_label_prefix_len) == 0) 2929 return 1; 2930 2931 return 0; 2932} 2933 2934/* Test for a valid operand for a call instruction. Don't allow the 2935 arg pointer register or virtual regs since they may decay into 2936 reg + const, which the patterns can't handle. */ 2937 2938int 2939call_insn_operand (op, mode) 2940 rtx op; 2941 enum machine_mode mode ATTRIBUTE_UNUSED; 2942{ 2943 /* Disallow indirect through a virtual register. This leads to 2944 compiler aborts when trying to eliminate them. */ 2945 if (GET_CODE (op) == REG 2946 && (op == arg_pointer_rtx 2947 || op == frame_pointer_rtx 2948 || (REGNO (op) >= FIRST_PSEUDO_REGISTER 2949 && REGNO (op) <= LAST_VIRTUAL_REGISTER))) 2950 return 0; 2951 2952 /* Disallow `call 1234'. Due to varying assembler lameness this 2953 gets either rejected or translated to `call .+1234'. */ 2954 if (GET_CODE (op) == CONST_INT) 2955 return 0; 2956 2957 /* Explicitly allow SYMBOL_REF even if pic. 
*/ 2958 if (GET_CODE (op) == SYMBOL_REF) 2959 return 1; 2960 2961 /* Half-pic doesn't allow anything but registers and constants. 2962 We've just taken care of the later. */ 2963 if (HALF_PIC_P ()) 2964 return register_operand (op, Pmode); 2965 2966 /* Otherwise we can allow any general_operand in the address. */ 2967 return general_operand (op, Pmode); 2968} 2969 2970int 2971constant_call_address_operand (op, mode) 2972 rtx op; 2973 enum machine_mode mode ATTRIBUTE_UNUSED; 2974{ 2975 if (GET_CODE (op) == CONST 2976 && GET_CODE (XEXP (op, 0)) == PLUS 2977 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT) 2978 op = XEXP (XEXP (op, 0), 0); 2979 return GET_CODE (op) == SYMBOL_REF; 2980} 2981 2982/* Match exactly zero and one. */ 2983 2984int 2985const0_operand (op, mode) 2986 register rtx op; 2987 enum machine_mode mode; 2988{ 2989 return op == CONST0_RTX (mode); 2990} 2991 2992int 2993const1_operand (op, mode) 2994 register rtx op; 2995 enum machine_mode mode ATTRIBUTE_UNUSED; 2996{ 2997 return op == const1_rtx; 2998} 2999 3000/* Match 2, 4, or 8. Used for leal multiplicands. */ 3001 3002int 3003const248_operand (op, mode) 3004 register rtx op; 3005 enum machine_mode mode ATTRIBUTE_UNUSED; 3006{ 3007 return (GET_CODE (op) == CONST_INT 3008 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8)); 3009} 3010 3011/* True if this is a constant appropriate for an increment or decremenmt. */ 3012 3013int 3014incdec_operand (op, mode) 3015 register rtx op; 3016 enum machine_mode mode ATTRIBUTE_UNUSED; 3017{ 3018 /* On Pentium4, the inc and dec operations causes extra dependency on flag 3019 registers, since carry flag is not set. */ 3020 if (TARGET_PENTIUM4 && !optimize_size) 3021 return 0; 3022 return op == const1_rtx || op == constm1_rtx; 3023} 3024 3025/* Return nonzero if OP is acceptable as operand of DImode shift 3026 expander. 
*/ 3027 3028int 3029shiftdi_operand (op, mode) 3030 rtx op; 3031 enum machine_mode mode ATTRIBUTE_UNUSED; 3032{ 3033 if (TARGET_64BIT) 3034 return nonimmediate_operand (op, mode); 3035 else 3036 return register_operand (op, mode); 3037} 3038 3039/* Return false if this is the stack pointer, or any other fake 3040 register eliminable to the stack pointer. Otherwise, this is 3041 a register operand. 3042 3043 This is used to prevent esp from being used as an index reg. 3044 Which would only happen in pathological cases. */ 3045 3046int 3047reg_no_sp_operand (op, mode) 3048 register rtx op; 3049 enum machine_mode mode; 3050{ 3051 rtx t = op; 3052 if (GET_CODE (t) == SUBREG) 3053 t = SUBREG_REG (t); 3054 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx) 3055 return 0; 3056 3057 return register_operand (op, mode); 3058} 3059 3060int 3061mmx_reg_operand (op, mode) 3062 register rtx op; 3063 enum machine_mode mode ATTRIBUTE_UNUSED; 3064{ 3065 return MMX_REG_P (op); 3066} 3067 3068/* Return false if this is any eliminable register. Otherwise 3069 general_operand. */ 3070 3071int 3072general_no_elim_operand (op, mode) 3073 register rtx op; 3074 enum machine_mode mode; 3075{ 3076 rtx t = op; 3077 if (GET_CODE (t) == SUBREG) 3078 t = SUBREG_REG (t); 3079 if (t == arg_pointer_rtx || t == frame_pointer_rtx 3080 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx 3081 || t == virtual_stack_dynamic_rtx) 3082 return 0; 3083 if (REG_P (t) 3084 && REGNO (t) >= FIRST_VIRTUAL_REGISTER 3085 && REGNO (t) <= LAST_VIRTUAL_REGISTER) 3086 return 0; 3087 3088 return general_operand (op, mode); 3089} 3090 3091/* Return false if this is any eliminable register. Otherwise 3092 register_operand or const_int. 
*/ 3093 3094int 3095nonmemory_no_elim_operand (op, mode) 3096 register rtx op; 3097 enum machine_mode mode; 3098{ 3099 rtx t = op; 3100 if (GET_CODE (t) == SUBREG) 3101 t = SUBREG_REG (t); 3102 if (t == arg_pointer_rtx || t == frame_pointer_rtx 3103 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx 3104 || t == virtual_stack_dynamic_rtx) 3105 return 0; 3106 3107 return GET_CODE (op) == CONST_INT || register_operand (op, mode); 3108} 3109 3110/* Return true if op is a Q_REGS class register. */ 3111 3112int 3113q_regs_operand (op, mode) 3114 register rtx op; 3115 enum machine_mode mode; 3116{ 3117 if (mode != VOIDmode && GET_MODE (op) != mode) 3118 return 0; 3119 if (GET_CODE (op) == SUBREG) 3120 op = SUBREG_REG (op); 3121 return QI_REG_P (op); 3122} 3123 3124/* Return true if op is a NON_Q_REGS class register. */ 3125 3126int 3127non_q_regs_operand (op, mode) 3128 register rtx op; 3129 enum machine_mode mode; 3130{ 3131 if (mode != VOIDmode && GET_MODE (op) != mode) 3132 return 0; 3133 if (GET_CODE (op) == SUBREG) 3134 op = SUBREG_REG (op); 3135 return NON_QI_REG_P (op); 3136} 3137 3138/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS 3139 insns. */ 3140int 3141sse_comparison_operator (op, mode) 3142 rtx op; 3143 enum machine_mode mode ATTRIBUTE_UNUSED; 3144{ 3145 enum rtx_code code = GET_CODE (op); 3146 switch (code) 3147 { 3148 /* Operations supported directly. */ 3149 case EQ: 3150 case LT: 3151 case LE: 3152 case UNORDERED: 3153 case NE: 3154 case UNGE: 3155 case UNGT: 3156 case ORDERED: 3157 return 1; 3158 /* These are equivalent to ones above in non-IEEE comparisons. */ 3159 case UNEQ: 3160 case UNLT: 3161 case UNLE: 3162 case LTGT: 3163 case GE: 3164 case GT: 3165 return !TARGET_IEEE_FP; 3166 default: 3167 return 0; 3168 } 3169} 3170/* Return 1 if OP is a valid comparison operator in valid mode. 
*/ 3171int 3172ix86_comparison_operator (op, mode) 3173 register rtx op; 3174 enum machine_mode mode; 3175{ 3176 enum machine_mode inmode; 3177 enum rtx_code code = GET_CODE (op); 3178 if (mode != VOIDmode && GET_MODE (op) != mode) 3179 return 0; 3180 if (GET_RTX_CLASS (code) != '<') 3181 return 0; 3182 inmode = GET_MODE (XEXP (op, 0)); 3183 3184 if (inmode == CCFPmode || inmode == CCFPUmode) 3185 { 3186 enum rtx_code second_code, bypass_code; 3187 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 3188 return (bypass_code == NIL && second_code == NIL); 3189 } 3190 switch (code) 3191 { 3192 case EQ: case NE: 3193 return 1; 3194 case LT: case GE: 3195 if (inmode == CCmode || inmode == CCGCmode 3196 || inmode == CCGOCmode || inmode == CCNOmode) 3197 return 1; 3198 return 0; 3199 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU: 3200 if (inmode == CCmode) 3201 return 1; 3202 return 0; 3203 case GT: case LE: 3204 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode) 3205 return 1; 3206 return 0; 3207 default: 3208 return 0; 3209 } 3210} 3211 3212/* Return 1 if OP is a comparison operator that can be issued by fcmov. */ 3213 3214int 3215fcmov_comparison_operator (op, mode) 3216 register rtx op; 3217 enum machine_mode mode; 3218{ 3219 enum machine_mode inmode; 3220 enum rtx_code code = GET_CODE (op); 3221 if (mode != VOIDmode && GET_MODE (op) != mode) 3222 return 0; 3223 if (GET_RTX_CLASS (code) != '<') 3224 return 0; 3225 inmode = GET_MODE (XEXP (op, 0)); 3226 if (inmode == CCFPmode || inmode == CCFPUmode) 3227 { 3228 enum rtx_code second_code, bypass_code; 3229 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 3230 if (bypass_code != NIL || second_code != NIL) 3231 return 0; 3232 code = ix86_fp_compare_code_to_integer (code); 3233 } 3234 /* i387 supports just limited amount of conditional codes. 
*/ 3235 switch (code) 3236 { 3237 case LTU: case GTU: case LEU: case GEU: 3238 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode) 3239 return 1; 3240 return 0; 3241 case ORDERED: case UNORDERED: 3242 case EQ: case NE: 3243 return 1; 3244 default: 3245 return 0; 3246 } 3247} 3248 3249/* Return 1 if OP is a binary operator that can be promoted to wider mode. */ 3250 3251int 3252promotable_binary_operator (op, mode) 3253 register rtx op; 3254 enum machine_mode mode ATTRIBUTE_UNUSED; 3255{ 3256 switch (GET_CODE (op)) 3257 { 3258 case MULT: 3259 /* Modern CPUs have same latency for HImode and SImode multiply, 3260 but 386 and 486 do HImode multiply faster. */ 3261 return ix86_cpu > PROCESSOR_I486; 3262 case PLUS: 3263 case AND: 3264 case IOR: 3265 case XOR: 3266 case ASHIFT: 3267 return 1; 3268 default: 3269 return 0; 3270 } 3271} 3272 3273/* Nearly general operand, but accept any const_double, since we wish 3274 to be able to drop them into memory rather than have them get pulled 3275 into registers. */ 3276 3277int 3278cmp_fp_expander_operand (op, mode) 3279 register rtx op; 3280 enum machine_mode mode; 3281{ 3282 if (mode != VOIDmode && mode != GET_MODE (op)) 3283 return 0; 3284 if (GET_CODE (op) == CONST_DOUBLE) 3285 return 1; 3286 return general_operand (op, mode); 3287} 3288 3289/* Match an SI or HImode register for a zero_extract. */ 3290 3291int 3292ext_register_operand (op, mode) 3293 register rtx op; 3294 enum machine_mode mode ATTRIBUTE_UNUSED; 3295{ 3296 int regno; 3297 if ((!TARGET_64BIT || GET_MODE (op) != DImode) 3298 && GET_MODE (op) != SImode && GET_MODE (op) != HImode) 3299 return 0; 3300 3301 if (!register_operand (op, VOIDmode)) 3302 return 0; 3303 3304 /* Be curefull to accept only registers having upper parts. */ 3305 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op)); 3306 return (regno > LAST_VIRTUAL_REGISTER || regno < 4); 3307} 3308 3309/* Return 1 if this is a valid binary floating-point operation. 
3310 OP is the expression matched, and MODE is its mode. */ 3311 3312int 3313binary_fp_operator (op, mode) 3314 register rtx op; 3315 enum machine_mode mode; 3316{ 3317 if (mode != VOIDmode && mode != GET_MODE (op)) 3318 return 0; 3319 3320 switch (GET_CODE (op)) 3321 { 3322 case PLUS: 3323 case MINUS: 3324 case MULT: 3325 case DIV: 3326 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT; 3327 3328 default: 3329 return 0; 3330 } 3331} 3332 3333int 3334mult_operator (op, mode) 3335 register rtx op; 3336 enum machine_mode mode ATTRIBUTE_UNUSED; 3337{ 3338 return GET_CODE (op) == MULT; 3339} 3340 3341int 3342div_operator (op, mode) 3343 register rtx op; 3344 enum machine_mode mode ATTRIBUTE_UNUSED; 3345{ 3346 return GET_CODE (op) == DIV; 3347} 3348 3349int 3350arith_or_logical_operator (op, mode) 3351 rtx op; 3352 enum machine_mode mode; 3353{ 3354 return ((mode == VOIDmode || GET_MODE (op) == mode) 3355 && (GET_RTX_CLASS (GET_CODE (op)) == 'c' 3356 || GET_RTX_CLASS (GET_CODE (op)) == '2')); 3357} 3358 3359/* Returns 1 if OP is memory operand with a displacement. */ 3360 3361int 3362memory_displacement_operand (op, mode) 3363 register rtx op; 3364 enum machine_mode mode; 3365{ 3366 struct ix86_address parts; 3367 3368 if (! memory_operand (op, mode)) 3369 return 0; 3370 3371 if (! ix86_decompose_address (XEXP (op, 0), &parts)) 3372 abort (); 3373 3374 return parts.disp != NULL_RTX; 3375} 3376 3377/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0, 3378 re-recognize the operand to avoid a copy_to_mode_reg that will fail. 3379 3380 ??? It seems likely that this will only work because cmpsi is an 3381 expander, and no actual insns use this. 
*/ 3382 3383int 3384cmpsi_operand (op, mode) 3385 rtx op; 3386 enum machine_mode mode; 3387{ 3388 if (nonimmediate_operand (op, mode)) 3389 return 1; 3390 3391 if (GET_CODE (op) == AND 3392 && GET_MODE (op) == SImode 3393 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT 3394 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT 3395 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT 3396 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8 3397 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8 3398 && GET_CODE (XEXP (op, 1)) == CONST_INT) 3399 return 1; 3400 3401 return 0; 3402} 3403 3404/* Returns 1 if OP is memory operand that can not be represented by the 3405 modRM array. */ 3406 3407int 3408long_memory_operand (op, mode) 3409 register rtx op; 3410 enum machine_mode mode; 3411{ 3412 if (! memory_operand (op, mode)) 3413 return 0; 3414 3415 return memory_address_length (op) != 0; 3416} 3417 3418/* Return nonzero if the rtx is known aligned. */ 3419 3420int 3421aligned_operand (op, mode) 3422 rtx op; 3423 enum machine_mode mode; 3424{ 3425 struct ix86_address parts; 3426 3427 if (!general_operand (op, mode)) 3428 return 0; 3429 3430 /* Registers and immediate operands are always "aligned". */ 3431 if (GET_CODE (op) != MEM) 3432 return 1; 3433 3434 /* Don't even try to do any aligned optimizations with volatiles. */ 3435 if (MEM_VOLATILE_P (op)) 3436 return 0; 3437 3438 op = XEXP (op, 0); 3439 3440 /* Pushes and pops are only valid on the stack pointer. */ 3441 if (GET_CODE (op) == PRE_DEC 3442 || GET_CODE (op) == POST_INC) 3443 return 1; 3444 3445 /* Decode the address. */ 3446 if (! ix86_decompose_address (op, &parts)) 3447 abort (); 3448 3449 /* Look for some component that isn't known to be aligned. 
*/ 3450 if (parts.index) 3451 { 3452 if (parts.scale < 4 3453 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32) 3454 return 0; 3455 } 3456 if (parts.base) 3457 { 3458 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32) 3459 return 0; 3460 } 3461 if (parts.disp) 3462 { 3463 if (GET_CODE (parts.disp) != CONST_INT 3464 || (INTVAL (parts.disp) & 3) != 0) 3465 return 0; 3466 } 3467 3468 /* Didn't find one -- this must be an aligned address. */ 3469 return 1; 3470} 3471 3472/* Return true if the constant is something that can be loaded with 3473 a special instruction. Only handle 0.0 and 1.0; others are less 3474 worthwhile. */ 3475 3476int 3477standard_80387_constant_p (x) 3478 rtx x; 3479{ 3480 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x))) 3481 return -1; 3482 /* Note that on the 80387, other constants, such as pi, that we should support 3483 too. On some machines, these are much slower to load as standard constant, 3484 than to load from doubles in memory. */ 3485 if (x == CONST0_RTX (GET_MODE (x))) 3486 return 1; 3487 if (x == CONST1_RTX (GET_MODE (x))) 3488 return 2; 3489 return 0; 3490} 3491 3492/* Return 1 if X is FP constant we can load to SSE register w/o using memory. 
3493 */ 3494int 3495standard_sse_constant_p (x) 3496 rtx x; 3497{ 3498 if (GET_CODE (x) != CONST_DOUBLE) 3499 return -1; 3500 return (x == CONST0_RTX (GET_MODE (x))); 3501} 3502 3503/* Returns 1 if OP contains a symbol reference */ 3504 3505int 3506symbolic_reference_mentioned_p (op) 3507 rtx op; 3508{ 3509 register const char *fmt; 3510 register int i; 3511 3512 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) 3513 return 1; 3514 3515 fmt = GET_RTX_FORMAT (GET_CODE (op)); 3516 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) 3517 { 3518 if (fmt[i] == 'E') 3519 { 3520 register int j; 3521 3522 for (j = XVECLEN (op, i) - 1; j >= 0; j--) 3523 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) 3524 return 1; 3525 } 3526 3527 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) 3528 return 1; 3529 } 3530 3531 return 0; 3532} 3533 3534/* Return 1 if it is appropriate to emit `ret' instructions in the 3535 body of a function. Do this only if the epilogue is simple, needing a 3536 couple of insns. Prior to reloading, we can't tell how many registers 3537 must be saved, so return 0 then. Return 0 if there is no frame 3538 marker to de-allocate. 3539 3540 If NON_SAVING_SETJMP is defined and true, then it is not possible 3541 for the epilogue to be simple, so return 0. This is a special case 3542 since NON_SAVING_SETJMP will not cause regs_ever_live to change 3543 until final, but jump_optimize may need to know sooner if a 3544 `return' is OK. */ 3545 3546int 3547ix86_can_use_return_insn_p () 3548{ 3549 struct ix86_frame frame; 3550 3551#ifdef NON_SAVING_SETJMP 3552 if (NON_SAVING_SETJMP && current_function_calls_setjmp) 3553 return 0; 3554#endif 3555 3556 if (! reload_completed || frame_pointer_needed) 3557 return 0; 3558 3559 /* Don't allow more than 32 pop, since that's all we can do 3560 with one instruction. 
*/ 3561 if (current_function_pops_args 3562 && current_function_args_size >= 32768) 3563 return 0; 3564 3565 ix86_compute_frame_layout (&frame); 3566 return frame.to_allocate == 0 && frame.nregs == 0; 3567} 3568 3569/* Return 1 if VALUE can be stored in the sign extended immediate field. */ 3570int 3571x86_64_sign_extended_value (value) 3572 rtx value; 3573{ 3574 switch (GET_CODE (value)) 3575 { 3576 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known 3577 to be at least 32 and this all acceptable constants are 3578 represented as CONST_INT. */ 3579 case CONST_INT: 3580 if (HOST_BITS_PER_WIDE_INT == 32) 3581 return 1; 3582 else 3583 { 3584 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode); 3585 return trunc_int_for_mode (val, SImode) == val; 3586 } 3587 break; 3588 3589 /* For certain code models, the symbolic references are known to fit. */ 3590 case SYMBOL_REF: 3591 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL; 3592 3593 /* For certain code models, the code is near as well. */ 3594 case LABEL_REF: 3595 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC; 3596 3597 /* We also may accept the offsetted memory references in certain special 3598 cases. */ 3599 case CONST: 3600 if (GET_CODE (XEXP (value, 0)) == UNSPEC 3601 && XVECLEN (XEXP (value, 0), 0) == 1 3602 && XINT (XEXP (value, 0), 1) == 15) 3603 return 1; 3604 else if (GET_CODE (XEXP (value, 0)) == PLUS) 3605 { 3606 rtx op1 = XEXP (XEXP (value, 0), 0); 3607 rtx op2 = XEXP (XEXP (value, 0), 1); 3608 HOST_WIDE_INT offset; 3609 3610 if (ix86_cmodel == CM_LARGE) 3611 return 0; 3612 if (GET_CODE (op2) != CONST_INT) 3613 return 0; 3614 offset = trunc_int_for_mode (INTVAL (op2), DImode); 3615 switch (GET_CODE (op1)) 3616 { 3617 case SYMBOL_REF: 3618 /* For CM_SMALL assume that latest object is 1MB before 3619 end of 31bits boundary. We may also accept pretty 3620 large negative constants knowing that all objects are 3621 in the positive half of address space. 
*/ 3622 if (ix86_cmodel == CM_SMALL 3623 && offset < 1024*1024*1024 3624 && trunc_int_for_mode (offset, SImode) == offset) 3625 return 1; 3626 /* For CM_KERNEL we know that all object resist in the 3627 negative half of 32bits address space. We may not 3628 accept negative offsets, since they may be just off 3629 and we may accept pretty large positive ones. */ 3630 if (ix86_cmodel == CM_KERNEL 3631 && offset > 0 3632 && trunc_int_for_mode (offset, SImode) == offset) 3633 return 1; 3634 break; 3635 case LABEL_REF: 3636 /* These conditions are similar to SYMBOL_REF ones, just the 3637 constraints for code models differ. */ 3638 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM) 3639 && offset < 1024*1024*1024 3640 && trunc_int_for_mode (offset, SImode) == offset) 3641 return 1; 3642 if (ix86_cmodel == CM_KERNEL 3643 && offset > 0 3644 && trunc_int_for_mode (offset, SImode) == offset) 3645 return 1; 3646 break; 3647 default: 3648 return 0; 3649 } 3650 } 3651 return 0; 3652 default: 3653 return 0; 3654 } 3655} 3656 3657/* Return 1 if VALUE can be stored in the zero extended immediate field. */ 3658int 3659x86_64_zero_extended_value (value) 3660 rtx value; 3661{ 3662 switch (GET_CODE (value)) 3663 { 3664 case CONST_DOUBLE: 3665 if (HOST_BITS_PER_WIDE_INT == 32) 3666 return (GET_MODE (value) == VOIDmode 3667 && !CONST_DOUBLE_HIGH (value)); 3668 else 3669 return 0; 3670 case CONST_INT: 3671 if (HOST_BITS_PER_WIDE_INT == 32) 3672 return INTVAL (value) >= 0; 3673 else 3674 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff); 3675 break; 3676 3677 /* For certain code models, the symbolic references are known to fit. */ 3678 case SYMBOL_REF: 3679 return ix86_cmodel == CM_SMALL; 3680 3681 /* For certain code models, the code is near as well. */ 3682 case LABEL_REF: 3683 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM; 3684 3685 /* We also may accept the offsetted memory references in certain special 3686 cases. 
*/ 3687 case CONST: 3688 if (GET_CODE (XEXP (value, 0)) == PLUS) 3689 { 3690 rtx op1 = XEXP (XEXP (value, 0), 0); 3691 rtx op2 = XEXP (XEXP (value, 0), 1); 3692 3693 if (ix86_cmodel == CM_LARGE) 3694 return 0; 3695 switch (GET_CODE (op1)) 3696 { 3697 case SYMBOL_REF: 3698 return 0; 3699 /* For small code model we may accept pretty large positive 3700 offsets, since one bit is available for free. Negative 3701 offsets are limited by the size of NULL pointer area 3702 specified by the ABI. */ 3703 if (ix86_cmodel == CM_SMALL 3704 && GET_CODE (op2) == CONST_INT 3705 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000 3706 && (trunc_int_for_mode (INTVAL (op2), SImode) 3707 == INTVAL (op2))) 3708 return 1; 3709 /* ??? For the kernel, we may accept adjustment of 3710 -0x10000000, since we know that it will just convert 3711 negative address space to positive, but perhaps this 3712 is not worthwhile. */ 3713 break; 3714 case LABEL_REF: 3715 /* These conditions are similar to SYMBOL_REF ones, just the 3716 constraints for code models differ. */ 3717 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM) 3718 && GET_CODE (op2) == CONST_INT 3719 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000 3720 && (trunc_int_for_mode (INTVAL (op2), SImode) 3721 == INTVAL (op2))) 3722 return 1; 3723 break; 3724 default: 3725 return 0; 3726 } 3727 } 3728 return 0; 3729 default: 3730 return 0; 3731 } 3732} 3733 3734/* Value should be nonzero if functions must have frame pointers. 3735 Zero means the frame pointer need not be set up (and parms may 3736 be accessed via the stack pointer) in functions that seem suitable. */ 3737 3738int 3739ix86_frame_pointer_required () 3740{ 3741 /* If we accessed previous frames, then the generated code expects 3742 to be able to access the saved ebp value in our frame. */ 3743 if (cfun->machine->accesses_prev_frame) 3744 return 1; 3745 3746 /* Several x86 os'es need a frame pointer for other reasons, 3747 usually pertaining to setjmp. 
*/ 3748 if (SUBTARGET_FRAME_POINTER_REQUIRED) 3749 return 1; 3750 3751 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off 3752 the frame pointer by default. Turn it back on now if we've not 3753 got a leaf function. */ 3754 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ()) 3755 return 1; 3756 3757 return 0; 3758} 3759 3760/* Record that the current function accesses previous call frames. */ 3761 3762void 3763ix86_setup_frame_addresses () 3764{ 3765 cfun->machine->accesses_prev_frame = 1; 3766} 3767 3768static char pic_label_name[32]; 3769 3770/* This function generates code for -fpic that loads %ebx with 3771 the return address of the caller and then returns. */ 3772 3773void 3774ix86_asm_file_end (file) 3775 FILE *file; 3776{ 3777 rtx xops[2]; 3778 3779 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0) 3780 return; 3781 3782 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related 3783 to updating relocations to a section being discarded such that this 3784 doesn't work. Ought to detect this at configure time. */ 3785#if 0 3786 /* The trick here is to create a linkonce section containing the 3787 pic label thunk, but to refer to it with an internal label. 3788 Because the label is internal, we don't have inter-dso name 3789 binding issues on hosts that don't support ".hidden". 3790 3791 In order to use these macros, however, we must create a fake 3792 function decl. */ 3793 if (targetm.have_named_sections) 3794 { 3795 tree decl = build_decl (FUNCTION_DECL, 3796 get_identifier ("i686.get_pc_thunk"), 3797 error_mark_node); 3798 DECL_ONE_ONLY (decl) = 1; 3799 UNIQUE_SECTION (decl, 0); 3800 named_section (decl, NULL); 3801 } 3802 else 3803#else 3804 text_section (); 3805#endif 3806 3807 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an 3808 internal (non-global) label that's being emitted, it didn't make 3809 sense to have .type information for local labels. 
This caused 3810 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving 3811 me debug info for a label that you're declaring non-global?) this 3812 was changed to call ASM_OUTPUT_LABEL() instead. */ 3813 3814 ASM_OUTPUT_LABEL (file, pic_label_name); 3815 3816 xops[0] = pic_offset_table_rtx; 3817 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx); 3818 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops); 3819 output_asm_insn ("ret", xops); 3820} 3821 3822void 3823load_pic_register () 3824{ 3825 rtx gotsym, pclab; 3826 3827 if (TARGET_64BIT) 3828 abort (); 3829 3830 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); 3831 3832 if (TARGET_DEEP_BRANCH_PREDICTION) 3833 { 3834 if (! pic_label_name[0]) 3835 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0); 3836 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name)); 3837 } 3838 else 3839 { 3840 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); 3841 } 3842 3843 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab)); 3844 3845 if (! TARGET_DEEP_BRANCH_PREDICTION) 3846 emit_insn (gen_popsi1 (pic_offset_table_rtx)); 3847 3848 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab)); 3849} 3850 3851/* Generate an "push" pattern for input ARG. */ 3852 3853static rtx 3854gen_push (arg) 3855 rtx arg; 3856{ 3857 return gen_rtx_SET (VOIDmode, 3858 gen_rtx_MEM (Pmode, 3859 gen_rtx_PRE_DEC (Pmode, 3860 stack_pointer_rtx)), 3861 arg); 3862} 3863 3864/* Return 1 if we need to save REGNO. */ 3865static int 3866ix86_save_reg (regno, maybe_eh_return) 3867 int regno; 3868 int maybe_eh_return; 3869{ 3870 if (flag_pic 3871 && ! 
TARGET_64BIT 3872 && regno == PIC_OFFSET_TABLE_REGNUM 3873 && (current_function_uses_pic_offset_table 3874 || current_function_uses_const_pool 3875 || current_function_calls_eh_return)) 3876 return 1; 3877 3878 if (current_function_calls_eh_return && maybe_eh_return) 3879 { 3880 unsigned i; 3881 for (i = 0; ; i++) 3882 { 3883 unsigned test = EH_RETURN_DATA_REGNO (i); 3884 if (test == INVALID_REGNUM) 3885 break; 3886 if (test == (unsigned) regno) 3887 return 1; 3888 } 3889 } 3890 3891 return (regs_ever_live[regno] 3892 && !call_used_regs[regno] 3893 && !fixed_regs[regno] 3894 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)); 3895} 3896 3897/* Return number of registers to be saved on the stack. */ 3898 3899static int 3900ix86_nsaved_regs () 3901{ 3902 int nregs = 0; 3903 int regno; 3904 3905 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--) 3906 if (ix86_save_reg (regno, true)) 3907 nregs++; 3908 return nregs; 3909} 3910 3911/* Return the offset between two registers, one to be eliminated, and the other 3912 its replacement, at the start of a routine. */ 3913 3914HOST_WIDE_INT 3915ix86_initial_elimination_offset (from, to) 3916 int from; 3917 int to; 3918{ 3919 struct ix86_frame frame; 3920 ix86_compute_frame_layout (&frame); 3921 3922 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 3923 return frame.hard_frame_pointer_offset; 3924 else if (from == FRAME_POINTER_REGNUM 3925 && to == HARD_FRAME_POINTER_REGNUM) 3926 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; 3927 else 3928 { 3929 if (to != STACK_POINTER_REGNUM) 3930 abort (); 3931 else if (from == ARG_POINTER_REGNUM) 3932 return frame.stack_pointer_offset; 3933 else if (from != FRAME_POINTER_REGNUM) 3934 abort (); 3935 else 3936 return frame.stack_pointer_offset - frame.frame_pointer_offset; 3937 } 3938} 3939 3940/* Fill structure ix86_frame about frame of currently computed function. 
*/ 3941 3942static void 3943ix86_compute_frame_layout (frame) 3944 struct ix86_frame *frame; 3945{ 3946 HOST_WIDE_INT total_size; 3947 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT; 3948 int offset; 3949 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT; 3950 HOST_WIDE_INT size = get_frame_size (); 3951 3952 frame->nregs = ix86_nsaved_regs (); 3953 total_size = size; 3954 3955 /* Skip return value and save base pointer. */ 3956 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD; 3957 3958 frame->hard_frame_pointer_offset = offset; 3959 3960 /* Do some sanity checking of stack_alignment_needed and 3961 preferred_alignment, since i386 port is the only using those features 3962 that may break easily. */ 3963 3964 if (size && !stack_alignment_needed) 3965 abort (); 3966 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT) 3967 abort (); 3968 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT) 3969 abort (); 3970 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT) 3971 abort (); 3972 3973 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT) 3974 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT; 3975 3976 /* Register save area */ 3977 offset += frame->nregs * UNITS_PER_WORD; 3978 3979 /* Va-arg area */ 3980 if (ix86_save_varrargs_registers) 3981 { 3982 offset += X86_64_VARARGS_SIZE; 3983 frame->va_arg_size = X86_64_VARARGS_SIZE; 3984 } 3985 else 3986 frame->va_arg_size = 0; 3987 3988 /* Align start of frame for local function. */ 3989 frame->padding1 = ((offset + stack_alignment_needed - 1) 3990 & -stack_alignment_needed) - offset; 3991 3992 offset += frame->padding1; 3993 3994 /* Frame pointer points here. */ 3995 frame->frame_pointer_offset = offset; 3996 3997 offset += size; 3998 3999 /* Add outgoing arguments area. 
*/ 4000 if (ACCUMULATE_OUTGOING_ARGS) 4001 { 4002 offset += current_function_outgoing_args_size; 4003 frame->outgoing_arguments_size = current_function_outgoing_args_size; 4004 } 4005 else 4006 frame->outgoing_arguments_size = 0; 4007 4008 /* Align stack boundary. */ 4009 frame->padding2 = ((offset + preferred_alignment - 1) 4010 & -preferred_alignment) - offset; 4011 4012 offset += frame->padding2; 4013 4014 /* We've reached end of stack frame. */ 4015 frame->stack_pointer_offset = offset; 4016 4017 /* Size prologue needs to allocate. */ 4018 frame->to_allocate = 4019 (size + frame->padding1 + frame->padding2 4020 + frame->outgoing_arguments_size + frame->va_arg_size); 4021 4022 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging 4023 && current_function_is_leaf) 4024 { 4025 frame->red_zone_size = frame->to_allocate; 4026 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) 4027 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; 4028 } 4029 else 4030 frame->red_zone_size = 0; 4031 frame->to_allocate -= frame->red_zone_size; 4032 frame->stack_pointer_offset -= frame->red_zone_size; 4033#if 0 4034 fprintf (stderr, "nregs: %i\n", frame->nregs); 4035 fprintf (stderr, "size: %i\n", size); 4036 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed); 4037 fprintf (stderr, "padding1: %i\n", frame->padding1); 4038 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size); 4039 fprintf (stderr, "padding2: %i\n", frame->padding2); 4040 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate); 4041 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size); 4042 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset); 4043 fprintf (stderr, "hard_frame_pointer_offset: %i\n", 4044 frame->hard_frame_pointer_offset); 4045 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset); 4046#endif 4047} 4048 4049/* Emit code to save registers in the prologue. 
*/

/* Emit push insns for all call-saved registers; used by the prologue
   when the frame is built with pushes rather than moves.  Each push is
   marked RTX_FRAME_RELATED_P for unwind/debug info.  */

static void
ix86_emit_save_regs ()
{
  register int regno;
  rtx insn;

  /* Iterate downward so the pushes land in ascending memory order.  */
  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit code to save registers using MOV insns.  First register
   is stored at POINTER + OFFSET; subsequent registers follow at
   UNITS_PER_WORD increments.  */
static void
ix86_emit_save_regs_using_mov (pointer, offset)
     rtx pointer;
     HOST_WIDE_INT offset;
{
  int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
					       Pmode, offset),
			       gen_rtx_REG (Pmode, regno));
	RTX_FRAME_RELATED_P (insn) = 1;
	offset += UNITS_PER_WORD;
      }
}

/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue ()
{
  rtx insn;
  /* The PIC register is only loaded on 32-bit targets that actually
     reference the GOT or the constant pool.  */
  int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
				   || current_function_uses_const_pool)
		      && !TARGET_64BIT);
  struct ix86_frame frame;
  int use_mov = 0;
  HOST_WIDE_INT allocate;

  if (!optimize_size)
    {
      use_fast_prologue_epilogue
	 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
      if (TARGET_PROLOGUE_USING_MOVE)
        use_mov = use_fast_prologue_epilogue;
    }
  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;
  /* In case we are dealing only with a single register and an empty frame,
     push is equivalent to the mov+add sequence.  */
  if (allocate == 0 && frame.nregs <= 1)
    use_mov = 0;

  if (!use_mov)
    ix86_emit_save_regs ();
  else
    /* When saving with moves, the register save area is part of the
       one big stack adjustment below.  */
    allocate += frame.nregs * UNITS_PER_WORD;

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    {
      insn = emit_insn (gen_pro_epilogue_adjust_stack
			(stack_pointer_rtx, stack_pointer_rtx,
			 GEN_INT (-allocate)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* Stack probing required: call _alloca to allocate and touch the
	 pages.  ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      if (TARGET_64BIT)
	abort ();

      /* Allocation size is passed in %eax (hard reg 0).  */
      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (allocate));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));

      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }
  if (use_mov)
    {
      /* Address the save area from whichever pointer gives the simpler
	 (smaller) offsets.  */
      if (!frame_pointer_needed || !frame.to_allocate)
        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    load_pic_register ();

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  However, if -fpic, the above call will have
     done that.  */
  if (current_function_profile && ! pic_reg_used)
    emit_insn (gen_blockage ());
}

/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  MAYBE_EH_RETURN selects which set
   of saved registers ix86_save_reg reports (eh_return paths save more).  */
static void
ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
     rtx pointer;
     int offset;
     int maybe_eh_return;
{
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
	emit_move_insn (gen_rtx_REG (Pmode, regno),
			adjust_address (gen_rtx_MEM (Pmode, pointer),
					Pmode, offset));
	offset += UNITS_PER_WORD;
      }
}

/* Restore function stack, frame, and registers.  STYLE is 0 for a
   sibcall epilogue (no return insn emitted), 2 for an eh_return
   epilogue; other values give the normal return path.  */

void
ix86_expand_epilogue (style)
     int style;
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in a stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well.  Especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in the
     future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && use_fast_prologue_epilogue && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  The only exception is esp pointing
	 directly to the end of the block of saved registers, where we
	 may simplify the addressing mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      emit_insn (gen_pro_epilogue_adjust_stack
			 (stack_pointer_rtx, sa, const0_rtx));
	    }
	  else
	    {
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate
			     + frame.nregs * UNITS_PER_WORD)));
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx));
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset)));
	}
      else if (frame.to_allocate)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate)));

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
	 return address, do an explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  /* %ecx (hard reg 2) is call-clobbered and not used to return
	     values, so it is free to hold the return address.  */
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in the 64bit ABI.  */
	  if (TARGET_64BIT)
	    abort ();

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}

/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction into *OUT (base, index, displacement, scale).
   Return 0 if the structure of the address is grossly off.  Return -1
   if the address contains ASHIFT, so it is not strictly valid, but is
   still used for computing the length of an lea instruction.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;			/* index*scale + base */
	  else
	    disp = op1;			/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return 0;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      /* Not strictly valid as an address, but usable for lea length.  */
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is no scaling;
     swap them into the base position, which they can legally occupy.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
          || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return retval;
}

/* Return the cost of the memory address X.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
int
ix86_address_cost (x)
     rtx x;
{
  struct ix86_address parts;
  int cost = 1;

  if (!ix86_decompose_address (x, &parts))
    abort ();

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  /* Penalize a second distinct non-hard register in the address.  */
  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase the cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be
     worthwhile to split such addresses or even refuse such addresses
     at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the
     zero in the memory address, but I don't have an AMD-K6 machine
     handy to check this theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}

/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (x)
     rtx x;
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      /* Strip any constant offset added to the symbol.  */
      if (GET_CODE (term) == PLUS
	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      /* UNSPEC 15 is @GOTPCREL (see output_pic_addr_const).  */
      if (GET_CODE (term) != UNSPEC
	  || XVECLEN (term, 0) != 1
	  || XINT (term, 1) != 15)
	return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
	  && GET_CODE (term) != LABEL_REF)
	return x;

      return term;
    }

  if (GET_CODE (x) != PLUS
      || XEXP (x, 0) != pic_offset_table_rtx
      || GET_CODE (XEXP (x, 1)) != CONST)
    return x;

  term = XEXP (XEXP (x, 1), 0);

  /* Strip any constant offset added to the symbol.  */
  if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
    term = XEXP (term, 0);

  /* UNSPEC 7 is @GOTOFF (see output_pic_addr_const).  */
  if (GET_CODE (term) != UNSPEC
      || XVECLEN (term, 0) != 1
      || XINT (term, 1) != 7)
    return x;

  term = XVECEXP (term, 0, 0);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}

/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (disp)
     register rtx disp;
{
  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx x = disp;
      if (GET_CODE (disp) == CONST)
	x = XEXP (disp, 0);
      /* ??? Handle PIC code models */
      if (GET_CODE (x) == PLUS
	  && (GET_CODE (XEXP (x, 1)) == CONST_INT
	      && ix86_cmodel == CM_SMALL_PIC
	      && INTVAL (XEXP (x, 1)) < 1024*1024*1024
	      && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
	x = XEXP (x, 0);
      if (local_symbolic_operand (x, Pmode))
	return 1;
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions; this would exceed the
	 allowed distance of GOT tables.  We should not need these
	 anyway.  UNSPEC 15 is @GOTPCREL.  */
      if (GET_CODE (disp) != UNSPEC
	  || XVECLEN (disp, 0) != 1
	  || XINT (disp, 1) != 15)
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  /* A constant offset may be added to the symbolic part.  */
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
    }

  if (GET_CODE (disp) != UNSPEC
      || XVECLEN (disp, 0) != 1)
    return 0;

  /* Must be @GOT or @GOTOFF.  */
  switch (XINT (disp, 1))
    {
    case 6: /* @GOT */
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;

    case 7: /* @GOTOFF */
      return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}

/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine
   mode for the MEM expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS
   should convert common non-canonical forms to canonical form so that
   they will be recognized.
*/ 4689 4690int 4691legitimate_address_p (mode, addr, strict) 4692 enum machine_mode mode; 4693 register rtx addr; 4694 int strict; 4695{ 4696 struct ix86_address parts; 4697 rtx base, index, disp; 4698 HOST_WIDE_INT scale; 4699 const char *reason = NULL; 4700 rtx reason_rtx = NULL_RTX; 4701 4702 if (TARGET_DEBUG_ADDR) 4703 { 4704 fprintf (stderr, 4705 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n", 4706 GET_MODE_NAME (mode), strict); 4707 debug_rtx (addr); 4708 } 4709 4710 if (ix86_decompose_address (addr, &parts) <= 0) 4711 { 4712 reason = "decomposition failed"; 4713 goto report_error; 4714 } 4715 4716 base = parts.base; 4717 index = parts.index; 4718 disp = parts.disp; 4719 scale = parts.scale; 4720 4721 /* Validate base register. 4722 4723 Don't allow SUBREG's here, it can lead to spill failures when the base 4724 is one word out of a two word structure, which is represented internally 4725 as a DImode int. */ 4726 4727 if (base) 4728 { 4729 reason_rtx = base; 4730 4731 if (GET_CODE (base) != REG) 4732 { 4733 reason = "base is not a register"; 4734 goto report_error; 4735 } 4736 4737 if (GET_MODE (base) != Pmode) 4738 { 4739 reason = "base is not in Pmode"; 4740 goto report_error; 4741 } 4742 4743 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base)) 4744 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base))) 4745 { 4746 reason = "base is not valid"; 4747 goto report_error; 4748 } 4749 } 4750 4751 /* Validate index register. 4752 4753 Don't allow SUBREG's here, it can lead to spill failures when the index 4754 is one word out of a two word structure, which is represented internally 4755 as a DImode int. */ 4756 4757 if (index) 4758 { 4759 reason_rtx = index; 4760 4761 if (GET_CODE (index) != REG) 4762 { 4763 reason = "index is not a register"; 4764 goto report_error; 4765 } 4766 4767 if (GET_MODE (index) != Pmode) 4768 { 4769 reason = "index is not in Pmode"; 4770 goto report_error; 4771 } 4772 4773 if ((strict && ! 
REG_OK_FOR_INDEX_STRICT_P (index)) 4774 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index))) 4775 { 4776 reason = "index is not valid"; 4777 goto report_error; 4778 } 4779 } 4780 4781 /* Validate scale factor. */ 4782 if (scale != 1) 4783 { 4784 reason_rtx = GEN_INT (scale); 4785 if (!index) 4786 { 4787 reason = "scale without index"; 4788 goto report_error; 4789 } 4790 4791 if (scale != 2 && scale != 4 && scale != 8) 4792 { 4793 reason = "scale is not a valid multiplier"; 4794 goto report_error; 4795 } 4796 } 4797 4798 /* Validate displacement. */ 4799 if (disp) 4800 { 4801 reason_rtx = disp; 4802 4803 if (!CONSTANT_ADDRESS_P (disp)) 4804 { 4805 reason = "displacement is not constant"; 4806 goto report_error; 4807 } 4808 4809 if (TARGET_64BIT) 4810 { 4811 if (!x86_64_sign_extended_value (disp)) 4812 { 4813 reason = "displacement is out of range"; 4814 goto report_error; 4815 } 4816 } 4817 else 4818 { 4819 if (GET_CODE (disp) == CONST_DOUBLE) 4820 { 4821 reason = "displacement is a const_double"; 4822 goto report_error; 4823 } 4824 } 4825 4826 if (flag_pic && SYMBOLIC_CONST (disp)) 4827 { 4828 if (TARGET_64BIT && (index || base)) 4829 { 4830 reason = "non-constant pic memory reference"; 4831 goto report_error; 4832 } 4833 if (! legitimate_pic_address_disp_p (disp)) 4834 { 4835 reason = "displacement is an invalid pic construct"; 4836 goto report_error; 4837 } 4838 4839 /* This code used to verify that a symbolic pic displacement 4840 includes the pic_offset_table_rtx register. 4841 4842 While this is good idea, unfortunately these constructs may 4843 be created by "adds using lea" optimization for incorrect 4844 code like: 4845 4846 int a; 4847 int foo(int i) 4848 { 4849 return *(&a+i); 4850 } 4851 4852 This code is nonsensical, but results in addressing 4853 GOT table with pic_offset_table_rtx base. 
We can't 4854 just refuse it easily, since it gets matched by 4855 "addsi3" pattern, that later gets split to lea in the 4856 case output register differs from input. While this 4857 can be handled by separate addsi pattern for this case 4858 that never results in lea, this seems to be easier and 4859 correct fix for crash to disable this test. */ 4860 } 4861 else if (HALF_PIC_P ()) 4862 { 4863 if (! HALF_PIC_ADDRESS_P (disp) 4864 || (base != NULL_RTX || index != NULL_RTX)) 4865 { 4866 reason = "displacement is an invalid half-pic reference"; 4867 goto report_error; 4868 } 4869 } 4870 } 4871 4872 /* Everything looks valid. */ 4873 if (TARGET_DEBUG_ADDR) 4874 fprintf (stderr, "Success.\n"); 4875 return TRUE; 4876 4877report_error: 4878 if (TARGET_DEBUG_ADDR) 4879 { 4880 fprintf (stderr, "Error: %s\n", reason); 4881 debug_rtx (reason_rtx); 4882 } 4883 return FALSE; 4884} 4885 4886/* Return an unique alias set for the GOT. */ 4887 4888static HOST_WIDE_INT 4889ix86_GOT_alias_set () 4890{ 4891 static HOST_WIDE_INT set = -1; 4892 if (set == -1) 4893 set = new_alias_set (); 4894 return set; 4895} 4896 4897/* Return a legitimate reference for ORIG (an address) using the 4898 register REG. If REG is 0, a new pseudo is generated. 4899 4900 There are two types of references that must be handled: 4901 4902 1. Global data references must load the address from the GOT, via 4903 the PIC reg. An insn is emitted to do this load, and the reg is 4904 returned. 4905 4906 2. Static data references, constant pool addresses, and code labels 4907 compute the address as an offset from the GOT, whose base is in 4908 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to 4909 differentiate them from global data objects. The returned 4910 address is the PIC reg + an unspec constant. 4911 4912 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC 4913 reg also appears in the address. 
*/ 4914 4915rtx 4916legitimize_pic_address (orig, reg) 4917 rtx orig; 4918 rtx reg; 4919{ 4920 rtx addr = orig; 4921 rtx new = orig; 4922 rtx base; 4923 4924 if (local_symbolic_operand (addr, Pmode)) 4925 { 4926 /* In 64bit mode we can address such objects directly. */ 4927 if (TARGET_64BIT) 4928 new = addr; 4929 else 4930 { 4931 /* This symbol may be referenced via a displacement from the PIC 4932 base address (@GOTOFF). */ 4933 4934 current_function_uses_pic_offset_table = 1; 4935 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7); 4936 new = gen_rtx_CONST (Pmode, new); 4937 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); 4938 4939 if (reg != 0) 4940 { 4941 emit_move_insn (reg, new); 4942 new = reg; 4943 } 4944 } 4945 } 4946 else if (GET_CODE (addr) == SYMBOL_REF) 4947 { 4948 if (TARGET_64BIT) 4949 { 4950 current_function_uses_pic_offset_table = 1; 4951 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15); 4952 new = gen_rtx_CONST (Pmode, new); 4953 new = gen_rtx_MEM (Pmode, new); 4954 RTX_UNCHANGING_P (new) = 1; 4955 set_mem_alias_set (new, ix86_GOT_alias_set ()); 4956 4957 if (reg == 0) 4958 reg = gen_reg_rtx (Pmode); 4959 /* Use directly gen_movsi, otherwise the address is loaded 4960 into register for CSE. We don't want to CSE this addresses, 4961 instead we CSE addresses from the GOT table, so skip this. */ 4962 emit_insn (gen_movsi (reg, new)); 4963 new = reg; 4964 } 4965 else 4966 { 4967 /* This symbol must be referenced via a load from the 4968 Global Offset Table (@GOT). 
*/ 4969 4970 current_function_uses_pic_offset_table = 1; 4971 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6); 4972 new = gen_rtx_CONST (Pmode, new); 4973 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); 4974 new = gen_rtx_MEM (Pmode, new); 4975 RTX_UNCHANGING_P (new) = 1; 4976 set_mem_alias_set (new, ix86_GOT_alias_set ()); 4977 4978 if (reg == 0) 4979 reg = gen_reg_rtx (Pmode); 4980 emit_move_insn (reg, new); 4981 new = reg; 4982 } 4983 } 4984 else 4985 { 4986 if (GET_CODE (addr) == CONST) 4987 { 4988 addr = XEXP (addr, 0); 4989 if (GET_CODE (addr) == UNSPEC) 4990 { 4991 /* Check that the unspec is one of the ones we generate? */ 4992 } 4993 else if (GET_CODE (addr) != PLUS) 4994 abort (); 4995 } 4996 if (GET_CODE (addr) == PLUS) 4997 { 4998 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1); 4999 5000 /* Check first to see if this is a constant offset from a @GOTOFF 5001 symbol reference. */ 5002 if (local_symbolic_operand (op0, Pmode) 5003 && GET_CODE (op1) == CONST_INT) 5004 { 5005 if (!TARGET_64BIT) 5006 { 5007 current_function_uses_pic_offset_table = 1; 5008 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7); 5009 new = gen_rtx_PLUS (Pmode, new, op1); 5010 new = gen_rtx_CONST (Pmode, new); 5011 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); 5012 5013 if (reg != 0) 5014 { 5015 emit_move_insn (reg, new); 5016 new = reg; 5017 } 5018 } 5019 else 5020 { 5021 /* ??? We need to limit offsets here. */ 5022 } 5023 } 5024 else 5025 { 5026 base = legitimize_pic_address (XEXP (addr, 0), reg); 5027 new = legitimize_pic_address (XEXP (addr, 1), 5028 base == reg ? 
NULL_RTX : reg); 5029 5030 if (GET_CODE (new) == CONST_INT) 5031 new = plus_constant (base, INTVAL (new)); 5032 else 5033 { 5034 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1))) 5035 { 5036 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0)); 5037 new = XEXP (new, 1); 5038 } 5039 new = gen_rtx_PLUS (Pmode, base, new); 5040 } 5041 } 5042 } 5043 } 5044 return new; 5045} 5046 5047/* Try machine-dependent ways of modifying an illegitimate address 5048 to be legitimate. If we find one, return the new, valid address. 5049 This macro is used in only one place: `memory_address' in explow.c. 5050 5051 OLDX is the address as it was before break_out_memory_refs was called. 5052 In some cases it is useful to look at this to decide what needs to be done. 5053 5054 MODE and WIN are passed so that this macro can use 5055 GO_IF_LEGITIMATE_ADDRESS. 5056 5057 It is always safe for this macro to do nothing. It exists to recognize 5058 opportunities to optimize the output. 5059 5060 For the 80386, we handle X+REG by loading X into a register R and 5061 using R+REG. R will go in a general reg and indexing will be used. 5062 However, if REG is a broken-out memory address or multiplication, 5063 nothing needs to be done because REG can certainly go in a general reg. 5064 5065 When -fpic is used, special handling is needed for symbolic references. 5066 See comments by legitimize_pic_address in i386.c for details. 
*/

rtx
legitimize_address (x, oldx, mode)
     register rtx x;
     register rtx oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Force any multiplication operands into registers.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Load the non-register operand into a fresh pseudo so the sum
	 becomes reg+reg.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}

/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      if (code == 'P' && ! SYMBOL_REF_FLAG (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      /* Map the UNSPEC number to its relocation suffix.  */
      switch (XINT (x, 1))
	{
	case 6:
	  fputs ("@GOT", file);
	  break;
	case 7:
	  fputs ("@GOTOFF", file);
	  break;
	case 8:
	  fputs ("@PLT", file);
	  break;
	case 15:
	  fputs ("@GOTPCREL(%RIP)", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}

/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
   We need to handle our special PIC relocations.
*/ 5352 5353void 5354i386_dwarf_output_addr_const (file, x) 5355 FILE *file; 5356 rtx x; 5357{ 5358#ifdef ASM_QUAD 5359 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG); 5360#else 5361 if (TARGET_64BIT) 5362 abort (); 5363 fprintf (file, "%s", ASM_LONG); 5364#endif 5365 if (flag_pic) 5366 output_pic_addr_const (file, x, '\0'); 5367 else 5368 output_addr_const (file, x); 5369 fputc ('\n', file); 5370} 5371 5372/* In the name of slightly smaller debug output, and to cater to 5373 general assembler losage, recognize PIC+GOTOFF and turn it back 5374 into a direct symbol reference. */ 5375 5376rtx 5377i386_simplify_dwarf_addr (orig_x) 5378 rtx orig_x; 5379{ 5380 rtx x = orig_x; 5381 5382 if (TARGET_64BIT) 5383 { 5384 if (GET_CODE (x) != CONST 5385 || GET_CODE (XEXP (x, 0)) != UNSPEC 5386 || XINT (XEXP (x, 0), 1) != 15) 5387 return orig_x; 5388 return XVECEXP (XEXP (x, 0), 0, 0); 5389 } 5390 5391 if (GET_CODE (x) != PLUS 5392 || GET_CODE (XEXP (x, 0)) != REG 5393 || GET_CODE (XEXP (x, 1)) != CONST) 5394 return orig_x; 5395 5396 x = XEXP (XEXP (x, 1), 0); 5397 if (GET_CODE (x) == UNSPEC 5398 && (XINT (x, 1) == 6 5399 || XINT (x, 1) == 7)) 5400 return XVECEXP (x, 0, 0); 5401 5402 if (GET_CODE (x) == PLUS 5403 && GET_CODE (XEXP (x, 0)) == UNSPEC 5404 && GET_CODE (XEXP (x, 1)) == CONST_INT 5405 && (XINT (XEXP (x, 0), 1) == 6 5406 || XINT (XEXP (x, 0), 1) == 7)) 5407 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1)); 5408 5409 return orig_x; 5410} 5411 5412static void 5413put_condition_code (code, mode, reverse, fp, file) 5414 enum rtx_code code; 5415 enum machine_mode mode; 5416 int reverse, fp; 5417 FILE *file; 5418{ 5419 const char *suffix; 5420 5421 if (mode == CCFPmode || mode == CCFPUmode) 5422 { 5423 enum rtx_code second_code, bypass_code; 5424 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 5425 if (bypass_code != NIL || second_code != NIL) 5426 abort (); 5427 code = ix86_fp_compare_code_to_integer (code); 5428 mode = 
CCmode; 5429 } 5430 if (reverse) 5431 code = reverse_condition (code); 5432 5433 switch (code) 5434 { 5435 case EQ: 5436 suffix = "e"; 5437 break; 5438 case NE: 5439 suffix = "ne"; 5440 break; 5441 case GT: 5442 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode) 5443 abort (); 5444 suffix = "g"; 5445 break; 5446 case GTU: 5447 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers. 5448 Those same assemblers have the same but opposite losage on cmov. */ 5449 if (mode != CCmode) 5450 abort (); 5451 suffix = fp ? "nbe" : "a"; 5452 break; 5453 case LT: 5454 if (mode == CCNOmode || mode == CCGOCmode) 5455 suffix = "s"; 5456 else if (mode == CCmode || mode == CCGCmode) 5457 suffix = "l"; 5458 else 5459 abort (); 5460 break; 5461 case LTU: 5462 if (mode != CCmode) 5463 abort (); 5464 suffix = "b"; 5465 break; 5466 case GE: 5467 if (mode == CCNOmode || mode == CCGOCmode) 5468 suffix = "ns"; 5469 else if (mode == CCmode || mode == CCGCmode) 5470 suffix = "ge"; 5471 else 5472 abort (); 5473 break; 5474 case GEU: 5475 /* ??? As above. */ 5476 if (mode != CCmode) 5477 abort (); 5478 suffix = fp ? "nb" : "ae"; 5479 break; 5480 case LE: 5481 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode) 5482 abort (); 5483 suffix = "le"; 5484 break; 5485 case LEU: 5486 if (mode != CCmode) 5487 abort (); 5488 suffix = "be"; 5489 break; 5490 case UNORDERED: 5491 suffix = fp ? "u" : "p"; 5492 break; 5493 case ORDERED: 5494 suffix = fp ? 
"nu" : "np"; 5495 break; 5496 default: 5497 abort (); 5498 } 5499 fputs (suffix, file); 5500} 5501 5502void 5503print_reg (x, code, file) 5504 rtx x; 5505 int code; 5506 FILE *file; 5507{ 5508 if (REGNO (x) == ARG_POINTER_REGNUM 5509 || REGNO (x) == FRAME_POINTER_REGNUM 5510 || REGNO (x) == FLAGS_REG 5511 || REGNO (x) == FPSR_REG) 5512 abort (); 5513 5514 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0) 5515 putc ('%', file); 5516 5517 if (code == 'w' || MMX_REG_P (x)) 5518 code = 2; 5519 else if (code == 'b') 5520 code = 1; 5521 else if (code == 'k') 5522 code = 4; 5523 else if (code == 'q') 5524 code = 8; 5525 else if (code == 'y') 5526 code = 3; 5527 else if (code == 'h') 5528 code = 0; 5529 else 5530 code = GET_MODE_SIZE (GET_MODE (x)); 5531 5532 /* Irritatingly, AMD extended registers use different naming convention 5533 from the normal registers. */ 5534 if (REX_INT_REG_P (x)) 5535 { 5536 if (!TARGET_64BIT) 5537 abort (); 5538 switch (code) 5539 { 5540 case 0: 5541 error ("extended registers have no high halves"); 5542 break; 5543 case 1: 5544 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8); 5545 break; 5546 case 2: 5547 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8); 5548 break; 5549 case 4: 5550 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8); 5551 break; 5552 case 8: 5553 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8); 5554 break; 5555 default: 5556 error ("unsupported operand size for extended register"); 5557 break; 5558 } 5559 return; 5560 } 5561 switch (code) 5562 { 5563 case 3: 5564 if (STACK_TOP_P (x)) 5565 { 5566 fputs ("st(0)", file); 5567 break; 5568 } 5569 /* FALLTHRU */ 5570 case 8: 5571 case 4: 5572 case 12: 5573 if (! ANY_FP_REG_P (x)) 5574 putc (code == 8 && TARGET_64BIT ? 
'r' : 'e', file); 5575 /* FALLTHRU */ 5576 case 16: 5577 case 2: 5578 fputs (hi_reg_name[REGNO (x)], file); 5579 break; 5580 case 1: 5581 fputs (qi_reg_name[REGNO (x)], file); 5582 break; 5583 case 0: 5584 fputs (qi_high_reg_name[REGNO (x)], file); 5585 break; 5586 default: 5587 abort (); 5588 } 5589} 5590 5591/* Meaning of CODE: 5592 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. 5593 C -- print opcode suffix for set/cmov insn. 5594 c -- like C, but print reversed condition 5595 F,f -- likewise, but for floating-point. 5596 R -- print the prefix for register names. 5597 z -- print the opcode suffix for the size of the current operand. 5598 * -- print a star (in certain assembler syntax) 5599 A -- print an absolute memory reference. 5600 w -- print the operand as if it's a "word" (HImode) even if it isn't. 5601 s -- print a shift double count, followed by the assemblers argument 5602 delimiter. 5603 b -- print the QImode name of the register for the indicated operand. 5604 %b0 would print %al if operands[0] is reg 0. 5605 w -- likewise, print the HImode name of the register. 5606 k -- likewise, print the SImode name of the register. 5607 q -- likewise, print the DImode name of the register. 5608 h -- print the QImode name for a "high" register, either ah, bh, ch or dh. 5609 y -- print "st(0)" instead of "st" as a register. 5610 D -- print condition for SSE cmp instruction. 5611 P -- if PIC, print an @PLT suffix. 5612 X -- don't print any sort of PIC '@' suffix for a symbol. 5613 */ 5614 5615void 5616print_operand (file, x, code) 5617 FILE *file; 5618 rtx x; 5619 int code; 5620{ 5621 if (code) 5622 { 5623 switch (code) 5624 { 5625 case '*': 5626 if (ASSEMBLER_DIALECT == ASM_ATT) 5627 putc ('*', file); 5628 return; 5629 5630 case 'A': 5631 if (ASSEMBLER_DIALECT == ASM_ATT) 5632 putc ('*', file); 5633 else if (ASSEMBLER_DIALECT == ASM_INTEL) 5634 { 5635 /* Intel syntax. For absolute addresses, registers should not 5636 be surrounded by braces. 
*/ 5637 if (GET_CODE (x) != REG) 5638 { 5639 putc ('[', file); 5640 PRINT_OPERAND (file, x, 0); 5641 putc (']', file); 5642 return; 5643 } 5644 } 5645 else 5646 abort (); 5647 5648 PRINT_OPERAND (file, x, 0); 5649 return; 5650 5651 5652 case 'L': 5653 if (ASSEMBLER_DIALECT == ASM_ATT) 5654 putc ('l', file); 5655 return; 5656 5657 case 'W': 5658 if (ASSEMBLER_DIALECT == ASM_ATT) 5659 putc ('w', file); 5660 return; 5661 5662 case 'B': 5663 if (ASSEMBLER_DIALECT == ASM_ATT) 5664 putc ('b', file); 5665 return; 5666 5667 case 'Q': 5668 if (ASSEMBLER_DIALECT == ASM_ATT) 5669 putc ('l', file); 5670 return; 5671 5672 case 'S': 5673 if (ASSEMBLER_DIALECT == ASM_ATT) 5674 putc ('s', file); 5675 return; 5676 5677 case 'T': 5678 if (ASSEMBLER_DIALECT == ASM_ATT) 5679 putc ('t', file); 5680 return; 5681 5682 case 'z': 5683 /* 387 opcodes don't get size suffixes if the operands are 5684 registers. */ 5685 5686 if (STACK_REG_P (x)) 5687 return; 5688 5689 /* this is the size of op from size of operand */ 5690 switch (GET_MODE_SIZE (GET_MODE (x))) 5691 { 5692 case 2: 5693#ifdef HAVE_GAS_FILDS_FISTS 5694 putc ('s', file); 5695#endif 5696 return; 5697 5698 case 4: 5699 if (GET_MODE (x) == SFmode) 5700 { 5701 putc ('s', file); 5702 return; 5703 } 5704 else 5705 putc ('l', file); 5706 return; 5707 5708 case 12: 5709 case 16: 5710 putc ('t', file); 5711 return; 5712 5713 case 8: 5714 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) 5715 { 5716#ifdef GAS_MNEMONICS 5717 putc ('q', file); 5718#else 5719 putc ('l', file); 5720 putc ('l', file); 5721#endif 5722 } 5723 else 5724 putc ('l', file); 5725 return; 5726 5727 default: 5728 abort (); 5729 } 5730 5731 case 'b': 5732 case 'w': 5733 case 'k': 5734 case 'q': 5735 case 'h': 5736 case 'y': 5737 case 'X': 5738 case 'P': 5739 break; 5740 5741 case 's': 5742 if (GET_CODE (x) == CONST_INT || ! 
SHIFT_DOUBLE_OMITS_COUNT) 5743 { 5744 PRINT_OPERAND (file, x, 0); 5745 putc (',', file); 5746 } 5747 return; 5748 5749 case 'D': 5750 /* Little bit of braindamage here. The SSE compare instructions 5751 does use completely different names for the comparisons that the 5752 fp conditional moves. */ 5753 switch (GET_CODE (x)) 5754 { 5755 case EQ: 5756 case UNEQ: 5757 fputs ("eq", file); 5758 break; 5759 case LT: 5760 case UNLT: 5761 fputs ("lt", file); 5762 break; 5763 case LE: 5764 case UNLE: 5765 fputs ("le", file); 5766 break; 5767 case UNORDERED: 5768 fputs ("unord", file); 5769 break; 5770 case NE: 5771 case LTGT: 5772 fputs ("neq", file); 5773 break; 5774 case UNGE: 5775 case GE: 5776 fputs ("nlt", file); 5777 break; 5778 case UNGT: 5779 case GT: 5780 fputs ("nle", file); 5781 break; 5782 case ORDERED: 5783 fputs ("ord", file); 5784 break; 5785 default: 5786 abort (); 5787 break; 5788 } 5789 return; 5790 case 'C': 5791 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file); 5792 return; 5793 case 'F': 5794 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file); 5795 return; 5796 5797 /* Like above, but reverse condition */ 5798 case 'c': 5799 /* Check to see if argument to %c is really a constant 5800 and not a condition code which needs to be reversed. 
*/ 5801 if (GET_RTX_CLASS (GET_CODE (x)) != '<') 5802 { 5803 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'"); 5804 return; 5805 } 5806 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file); 5807 return; 5808 case 'f': 5809 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file); 5810 return; 5811 case '+': 5812 { 5813 rtx x; 5814 5815 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS) 5816 return; 5817 5818 x = find_reg_note (current_output_insn, REG_BR_PROB, 0); 5819 if (x) 5820 { 5821 int pred_val = INTVAL (XEXP (x, 0)); 5822 5823 if (pred_val < REG_BR_PROB_BASE * 45 / 100 5824 || pred_val > REG_BR_PROB_BASE * 55 / 100) 5825 { 5826 int taken = pred_val > REG_BR_PROB_BASE / 2; 5827 int cputaken = final_forward_branch_p (current_output_insn) == 0; 5828 5829 /* Emit hints only in the case default branch prediction 5830 heruistics would fail. */ 5831 if (taken != cputaken) 5832 { 5833 /* We use 3e (DS) prefix for taken branches and 5834 2e (CS) prefix for not taken branches. */ 5835 if (taken) 5836 fputs ("ds ; ", file); 5837 else 5838 fputs ("cs ; ", file); 5839 } 5840 } 5841 } 5842 return; 5843 } 5844 default: 5845 { 5846 char str[50]; 5847 sprintf (str, "invalid operand code `%c'", code); 5848 output_operand_lossage (str); 5849 } 5850 } 5851 } 5852 5853 if (GET_CODE (x) == REG) 5854 { 5855 PRINT_REG (x, code, file); 5856 } 5857 5858 else if (GET_CODE (x) == MEM) 5859 { 5860 /* No `byte ptr' prefix for call instructions. 
*/ 5861 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P') 5862 { 5863 const char * size; 5864 switch (GET_MODE_SIZE (GET_MODE (x))) 5865 { 5866 case 1: size = "BYTE"; break; 5867 case 2: size = "WORD"; break; 5868 case 4: size = "DWORD"; break; 5869 case 8: size = "QWORD"; break; 5870 case 12: size = "XWORD"; break; 5871 case 16: size = "XMMWORD"; break; 5872 default: 5873 abort (); 5874 } 5875 5876 /* Check for explicit size override (codes 'b', 'w' and 'k') */ 5877 if (code == 'b') 5878 size = "BYTE"; 5879 else if (code == 'w') 5880 size = "WORD"; 5881 else if (code == 'k') 5882 size = "DWORD"; 5883 5884 fputs (size, file); 5885 fputs (" PTR ", file); 5886 } 5887 5888 x = XEXP (x, 0); 5889 if (flag_pic && CONSTANT_ADDRESS_P (x)) 5890 output_pic_addr_const (file, x, code); 5891 /* Avoid (%rip) for call operands. */ 5892 else if (CONSTANT_ADDRESS_P (x) && code =='P' 5893 && GET_CODE (x) != CONST_INT) 5894 output_addr_const (file, x); 5895 else 5896 output_address (x); 5897 } 5898 5899 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode) 5900 { 5901 REAL_VALUE_TYPE r; 5902 long l; 5903 5904 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 5905 REAL_VALUE_TO_TARGET_SINGLE (r, l); 5906 5907 if (ASSEMBLER_DIALECT == ASM_ATT) 5908 putc ('$', file); 5909 fprintf (file, "0x%lx", l); 5910 } 5911 5912 /* These float cases don't actually occur as immediate operands. 
*/ 5913 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode) 5914 { 5915 REAL_VALUE_TYPE r; 5916 char dstr[30]; 5917 5918 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 5919 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr); 5920 fprintf (file, "%s", dstr); 5921 } 5922 5923 else if (GET_CODE (x) == CONST_DOUBLE 5924 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode)) 5925 { 5926 REAL_VALUE_TYPE r; 5927 char dstr[30]; 5928 5929 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 5930 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr); 5931 fprintf (file, "%s", dstr); 5932 } 5933 else 5934 { 5935 if (code != 'P') 5936 { 5937 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE) 5938 { 5939 if (ASSEMBLER_DIALECT == ASM_ATT) 5940 putc ('$', file); 5941 } 5942 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF 5943 || GET_CODE (x) == LABEL_REF) 5944 { 5945 if (ASSEMBLER_DIALECT == ASM_ATT) 5946 putc ('$', file); 5947 else 5948 fputs ("OFFSET FLAT:", file); 5949 } 5950 } 5951 if (GET_CODE (x) == CONST_INT) 5952 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); 5953 else if (flag_pic) 5954 output_pic_addr_const (file, x, code); 5955 else 5956 output_addr_const (file, x); 5957 } 5958} 5959 5960/* Print a memory operand whose address is ADDR. */ 5961 5962void 5963print_operand_address (file, addr) 5964 FILE *file; 5965 register rtx addr; 5966{ 5967 struct ix86_address parts; 5968 rtx base, index, disp; 5969 int scale; 5970 5971 if (! ix86_decompose_address (addr, &parts)) 5972 abort (); 5973 5974 base = parts.base; 5975 index = parts.index; 5976 disp = parts.disp; 5977 scale = parts.scale; 5978 5979 if (!base && !index) 5980 { 5981 /* Displacement only requires special attention. 
*/ 5982 5983 if (GET_CODE (disp) == CONST_INT) 5984 { 5985 if (ASSEMBLER_DIALECT == ASM_INTEL) 5986 { 5987 if (USER_LABEL_PREFIX[0] == 0) 5988 putc ('%', file); 5989 fputs ("ds:", file); 5990 } 5991 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr)); 5992 } 5993 else if (flag_pic) 5994 output_pic_addr_const (file, addr, 0); 5995 else 5996 output_addr_const (file, addr); 5997 5998 /* Use one byte shorter RIP relative addressing for 64bit mode. */ 5999 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT) 6000 fputs ("(%rip)", file); 6001 } 6002 else 6003 { 6004 if (ASSEMBLER_DIALECT == ASM_ATT) 6005 { 6006 if (disp) 6007 { 6008 if (flag_pic) 6009 output_pic_addr_const (file, disp, 0); 6010 else if (GET_CODE (disp) == LABEL_REF) 6011 output_asm_label (disp); 6012 else 6013 output_addr_const (file, disp); 6014 } 6015 6016 putc ('(', file); 6017 if (base) 6018 PRINT_REG (base, 0, file); 6019 if (index) 6020 { 6021 putc (',', file); 6022 PRINT_REG (index, 0, file); 6023 if (scale != 1) 6024 fprintf (file, ",%d", scale); 6025 } 6026 putc (')', file); 6027 } 6028 else 6029 { 6030 rtx offset = NULL_RTX; 6031 6032 if (disp) 6033 { 6034 /* Pull out the offset of a symbol; print any symbol itself. 
*/ 6035 if (GET_CODE (disp) == CONST 6036 && GET_CODE (XEXP (disp, 0)) == PLUS 6037 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT) 6038 { 6039 offset = XEXP (XEXP (disp, 0), 1); 6040 disp = gen_rtx_CONST (VOIDmode, 6041 XEXP (XEXP (disp, 0), 0)); 6042 } 6043 6044 if (flag_pic) 6045 output_pic_addr_const (file, disp, 0); 6046 else if (GET_CODE (disp) == LABEL_REF) 6047 output_asm_label (disp); 6048 else if (GET_CODE (disp) == CONST_INT) 6049 offset = disp; 6050 else 6051 output_addr_const (file, disp); 6052 } 6053 6054 putc ('[', file); 6055 if (base) 6056 { 6057 PRINT_REG (base, 0, file); 6058 if (offset) 6059 { 6060 if (INTVAL (offset) >= 0) 6061 putc ('+', file); 6062 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); 6063 } 6064 } 6065 else if (offset) 6066 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); 6067 else 6068 putc ('0', file); 6069 6070 if (index) 6071 { 6072 putc ('+', file); 6073 PRINT_REG (index, 0, file); 6074 if (scale != 1) 6075 fprintf (file, "*%d", scale); 6076 } 6077 putc (']', file); 6078 } 6079 } 6080} 6081 6082/* Split one or more DImode RTL references into pairs of SImode 6083 references. The RTL can be REG, offsettable MEM, integer constant, or 6084 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to 6085 split and "num" is its length. lo_half and hi_half are output arrays 6086 that parallel "operands". */ 6087 6088void 6089split_di (operands, num, lo_half, hi_half) 6090 rtx operands[]; 6091 int num; 6092 rtx lo_half[], hi_half[]; 6093{ 6094 while (num--) 6095 { 6096 rtx op = operands[num]; 6097 6098 /* simplify_subreg refuse to split volatile memory addresses, 6099 but we still have to handle it. */ 6100 if (GET_CODE (op) == MEM) 6101 { 6102 lo_half[num] = adjust_address (op, SImode, 0); 6103 hi_half[num] = adjust_address (op, SImode, 4); 6104 } 6105 else 6106 { 6107 lo_half[num] = simplify_gen_subreg (SImode, op, 6108 GET_MODE (op) == VOIDmode 6109 ? 
DImode : GET_MODE (op), 0); 6110 hi_half[num] = simplify_gen_subreg (SImode, op, 6111 GET_MODE (op) == VOIDmode 6112 ? DImode : GET_MODE (op), 4); 6113 } 6114 } 6115} 6116/* Split one or more TImode RTL references into pairs of SImode 6117 references. The RTL can be REG, offsettable MEM, integer constant, or 6118 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to 6119 split and "num" is its length. lo_half and hi_half are output arrays 6120 that parallel "operands". */ 6121 6122void 6123split_ti (operands, num, lo_half, hi_half) 6124 rtx operands[]; 6125 int num; 6126 rtx lo_half[], hi_half[]; 6127{ 6128 while (num--) 6129 { 6130 rtx op = operands[num]; 6131 6132 /* simplify_subreg refuse to split volatile memory addresses, but we 6133 still have to handle it. */ 6134 if (GET_CODE (op) == MEM) 6135 { 6136 lo_half[num] = adjust_address (op, DImode, 0); 6137 hi_half[num] = adjust_address (op, DImode, 8); 6138 } 6139 else 6140 { 6141 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0); 6142 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8); 6143 } 6144 } 6145} 6146 6147/* Output code to perform a 387 binary operation in INSN, one of PLUS, 6148 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] 6149 is the expression of the binary operation. The output may either be 6150 emitted here, or returned to the caller, like all output_* functions. 6151 6152 There is no guarantee that the operands are the same mode, as they 6153 might be within FLOAT or FLOAT_EXTEND expressions. */ 6154 6155#ifndef SYSV386_COMPAT 6156/* Set to 1 for compatibility with brain-damaged assemblers. No-one 6157 wants to fix the assemblers because that causes incompatibility 6158 with gcc. No-one wants to fix gcc because that causes 6159 incompatibility with assemblers... You can use the option of 6160 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. 
*/ 6161#define SYSV386_COMPAT 1 6162#endif 6163 6164const char * 6165output_387_binary_op (insn, operands) 6166 rtx insn; 6167 rtx *operands; 6168{ 6169 static char buf[30]; 6170 const char *p; 6171 const char *ssep; 6172 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]); 6173 6174#ifdef ENABLE_CHECKING 6175 /* Even if we do not want to check the inputs, this documents input 6176 constraints. Which helps in understanding the following code. */ 6177 if (STACK_REG_P (operands[0]) 6178 && ((REG_P (operands[1]) 6179 && REGNO (operands[0]) == REGNO (operands[1]) 6180 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM)) 6181 || (REG_P (operands[2]) 6182 && REGNO (operands[0]) == REGNO (operands[2]) 6183 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM))) 6184 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2]))) 6185 ; /* ok */ 6186 else if (!is_sse) 6187 abort (); 6188#endif 6189 6190 switch (GET_CODE (operands[3])) 6191 { 6192 case PLUS: 6193 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 6194 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 6195 p = "fiadd"; 6196 else 6197 p = "fadd"; 6198 ssep = "add"; 6199 break; 6200 6201 case MINUS: 6202 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 6203 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 6204 p = "fisub"; 6205 else 6206 p = "fsub"; 6207 ssep = "sub"; 6208 break; 6209 6210 case MULT: 6211 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 6212 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 6213 p = "fimul"; 6214 else 6215 p = "fmul"; 6216 ssep = "mul"; 6217 break; 6218 6219 case DIV: 6220 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 6221 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 6222 p = "fidiv"; 6223 else 6224 p = "fdiv"; 6225 ssep = "div"; 6226 break; 6227 6228 default: 6229 abort (); 6230 } 6231 6232 if (is_sse) 6233 { 6234 strcpy (buf, ssep); 6235 if (GET_MODE 
(operands[0]) == SFmode) 6236 strcat (buf, "ss\t{%2, %0|%0, %2}"); 6237 else 6238 strcat (buf, "sd\t{%2, %0|%0, %2}"); 6239 return buf; 6240 } 6241 strcpy (buf, p); 6242 6243 switch (GET_CODE (operands[3])) 6244 { 6245 case MULT: 6246 case PLUS: 6247 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2])) 6248 { 6249 rtx temp = operands[2]; 6250 operands[2] = operands[1]; 6251 operands[1] = temp; 6252 } 6253 6254 /* know operands[0] == operands[1]. */ 6255 6256 if (GET_CODE (operands[2]) == MEM) 6257 { 6258 p = "%z2\t%2"; 6259 break; 6260 } 6261 6262 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) 6263 { 6264 if (STACK_TOP_P (operands[0])) 6265 /* How is it that we are storing to a dead operand[2]? 6266 Well, presumably operands[1] is dead too. We can't 6267 store the result to st(0) as st(0) gets popped on this 6268 instruction. Instead store to operands[2] (which I 6269 think has to be st(1)). st(1) will be popped later. 6270 gcc <= 2.8.1 didn't have this check and generated 6271 assembly code that the Unixware assembler rejected. */ 6272 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ 6273 else 6274 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ 6275 break; 6276 } 6277 6278 if (STACK_TOP_P (operands[0])) 6279 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ 6280 else 6281 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ 6282 break; 6283 6284 case MINUS: 6285 case DIV: 6286 if (GET_CODE (operands[1]) == MEM) 6287 { 6288 p = "r%z1\t%1"; 6289 break; 6290 } 6291 6292 if (GET_CODE (operands[2]) == MEM) 6293 { 6294 p = "%z2\t%2"; 6295 break; 6296 } 6297 6298 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) 6299 { 6300#if SYSV386_COMPAT 6301 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T 6302 derived assemblers, confusingly reverse the direction of 6303 the operation for fsub{r} and fdiv{r} when the 6304 destination register is not st(0). 
The Intel assembler 6305 doesn't have this brain damage. Read !SYSV386_COMPAT to 6306 figure out what the hardware really does. */ 6307 if (STACK_TOP_P (operands[0])) 6308 p = "{p\t%0, %2|rp\t%2, %0}"; 6309 else 6310 p = "{rp\t%2, %0|p\t%0, %2}"; 6311#else 6312 if (STACK_TOP_P (operands[0])) 6313 /* As above for fmul/fadd, we can't store to st(0). */ 6314 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ 6315 else 6316 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ 6317#endif 6318 break; 6319 } 6320 6321 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) 6322 { 6323#if SYSV386_COMPAT 6324 if (STACK_TOP_P (operands[0])) 6325 p = "{rp\t%0, %1|p\t%1, %0}"; 6326 else 6327 p = "{p\t%1, %0|rp\t%0, %1}"; 6328#else 6329 if (STACK_TOP_P (operands[0])) 6330 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */ 6331 else 6332 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */ 6333#endif 6334 break; 6335 } 6336 6337 if (STACK_TOP_P (operands[0])) 6338 { 6339 if (STACK_TOP_P (operands[1])) 6340 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ 6341 else 6342 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */ 6343 break; 6344 } 6345 else if (STACK_TOP_P (operands[1])) 6346 { 6347#if SYSV386_COMPAT 6348 p = "{\t%1, %0|r\t%0, %1}"; 6349#else 6350 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */ 6351#endif 6352 } 6353 else 6354 { 6355#if SYSV386_COMPAT 6356 p = "{r\t%2, %0|\t%0, %2}"; 6357#else 6358 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ 6359#endif 6360 } 6361 break; 6362 6363 default: 6364 abort (); 6365 } 6366 6367 strcat (buf, p); 6368 return buf; 6369} 6370 6371/* Output code to initialize control word copies used by 6372 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN 6373 is set to control word rounding downwards. 
*/ 6374void 6375emit_i387_cw_initialization (normal, round_down) 6376 rtx normal, round_down; 6377{ 6378 rtx reg = gen_reg_rtx (HImode); 6379 6380 emit_insn (gen_x86_fnstcw_1 (normal)); 6381 emit_move_insn (reg, normal); 6382 if (!TARGET_PARTIAL_REG_STALL && !optimize_size 6383 && !TARGET_64BIT) 6384 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc))); 6385 else 6386 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00))); 6387 emit_move_insn (round_down, reg); 6388} 6389 6390/* Output code for INSN to convert a float to a signed int. OPERANDS 6391 are the insn operands. The output may be [HSD]Imode and the input 6392 operand may be [SDX]Fmode. */ 6393 6394const char * 6395output_fix_trunc (insn, operands) 6396 rtx insn; 6397 rtx *operands; 6398{ 6399 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; 6400 int dimode_p = GET_MODE (operands[0]) == DImode; 6401 6402 /* Jump through a hoop or two for DImode, since the hardware has no 6403 non-popping instruction. We used to do this a different way, but 6404 that was somewhat fragile and broke with post-reload splitters. */ 6405 if (dimode_p && !stack_top_dies) 6406 output_asm_insn ("fld\t%y1", operands); 6407 6408 if (!STACK_TOP_P (operands[1])) 6409 abort (); 6410 6411 if (GET_CODE (operands[0]) != MEM) 6412 abort (); 6413 6414 output_asm_insn ("fldcw\t%3", operands); 6415 if (stack_top_dies || dimode_p) 6416 output_asm_insn ("fistp%z0\t%0", operands); 6417 else 6418 output_asm_insn ("fist%z0\t%0", operands); 6419 output_asm_insn ("fldcw\t%2", operands); 6420 6421 return ""; 6422} 6423 6424/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi 6425 should be used and 2 when fnstsw should be used. UNORDERED_P is true 6426 when fucom should be used. 
*/ 6427 6428const char * 6429output_fp_compare (insn, operands, eflags_p, unordered_p) 6430 rtx insn; 6431 rtx *operands; 6432 int eflags_p, unordered_p; 6433{ 6434 int stack_top_dies; 6435 rtx cmp_op0 = operands[0]; 6436 rtx cmp_op1 = operands[1]; 6437 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]); 6438 6439 if (eflags_p == 2) 6440 { 6441 cmp_op0 = cmp_op1; 6442 cmp_op1 = operands[2]; 6443 } 6444 if (is_sse) 6445 { 6446 if (GET_MODE (operands[0]) == SFmode) 6447 if (unordered_p) 6448 return "ucomiss\t{%1, %0|%0, %1}"; 6449 else 6450 return "comiss\t{%1, %0|%0, %y}"; 6451 else 6452 if (unordered_p) 6453 return "ucomisd\t{%1, %0|%0, %1}"; 6454 else 6455 return "comisd\t{%1, %0|%0, %y}"; 6456 } 6457 6458 if (! STACK_TOP_P (cmp_op0)) 6459 abort (); 6460 6461 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; 6462 6463 if (STACK_REG_P (cmp_op1) 6464 && stack_top_dies 6465 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1)) 6466 && REGNO (cmp_op1) != FIRST_STACK_REG) 6467 { 6468 /* If both the top of the 387 stack dies, and the other operand 6469 is also a stack register that dies, then this must be a 6470 `fcompp' float compare */ 6471 6472 if (eflags_p == 1) 6473 { 6474 /* There is no double popping fcomi variant. Fortunately, 6475 eflags is immune from the fstp's cc clobbering. */ 6476 if (unordered_p) 6477 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands); 6478 else 6479 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands); 6480 return "fstp\t%y0"; 6481 } 6482 else 6483 { 6484 if (eflags_p == 2) 6485 { 6486 if (unordered_p) 6487 return "fucompp\n\tfnstsw\t%0"; 6488 else 6489 return "fcompp\n\tfnstsw\t%0"; 6490 } 6491 else 6492 { 6493 if (unordered_p) 6494 return "fucompp"; 6495 else 6496 return "fcompp"; 6497 } 6498 } 6499 } 6500 else 6501 { 6502 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. 
*/ 6503 6504 static const char * const alt[24] = 6505 { 6506 "fcom%z1\t%y1", 6507 "fcomp%z1\t%y1", 6508 "fucom%z1\t%y1", 6509 "fucomp%z1\t%y1", 6510 6511 "ficom%z1\t%y1", 6512 "ficomp%z1\t%y1", 6513 NULL, 6514 NULL, 6515 6516 "fcomi\t{%y1, %0|%0, %y1}", 6517 "fcomip\t{%y1, %0|%0, %y1}", 6518 "fucomi\t{%y1, %0|%0, %y1}", 6519 "fucomip\t{%y1, %0|%0, %y1}", 6520 6521 NULL, 6522 NULL, 6523 NULL, 6524 NULL, 6525 6526 "fcom%z2\t%y2\n\tfnstsw\t%0", 6527 "fcomp%z2\t%y2\n\tfnstsw\t%0", 6528 "fucom%z2\t%y2\n\tfnstsw\t%0", 6529 "fucomp%z2\t%y2\n\tfnstsw\t%0", 6530 6531 "ficom%z2\t%y2\n\tfnstsw\t%0", 6532 "ficomp%z2\t%y2\n\tfnstsw\t%0", 6533 NULL, 6534 NULL 6535 }; 6536 6537 int mask; 6538 const char *ret; 6539 6540 mask = eflags_p << 3; 6541 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2; 6542 mask |= unordered_p << 1; 6543 mask |= stack_top_dies; 6544 6545 if (mask >= 24) 6546 abort (); 6547 ret = alt[mask]; 6548 if (ret == NULL) 6549 abort (); 6550 6551 return ret; 6552 } 6553} 6554 6555void 6556ix86_output_addr_vec_elt (file, value) 6557 FILE *file; 6558 int value; 6559{ 6560 const char *directive = ASM_LONG; 6561 6562 if (TARGET_64BIT) 6563 { 6564#ifdef ASM_QUAD 6565 directive = ASM_QUAD; 6566#else 6567 abort (); 6568#endif 6569 } 6570 6571 fprintf (file, "%s%s%d\n", directive, LPREFIX, value); 6572} 6573 6574void 6575ix86_output_addr_diff_elt (file, value, rel) 6576 FILE *file; 6577 int value, rel; 6578{ 6579 if (TARGET_64BIT) 6580 fprintf (file, "%s%s%d-.+4+(.-%s%d)\n", 6581 ASM_LONG, LPREFIX, value, LPREFIX, rel); 6582 else if (HAVE_AS_GOTOFF_IN_DATA) 6583 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value); 6584 else 6585 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n", 6586 ASM_LONG, LPREFIX, value); 6587} 6588 6589/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate 6590 for the target. 
*/

void
ix86_expand_clear (dest)
     rtx dest;
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  if (!reload_completed)
    abort ();

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.
     The xor form clobbers the flags (hard reg 17), so wrap the set in a
     PARALLEL carrying that clobber.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}

/* Expand a MODE move of operands[1] into operands[0], massaging the
   operands so the result matches what the move patterns accept
   (PIC legitimization, no mem-to-mem, constants forced where
   needed).  */

void
ix86_expand_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  /* During/after reload we may not create new pseudos or stack slots.  */
  int strict = (reload_in_progress || reload_completed);
  rtx insn;

  if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
    {
      /* Emit insns to move operands[1] into operands[0].  */

      if (GET_CODE (operands[0]) == MEM)
	operands[1] = force_reg (Pmode, operands[1]);
      else
	{
	  rtx temp = operands[0];
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (operands[1], temp);
	  /* Legitimization may already have produced the final move.  */
	  if (temp == operands[0])
	    return;
	  operands[1] = temp;
	}
    }
  else
    {
      /* No mem-to-mem moves (pushes of memory are the exception when
	 no rounding padding is required).  */
      if (GET_CODE (operands[0]) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (operands[0], mode))
	  && GET_CODE (operands[1]) == MEM)
	operands[1] = force_reg (mode, operands[1]);

      if (push_operand (operands[0], mode)
	  && ! general_no_elim_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (operands[1], mode)
	  && !x86_64_zero_extended_value (operands[1])
	  && !register_operand (operands[0], mode)
	  && optimize && !reload_completed && !reload_in_progress)
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (operands[1]) == CONST_DOUBLE
		   && register_operand (operands[0], mode))
	    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
	}
    }

  insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);

  emit_insn (insn);
}

/* Expand a vector-mode move, forcing non-zero constants to memory and
   avoiding mem-to-mem moves.  */

void
ix86_expand_vector_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (operands[0], mode)
      && CONSTANT_P (operands[1]))
    {
      rtx addr = gen_reg_rtx (Pmode);
      emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
      operands[1] = gen_rtx_MEM (mode, addr);
    }

  /* Make operand1 a register if it isn't already.
     NOTE(review): the temporary is forced into TImode regardless of
     MODE — presumably all vector modes handled here are 128 bits wide;
     confirm before reusing for other widths.  */
  if ((reload_in_progress | reload_completed) == 0
      && !register_operand (operands[0], mode)
      && !register_operand (operands[1], mode)
      && operands[1] != CONST0_RTX (mode))
    {
      rtx temp = force_reg (TImode, operands[1]);
      emit_move_insn (operands[0], temp);
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
}

/* Attempt to expand a binary operator.
   Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  /* 0 = dest does not match a source; 1 = matches src1; 2 = matches
     src2 (commutative ops only).  */
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      /* Keep whichever source already matches the memory destination.  */
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      /* All other operations clobber EFLAGS; say so explicitly.  */
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[3];
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutable, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
	    || (GET_RTX_CLASS (code) == 'c'
		&& rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutable and the source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != 'c'
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;
  return 1;
}

/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.
*/ 6838 6839void 6840ix86_expand_unary_operator (code, mode, operands) 6841 enum rtx_code code; 6842 enum machine_mode mode; 6843 rtx operands[]; 6844{ 6845 int matching_memory; 6846 rtx src, dst, op, clob; 6847 6848 dst = operands[0]; 6849 src = operands[1]; 6850 6851 /* If the destination is memory, and we do not have matching source 6852 operands, do things in registers. */ 6853 matching_memory = 0; 6854 if (GET_CODE (dst) == MEM) 6855 { 6856 if (rtx_equal_p (dst, src)) 6857 matching_memory = 1; 6858 else 6859 dst = gen_reg_rtx (mode); 6860 } 6861 6862 /* When source operand is memory, destination must match. */ 6863 if (!matching_memory && GET_CODE (src) == MEM) 6864 src = force_reg (mode, src); 6865 6866 /* If optimizing, copy to regs to improve CSE */ 6867 if (optimize && ! no_new_pseudos) 6868 { 6869 if (GET_CODE (dst) == MEM) 6870 dst = gen_reg_rtx (mode); 6871 if (GET_CODE (src) == MEM) 6872 src = force_reg (mode, src); 6873 } 6874 6875 /* Emit the instruction. */ 6876 6877 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src)); 6878 if (reload_in_progress || code == NOT) 6879 { 6880 /* Reload doesn't know about the flags register, and doesn't know that 6881 it doesn't want to clobber it. */ 6882 if (code != NOT) 6883 abort (); 6884 emit_insn (op); 6885 } 6886 else 6887 { 6888 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); 6889 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); 6890 } 6891 6892 /* Fix up the destination if needed. */ 6893 if (dst != operands[0]) 6894 emit_move_insn (operands[0], dst); 6895} 6896 6897/* Return TRUE or FALSE depending on whether the unary operator meets the 6898 appropriate constraints. */ 6899 6900int 6901ix86_unary_operator_ok (code, mode, operands) 6902 enum rtx_code code ATTRIBUTE_UNUSED; 6903 enum machine_mode mode ATTRIBUTE_UNUSED; 6904 rtx operands[2] ATTRIBUTE_UNUSED; 6905{ 6906 /* If one of operands is memory, source and destination must match. 
*/ 6907 if ((GET_CODE (operands[0]) == MEM 6908 || GET_CODE (operands[1]) == MEM) 6909 && ! rtx_equal_p (operands[0], operands[1])) 6910 return FALSE; 6911 return TRUE; 6912} 6913 6914/* Return TRUE or FALSE depending on whether the first SET in INSN 6915 has source and destination with matching CC modes, and that the 6916 CC mode is at least as constrained as REQ_MODE. */ 6917 6918int 6919ix86_match_ccmode (insn, req_mode) 6920 rtx insn; 6921 enum machine_mode req_mode; 6922{ 6923 rtx set; 6924 enum machine_mode set_mode; 6925 6926 set = PATTERN (insn); 6927 if (GET_CODE (set) == PARALLEL) 6928 set = XVECEXP (set, 0, 0); 6929 if (GET_CODE (set) != SET) 6930 abort (); 6931 if (GET_CODE (SET_SRC (set)) != COMPARE) 6932 abort (); 6933 6934 set_mode = GET_MODE (SET_DEST (set)); 6935 switch (set_mode) 6936 { 6937 case CCNOmode: 6938 if (req_mode != CCNOmode 6939 && (req_mode != CCmode 6940 || XEXP (SET_SRC (set), 1) != const0_rtx)) 6941 return 0; 6942 break; 6943 case CCmode: 6944 if (req_mode == CCGCmode) 6945 return 0; 6946 /* FALLTHRU */ 6947 case CCGCmode: 6948 if (req_mode == CCGOCmode || req_mode == CCNOmode) 6949 return 0; 6950 /* FALLTHRU */ 6951 case CCGOCmode: 6952 if (req_mode == CCZmode) 6953 return 0; 6954 /* FALLTHRU */ 6955 case CCZmode: 6956 break; 6957 6958 default: 6959 abort (); 6960 } 6961 6962 return (GET_MODE (SET_SRC (set)) == set_mode); 6963} 6964 6965/* Generate insn patterns to do an integer compare of OPERANDS. */ 6966 6967static rtx 6968ix86_expand_int_compare (code, op0, op1) 6969 enum rtx_code code; 6970 rtx op0, op1; 6971{ 6972 enum machine_mode cmpmode; 6973 rtx tmp, flags; 6974 6975 cmpmode = SELECT_CC_MODE (code, op0, op1); 6976 flags = gen_rtx_REG (cmpmode, FLAGS_REG); 6977 6978 /* This is very simple, but making the interface the same as in the 6979 FP case makes the rest of the code easier. 
*/ 6980 tmp = gen_rtx_COMPARE (cmpmode, op0, op1); 6981 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp)); 6982 6983 /* Return the test that should be put into the flags user, i.e. 6984 the bcc, scc, or cmov instruction. */ 6985 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx); 6986} 6987 6988/* Figure out whether to use ordered or unordered fp comparisons. 6989 Return the appropriate mode to use. */ 6990 6991enum machine_mode 6992ix86_fp_compare_mode (code) 6993 enum rtx_code code ATTRIBUTE_UNUSED; 6994{ 6995 /* ??? In order to make all comparisons reversible, we do all comparisons 6996 non-trapping when compiling for IEEE. Once gcc is able to distinguish 6997 all forms trapping and nontrapping comparisons, we can make inequality 6998 comparisons trapping again, since it results in better code when using 6999 FCOM based compares. */ 7000 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode; 7001} 7002 7003enum machine_mode 7004ix86_cc_mode (code, op0, op1) 7005 enum rtx_code code; 7006 rtx op0, op1; 7007{ 7008 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT) 7009 return ix86_fp_compare_mode (code); 7010 switch (code) 7011 { 7012 /* Only zero flag is needed. */ 7013 case EQ: /* ZF=0 */ 7014 case NE: /* ZF!=0 */ 7015 return CCZmode; 7016 /* Codes needing carry flag. */ 7017 case GEU: /* CF=0 */ 7018 case GTU: /* CF=0 & ZF=0 */ 7019 case LTU: /* CF=1 */ 7020 case LEU: /* CF=1 | ZF=1 */ 7021 return CCmode; 7022 /* Codes possibly doable only with sign flag when 7023 comparing against zero. */ 7024 case GE: /* SF=OF or SF=0 */ 7025 case LT: /* SF<>OF or SF=1 */ 7026 if (op1 == const0_rtx) 7027 return CCGOCmode; 7028 else 7029 /* For other cases Carry flag is not required. */ 7030 return CCGCmode; 7031 /* Codes doable only with sign flag when comparing 7032 against zero, but we miss jump instruction for it 7033 so we need to use relational tests agains overflow 7034 that thus needs to be zero. 
*/
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
	 mode.  */
    case USE:
      return CCmode;
    default:
      abort ();
    }
}

/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (code)
     enum rtx_code code ATTRIBUTE_UNUSED;
{
  enum rtx_code swapped_code = swap_condition (code);
  /* Use fcomi when it is no more expensive than any alternative, in
     either operand order.  */
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
	  || (ix86_fp_comparison_cost (swapped_code)
	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
}

/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (code, pop0, pop1)
     enum rtx_code code;
     rtx *pop0, *pop1;
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || op_mode == XFmode
	  || op_mode == TFmode
	  || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  /* Loadable 387 constants (0.0, 1.0, ...) can stay as rtx;
	     anything else must live in the constant pool.  */
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op0) == REG || !reload_completed))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}

/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */
static enum rtx_code
ix86_fp_compare_code_to_integer (code)
     enum rtx_code code;
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
      break;
    case UNEQ:
      return EQ;
      break;
    case UNLT:
      return LTU;
      break;
    case UNLE:
      return LEU;
      break;
    case LTGT:
      return NE;
      break;
    default:
      return UNKNOWN;
    }
}

/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is comparison code for branch that will
   branch around FIRST_CODE and SECOND_CODE.  If some of branches
   is not required, set value to NIL.
   We never require more than two branches.  */
static void
ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
     enum rtx_code code, *bypass_code, *first_code, *second_code;
{
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

       cmp    ZF PF CF
       >      0  0  0
       <      0  0  1
       =      1  0  0
       un     1  1  1 */

  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  /* Without IEEE conformance, unordered operands never occur, so the
     extra branches are unnecessary.  */
  if (!TARGET_IEEE_FP)
    {
      *second_code = NIL;
      *bypass_code = NIL;
    }
}

/* Return cost of comparison done fcom + arithmetics operations on AX.
   All following functions do use number of instructions as an cost metrics.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */
static int
ix86_fp_comparison_arithmetics_cost (code)
     enum rtx_code code;
{
  if (!TARGET_IEEE_FP)
    return 4;
  /* The cost of code output by ix86_expand_fp_compare.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      return 4;
      break;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      return 5;
      break;
    case LE:
    case UNGT:
      return 6;
      break;
    default:
      abort ();
    }
}

/* Return cost of comparison done using fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_fcomi_cost (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not supported - this
     prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  /* fcomi + jump, plus one more jump if a bypass or second test is
     needed.  */
  return (bypass_code != NIL || second_code != NIL) + 2;
}

/* Return cost of comparison done using sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_sahf_cost (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not preferred - this
     avoids gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  /* fnstsw + sahf + jump, plus one more jump when needed.  */
  return (bypass_code != NIL || second_code != NIL) + 3;
}

/* Compute cost of the comparison done using any method.
   See ix86_fp_comparison_arithmetics_cost for the metrics.
*/
static int
ix86_fp_comparison_cost (code)
     enum rtx_code code;
{
  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
  int min;

  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
  sahf_cost = ix86_fp_comparison_sahf_cost (code);

  /* Cheapest of the three strategies wins.  */
  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
  if (min > sahf_cost)
    min = sahf_cost;
  if (min > fcomi_cost)
    min = fcomi_cost;
  return min;
}

/* Generate insn patterns to do a floating point compare of OPERANDS.
   SCRATCH, if non-null, is an HImode register for the fnstsw result;
   *SECOND_TEST / *BYPASS_TEST receive the extra comparison rtx when the
   chosen code needs a second or a bypass branch (see
   ix86_fp_comparison_codes), else NULL_RTX.  */

static rtx
ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
     enum rtx_code code;
     rtx op0, op1, scratch;
     rtx *second_test;
     rtx *bypass_test;
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
	{
	  /* fcomi: compare directly into EFLAGS.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			     tmp);
	  emit_insn (tmp);
	}
      else
	{
	  /* fnstsw into SCRATCH, then sahf to copy AH into EFLAGS.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
	  if (!scratch)
	    scratch = gen_reg_rtx (HImode);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
	  emit_insn (gen_x86_sahf_1 (scratch));
	}

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
      if (second_code != NIL)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.

	 The masks below select the x87 status-word condition bits as
	 seen in AH: 0x01 = C0, 0x04 = C2, 0x40 = C3, 0x45 = C0|C2|C3.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	      break;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  abort ();
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.
*/
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}

/* Expand the comparison CODE of ix86_compare_op0 with ix86_compare_op1,
   dispatching to the integer or FP expander; returns the flags-user
   test rtx.  See ix86_expand_fp_compare for SECOND_TEST/BYPASS_TEST.  */

rtx
ix86_expand_compare (code, second_test, bypass_test)
     enum rtx_code code;
     rtx *second_test, *bypass_test;
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
				  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

/* Return true if the CODE will result in nontrivial jump sequence.  */
bool
ix86_fp_jump_nontrivial_p (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Without cmov the sahf/arithmetic sequences are always nontrivial.  */
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != NIL || second_code != NIL;
}

/* Emit a conditional jump to LABEL on comparison CODE of the global
   ix86_compare_op0/ix86_compare_op1 pair.  */

void
ix86_expand_branch (code, label)
     enum rtx_code code;
     rtx label;
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      /* One compare, one conditional jump.  */
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
    case TFmode:
      {
	rtvec vec;
	int use_fcomi;
	enum rtx_code bypass_code, first_code, second_code;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

	/* Check whether we will use the natural sequence with one jump.  If
	   so, we can expand jump early.  Otherwise delay expansion by
	   creating compound insn to not confuse optimizers.  */
	if (bypass_code == NIL && second_code == NIL
	    && TARGET_CMOVE)
	  {
	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx, NULL_RTX);
	  }
	else
	  {
	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
				  ix86_compare_op0, ix86_compare_op1);
	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
					gen_rtx_LABEL_REF (VOIDmode, label),
					pc_rtx);
	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	    use_fcomi = ix86_use_fcomi_compare (code);
	    /* Clobber FP (reg 18) and integer (reg 17) flags; the
	       fnstsw variant also needs an HImode scratch.  */
	    vec = rtvec_alloc (3 + !use_fcomi);
	    RTVEC_ELT (vec, 0) = tmp;
	    RTVEC_ELT (vec, 1)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	    RTVEC_ELT (vec, 2)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	    if (! use_fcomi)
	      RTVEC_ELT (vec, 3)
		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	  }
	return;
      }

    case DImode:
      if (TARGET_64BIT)
	goto simple;
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	/* Canonicalize: keep any constant in op1.  */
	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    /* Recurse as a SImode compare of the OR against zero.  */
	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.
*/ 7680 7681 label2 = gen_label_rtx (); 7682 7683 code1 = code; 7684 code2 = swap_condition (code); 7685 code3 = unsigned_condition (code); 7686 7687 switch (code) 7688 { 7689 case LT: case GT: case LTU: case GTU: 7690 break; 7691 7692 case LE: code1 = LT; code2 = GT; break; 7693 case GE: code1 = GT; code2 = LT; break; 7694 case LEU: code1 = LTU; code2 = GTU; break; 7695 case GEU: code1 = GTU; code2 = LTU; break; 7696 7697 case EQ: code1 = NIL; code2 = NE; break; 7698 case NE: code2 = NIL; break; 7699 7700 default: 7701 abort (); 7702 } 7703 7704 /* 7705 * a < b => 7706 * if (hi(a) < hi(b)) goto true; 7707 * if (hi(a) > hi(b)) goto false; 7708 * if (lo(a) < lo(b)) goto true; 7709 * false: 7710 */ 7711 7712 ix86_compare_op0 = hi[0]; 7713 ix86_compare_op1 = hi[1]; 7714 7715 if (code1 != NIL) 7716 ix86_expand_branch (code1, label); 7717 if (code2 != NIL) 7718 ix86_expand_branch (code2, label2); 7719 7720 ix86_compare_op0 = lo[0]; 7721 ix86_compare_op1 = lo[1]; 7722 ix86_expand_branch (code3, label); 7723 7724 if (code2 != NIL) 7725 emit_label (label2); 7726 return; 7727 } 7728 7729 default: 7730 abort (); 7731 } 7732} 7733 7734/* Split branch based on floating point condition. */ 7735void 7736ix86_split_fp_branch (code, op1, op2, target1, target2, tmp) 7737 enum rtx_code code; 7738 rtx op1, op2, target1, target2, tmp; 7739{ 7740 rtx second, bypass; 7741 rtx label = NULL_RTX; 7742 rtx condition; 7743 int bypass_probability = -1, second_probability = -1, probability = -1; 7744 rtx i; 7745 7746 if (target2 != pc_rtx) 7747 { 7748 rtx tmp = target2; 7749 code = reverse_condition_maybe_unordered (code); 7750 target2 = target1; 7751 target1 = tmp; 7752 } 7753 7754 condition = ix86_expand_fp_compare (code, op1, op2, 7755 tmp, &second, &bypass); 7756 7757 if (split_branch_probability >= 0) 7758 { 7759 /* Distribute the probabilities across the jumps. 7760 Assume the BYPASS and SECOND to be always test 7761 for UNORDERED. 
	 */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
	 to be updated.  Later we may run some experiments and see
	 if unordered values are more frequent in practice.  */
      if (bypass)
	bypass_probability = 1;
      if (second)
	second_probability = 1;
    }
  /* The bypass condition (if any) jumps around both real branches.  */
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
						 bypass,
						 gen_rtx_LABEL_REF (VOIDmode,
								    label),
						 pc_rtx)));
      if (bypass_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (bypass_probability),
			       REG_NOTES (i));
    }
  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
			   GEN_INT (probability),
			   REG_NOTES (i));
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
						 target2)));
      if (second_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (second_probability),
			       REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}

/* Expand a setcc of comparison CODE (operands in ix86_compare_op0/op1)
   into the QImode register DEST.  Returns 0 (FAIL) for 32-bit DImode
   comparisons, 1 (DONE) otherwise.  Extra FP conditions from
   ix86_expand_compare are merged with and/or of a second setcc.  */
int
ix86_expand_setcc (code, dest)
     enum rtx_code code;
     rtx dest;
{
  rtx ret, tmp, tmpreg;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == DImode
      && !TARGET_64BIT)
    return 0; /* FAIL */

  if (GET_MODE (dest) != QImode)
    abort ();

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  /* At most one of the two extra tests can be present.  */
	  if (second_test)
	    abort ();
	  test = bypass_test;
	  bypass = 1;
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      /* Bypass conditions are ANDed in (reversed above); second
	 conditions are ORed in.  */
      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  return 1; /* DONE */
}

/* Expand an integer conditional move operands[0] = operands[1]
   ? operands[2] : operands[3] (comparison operands come from the
   globals ix86_compare_op0/op1).  Returns 1 when done, 0 on failure
   (caller must then fall back to a different expansion).  */
int
ix86_expand_int_movcc (operands)
     rtx operands[];
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* When the compare code is not LTU or GEU, we can not use sbbl case.
     In case comparison is done with immediate, we can convert it to LTU or
     GEU by altering the integer.  */

  if ((code == LEU || code == GTU)
      && GET_CODE (ix86_compare_op1) == CONST_INT
      && mode != HImode
      /* Adding 1 to 0xffffffff would wrap, so exclude it.  */
      && (unsigned int) INTVAL (ix86_compare_op1) != 0xffffffff
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      if (code == LEU)
	code = LTU;
      else
	code = GEU;
      ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
    }

  /* Prepare the compare sequence now; it is emitted later, only on
     the paths that need it.  */
  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = gen_sequence ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.
  */

  if (mode != HImode
      && (mode != DImode || TARGET_64BIT)
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      if ((compare_code == LTU || compare_code == GEU)
	  && !second_test && !bypass_test)
	{

	  /* Detect overlap between destination and compare sources.  */
	  rtx tmp = out;

	  /* To simplify rest of code, restrict to the GEU case.  */
	  if (compare_code == LTU)
	    {
	      int tmp = ct;
	      ct = cf;
	      cf = tmp;
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	  diff = ct - cf;

	  if (reg_overlap_mentioned_p (out, ix86_compare_op0)
	      || reg_overlap_mentioned_p (out, ix86_compare_op1))
	    tmp = gen_reg_rtx (mode);

	  /* Materialize 0 / -1 from the carry flag via sbb.  */
	  emit_insn (compare_seq);
	  if (mode == DImode)
	    emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
	  else
	    emit_insn (gen_x86_movsicc_0_m1 (tmp));

	  if (diff == 1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [addl dest, ct]
	       *
	       * Size 5 - 8.
	       */
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (ct),
					   tmp, 1, OPTAB_DIRECT);
	    }
	  else if (cf == -1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * orl $ct, dest
	       *
	       * Size 8.
	       */
	      tmp = expand_simple_binop (mode, IOR,
					 tmp, GEN_INT (ct),
					 tmp, 1, OPTAB_DIRECT);
	    }
	  else if (diff == -1 && ct)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * xorl $-1, dest
	       * [addl dest, cf]
	       *
	       * Size 8 - 11.
	       */
	      tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
	      if (cf)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (cf),
					   tmp, 1, OPTAB_DIRECT);
	    }
	  else
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * andl cf - ct, dest
	       * [addl dest, ct]
	       *
	       * Size 8 - 11.
	       */
	      tmp = expand_simple_binop (mode, AND,
					 tmp,
					 GEN_INT (trunc_int_for_mode
						  (cf - ct, mode)),
					 tmp, 1, OPTAB_DIRECT);
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (ct),
					   tmp, 1, OPTAB_DIRECT);
	    }

	  if (tmp != out)
	    emit_move_insn (out, tmp);

	  return 1; /* DONE */
	}

      /* Canonicalize so diff = ct - cf is positive (swap arms and
	 reverse the condition if needed).  */
      diff = ct - cf;
      if (diff < 0)
	{
	  HOST_WIDE_INT tmp;
	  tmp = ct, ct = cf, cf = tmp;
	  diff = -diff;
	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
	    {
	      /* We may be reversing unordered compare to normal compare, that
		 is not valid in general (we may convert non-trapping condition
		 to trapping one), however on i386 we currently emit all
		 comparisons unordered.  */
	      compare_code = reverse_condition_maybe_unordered (compare_code);
	      code = reverse_condition_maybe_unordered (code);
	    }
	  else
	    {
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	}
      /* diff values of 1,2,3,4,5,8,9 are exactly those an lea with
	 scale (and optional base) can produce from a 0/1 setcc.  */
      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
	   || diff == 3 || diff == 5 || diff == 9)
	  && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
	{
	  /*
	   * xorl dest,dest
	   * cmpl op1,op2
	   * setcc dest
	   * lea cf(dest*(ct-cf)),dest
	   *
	   * Size 14.
	   *
	   * This also catches the degenerate setcc-only case.
	   */

	  rtx tmp;
	  int nops;

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to get arithmetics done in proper mode to match.  */
	  if (diff == 1)
	    tmp = out;
	  else
	    {
	      rtx out1;
	      out1 = out;
	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
	      nops++;
	      if (diff & 1)
		{
		  tmp = gen_rtx_PLUS (mode, tmp, out1);
		  nops++;
		}
	    }
	  if (cf != 0)
	    {
	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
	      nops++;
	    }
	  if (tmp != out
	      && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
	    {
	      /* A single add cannot be expressed as lea without
		 clobbering flags, so attach an explicit clobber.  */
	      if (nops == 1)
		{
		  rtx clob;

		  clob = gen_rtx_REG (CCmode, FLAGS_REG);
		  clob = gen_rtx_CLOBBER (VOIDmode, clob);

		  tmp = gen_rtx_SET (VOIDmode, out, tmp);
		  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
		  emit_insn (tmp);
		}
	      else
		emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
	    }
	  if (out != operands[0])
	    emit_move_insn (operands[0], out);

	  return 1; /* DONE */
	}

      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * Size 20.			Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       *
       * %%% Parameterize branch_cost on the tuning architecture, then
       * use that.  The 80386 couldn't care less about mispredicts.
       */

      if (!optimize_size && !TARGET_CMOVE)
	{
	  if (ct == 0)
	    {
	      ct = cf;
	      cf = 0;
	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
		{
		  /* We may be reversing unordered compare to normal compare,
		     that is not valid in general (we may convert non-trapping
		     condition to trapping one), however on i386 we currently
		     emit all comparisons unordered.  */
		  compare_code = reverse_condition_maybe_unordered (compare_code);
		  code = reverse_condition_maybe_unordered (code);
		}
	      else
		{
		  compare_code = reverse_condition (compare_code);
		  code = reverse_condition (code);
		}
	    }

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  /* dest = ((setcc - 1) & (cf - ct)) + ct.  */
	  out = expand_simple_binop (mode, PLUS,
				     out, constm1_rtx,
				     out, 1, OPTAB_DIRECT);
	  out = expand_simple_binop (mode, AND,
				     out,
				     GEN_INT (trunc_int_for_mode
					      (cf - ct, mode)),
				     out, 1, OPTAB_DIRECT);
	  out = expand_simple_binop (mode, PLUS,
				     out, GEN_INT (ct),
				     out, 1, OPTAB_DIRECT);
	  if (out != operands[0])
	    emit_move_insn (operands[0], out);

	  return 1; /* DONE */
	}
    }

  if (!TARGET_CMOVE)
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (optimize_size)
	return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load a
	 constant with the above and mask it in with a logical operation.  */

      if (GET_CODE (operands[2]) == CONST_INT)
	{
	  var = operands[3];
	  if (INTVAL (operands[2]) == 0)
	    operands[3] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[2]) == -1)
	    operands[3] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else if (GET_CODE (operands[3]) == CONST_INT)
	{
	  var = operands[2];
	  if (INTVAL (operands[3]) == 0)
	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1)
	    operands[2] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else
	return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
	return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
			  OPTAB_WIDEN);
      if (out != orig_out)
	emit_move_insn (orig_out, out);

      return 1; /* DONE */
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  /* The extra FP conditions read operands[0] as a cmov source below,
     so copy any arm that overlaps the destination to a fresh reg.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }
  if (! register_operand (operands[2], VOIDmode)
      && ! register_operand (operands[3], VOIDmode))
    operands[2] = force_reg (mode, operands[2]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode,
						compare_op, operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode,
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode,
						  second_test,
						  operands[2],
						  operands[0])));

  return 1; /* DONE */
}

/* Expand a floating point conditional move.  Returns 1 when done.
   May use SSE min/max instructions when the comparison and the move
   are in the same SF/DF mode; otherwise falls back to fcmov.  */
int
ix86_expand_fp_movcc (operands)
     rtx operands[];
{
  enum rtx_code code;
  rtx tmp;
  rtx compare_op, second_test, bypass_test;

  /* For SF/DFmode conditional moves based on comparisons
     in same mode, we may want to use SSE min/max instructions.  */
  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
      /* The SSE comparisons does not support the LTGT/UNEQ pair.  */
      && (!TARGET_IEEE_FP
	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
      /* We may be called from the post-reload splitter.  */
      && (!REG_P (operands[0])
	  || SSE_REG_P (operands[0])
	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
    {
      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
      code = GET_CODE (operands[1]);

      /* See if we have (cross) match between comparison operands and
	 conditional move operands.  */
      if (rtx_equal_p (operands[2], op1))
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = reverse_condition_maybe_unordered (code);
	}
      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
	{
	  /* Check for min operation.
	   */
	  if (code == LT)
	    {
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_minsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_mindf3 (operands[0], op0, op1));
	      return 1;
	    }
	  /* Check for max operation.  */
	  if (code == GT)
	    {
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_maxsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_maxdf3 (operands[0], op0, op1));
	      return 1;
	    }
	}
      /* Manage condition to be sse_comparison_operator.  In case we are
	 in non-ieee mode, try to canonicalize the destination operand
	 to be first in the comparison - this helps reload to avoid extra
	 moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
	{
	  rtx tmp = ix86_compare_op0;
	  ix86_compare_op0 = ix86_compare_op1;
	  ix86_compare_op1 = tmp;
	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      /* Similarly try to manage result to be first operand of conditional
	 move.  We also don't support the NE comparison on SSE, so try to
	 avoid it.  */
      if ((rtx_equal_p (operands[0], operands[3])
	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
	{
	  rtx tmp = operands[2];
	  operands[2] = operands[3];
	  operands[3] = tmp;
	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
					  (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      if (GET_MODE (operands[0]) == SFmode)
	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      else
	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* The floating point conditional move instructions don't directly
     support signed integer comparisons.
   */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      /* Fall back: materialize the condition with setcc, then cmov
	 on the resulting QImode value != 0.  */
      if (second_test != NULL || bypass_test != NULL)
	abort ();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  /* Extra FP conditions use operands[0] as a cmov source below, so
     copy any arm that overlaps the destination to a fresh register.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						compare_op,
						operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  second_test,
						  operands[2],
						  operands[0])));

  return 1;
}

/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and nonoffsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  int size;

  /* Number of word-sized parts: 32-bit uses 4-byte words (TFmode is
     treated as 3 parts), 64-bit uses 8-byte words.  */
  if (!TARGET_64BIT)
    size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp moves,
     that force all constants to memory to allow combining.  */

  if (GET_CODE (operand) == MEM
      && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
    operand = get_pool_constant (XEXP (operand, 0));

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Hard-register splitting is only meaningful after reload.  */
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		case TFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = GEN_INT (trunc_int_for_mode (l[2], SImode));
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = GEN_INT (trunc_int_for_mode (l[1], SImode));
	      parts[0] = GEN_INT (trunc_int_for_mode (l[0], SImode));
	    }
	  else
	    abort ();
	}
    }
  else
    {
      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  if (REG_P (operand))
	    {
	      /* Hard-register splitting is only meaningful after reload.  */
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[3];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = GEN_INT (trunc_int_for_mode
			     ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
			      + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
			      DImode));
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);
	      parts[1] = GEN_INT (trunc_int_for_mode (l[2], SImode));
	    }
	  else
	    abort ();
	}
    }

  return size;
}

/* Emit insns to perform a move or push of DI, DF, and XF values.
   All required insns are emitted here (the function returns nothing).
   On the non-push path, operands 2-4 are filled with the destination
   parts and operands 5-7 with the source parts, ordered so no source
   part is overwritten before it is copied.  */

void
ix86_split_long_move (operands)
     rtx operands[];
{
  rtx part[2][3];		/* part[0] = dest parts, part[1] = src parts */
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  collisions = 1;
	  emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
				  XEXP (part[1][0], 0)));
	  part[1][0] = change_address (part[1][0],
				       TARGET_64BIT ? DImode : SImode,
				       part[0][nparts - 1]);
	  part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
	  if (nparts == 3)
	    part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      /* We use only first 12 bytes of TFmode value, but for pushing we
		 are required to adjust stack as if we were pushing real 16byte
		 value.  (The !TARGET_64BIT test below is redundant — we are
		 already on the 32-bit path.)  */
	      if (mode == TFmode && !TARGET_64BIT)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				       GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these comes from attempt to avoid REX prefix on
	     moving of second half of TFmode value.
*/ 8667 if (GET_MODE (part[1][1]) == SImode) 8668 { 8669 if (GET_CODE (part[1][1]) == MEM) 8670 part[1][1] = adjust_address (part[1][1], DImode, 0); 8671 else if (REG_P (part[1][1])) 8672 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1])); 8673 else 8674 abort (); 8675 if (GET_MODE (part[1][0]) == SImode) 8676 part[1][0] = part[1][1]; 8677 } 8678 } 8679 emit_move_insn (part[0][1], part[1][1]); 8680 emit_move_insn (part[0][0], part[1][0]); 8681 return; 8682 } 8683 8684 /* Choose correct order to not overwrite the source before it is copied. */ 8685 if ((REG_P (part[0][0]) 8686 && REG_P (part[1][1]) 8687 && (REGNO (part[0][0]) == REGNO (part[1][1]) 8688 || (nparts == 3 8689 && REGNO (part[0][0]) == REGNO (part[1][2])))) 8690 || (collisions > 0 8691 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))) 8692 { 8693 if (nparts == 3) 8694 { 8695 operands[2] = part[0][2]; 8696 operands[3] = part[0][1]; 8697 operands[4] = part[0][0]; 8698 operands[5] = part[1][2]; 8699 operands[6] = part[1][1]; 8700 operands[7] = part[1][0]; 8701 } 8702 else 8703 { 8704 operands[2] = part[0][1]; 8705 operands[3] = part[0][0]; 8706 operands[5] = part[1][1]; 8707 operands[6] = part[1][0]; 8708 } 8709 } 8710 else 8711 { 8712 if (nparts == 3) 8713 { 8714 operands[2] = part[0][0]; 8715 operands[3] = part[0][1]; 8716 operands[4] = part[0][2]; 8717 operands[5] = part[1][0]; 8718 operands[6] = part[1][1]; 8719 operands[7] = part[1][2]; 8720 } 8721 else 8722 { 8723 operands[2] = part[0][0]; 8724 operands[3] = part[0][1]; 8725 operands[5] = part[1][0]; 8726 operands[6] = part[1][1]; 8727 } 8728 } 8729 emit_move_insn (operands[2], operands[5]); 8730 emit_move_insn (operands[3], operands[6]); 8731 if (nparts == 3) 8732 emit_move_insn (operands[4], operands[7]); 8733 8734 return; 8735} 8736 8737void 8738ix86_split_ashldi (operands, scratch) 8739 rtx *operands, scratch; 8740{ 8741 rtx low[2], high[2]; 8742 int count; 8743 8744 if (GET_CODE (operands[2]) == CONST_INT) 8745 { 8746 split_di 
(operands, 2, low, high); 8747 count = INTVAL (operands[2]) & 63; 8748 8749 if (count >= 32) 8750 { 8751 emit_move_insn (high[0], low[1]); 8752 emit_move_insn (low[0], const0_rtx); 8753 8754 if (count > 32) 8755 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32))); 8756 } 8757 else 8758 { 8759 if (!rtx_equal_p (operands[0], operands[1])) 8760 emit_move_insn (operands[0], operands[1]); 8761 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count))); 8762 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count))); 8763 } 8764 } 8765 else 8766 { 8767 if (!rtx_equal_p (operands[0], operands[1])) 8768 emit_move_insn (operands[0], operands[1]); 8769 8770 split_di (operands, 1, low, high); 8771 8772 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2])); 8773 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2])); 8774 8775 if (TARGET_CMOVE && (! no_new_pseudos || scratch)) 8776 { 8777 if (! no_new_pseudos) 8778 scratch = force_reg (SImode, const0_rtx); 8779 else 8780 emit_move_insn (scratch, const0_rtx); 8781 8782 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], 8783 scratch)); 8784 } 8785 else 8786 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2])); 8787 } 8788} 8789 8790void 8791ix86_split_ashrdi (operands, scratch) 8792 rtx *operands, scratch; 8793{ 8794 rtx low[2], high[2]; 8795 int count; 8796 8797 if (GET_CODE (operands[2]) == CONST_INT) 8798 { 8799 split_di (operands, 2, low, high); 8800 count = INTVAL (operands[2]) & 63; 8801 8802 if (count >= 32) 8803 { 8804 emit_move_insn (low[0], high[1]); 8805 8806 if (! 
reload_completed) 8807 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31))); 8808 else 8809 { 8810 emit_move_insn (high[0], low[0]); 8811 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31))); 8812 } 8813 8814 if (count > 32) 8815 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32))); 8816 } 8817 else 8818 { 8819 if (!rtx_equal_p (operands[0], operands[1])) 8820 emit_move_insn (operands[0], operands[1]); 8821 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count))); 8822 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count))); 8823 } 8824 } 8825 else 8826 { 8827 if (!rtx_equal_p (operands[0], operands[1])) 8828 emit_move_insn (operands[0], operands[1]); 8829 8830 split_di (operands, 1, low, high); 8831 8832 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2])); 8833 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2])); 8834 8835 if (TARGET_CMOVE && (! no_new_pseudos || scratch)) 8836 { 8837 if (! no_new_pseudos) 8838 scratch = gen_reg_rtx (SImode); 8839 emit_move_insn (scratch, high[0]); 8840 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31))); 8841 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], 8842 scratch)); 8843 } 8844 else 8845 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2])); 8846 } 8847} 8848 8849void 8850ix86_split_lshrdi (operands, scratch) 8851 rtx *operands, scratch; 8852{ 8853 rtx low[2], high[2]; 8854 int count; 8855 8856 if (GET_CODE (operands[2]) == CONST_INT) 8857 { 8858 split_di (operands, 2, low, high); 8859 count = INTVAL (operands[2]) & 63; 8860 8861 if (count >= 32) 8862 { 8863 emit_move_insn (low[0], high[1]); 8864 emit_move_insn (high[0], const0_rtx); 8865 8866 if (count > 32) 8867 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32))); 8868 } 8869 else 8870 { 8871 if (!rtx_equal_p (operands[0], operands[1])) 8872 emit_move_insn (operands[0], operands[1]); 8873 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count))); 8874 emit_insn (gen_lshrsi3 (high[0], high[0], 
GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}

/* Helper function for the string operations below.  Test whether the
   single bit VALUE (1, 2 or 4 at the call sites) is clear in VARIABLE,
   i.e. whether VARIABLE is aligned modulo 2 * VALUE; if the bit is
   clear, the emitted branch jumps to the returned label, skipping the
   fixup code the caller emits before emitting the label itself.  */
static rtx
ix86_expand_aligntest (variable, value)
     rtx variable;
     int value;
{
  rtx label = gen_label_rtx ();
  /* Scratch register for the AND result; VARIABLE itself is not
     modified.  */
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  /* Branch to LABEL when (VARIABLE & VALUE) == 0.  */
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  return label;
}

/* Adjust COUNTER by the VALUE, i.e. emit COUNTER -= VALUE in the
   counter's own mode.  */
static void
ix86_adjust_counter (countreg, value)
     rtx countreg;
     HOST_WIDE_INT value;
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend possibly SImode EXP to Pmode register.
*/ 8934rtx 8935ix86_zero_extend_to_Pmode (exp) 8936 rtx exp; 8937{ 8938 rtx r; 8939 if (GET_MODE (exp) == VOIDmode) 8940 return force_reg (Pmode, exp); 8941 if (GET_MODE (exp) == Pmode) 8942 return copy_to_mode_reg (Pmode, exp); 8943 r = gen_reg_rtx (Pmode); 8944 emit_insn (gen_zero_extendsidi2 (r, exp)); 8945 return r; 8946} 8947 8948/* Expand string move (memcpy) operation. Use i386 string operations when 8949 profitable. expand_clrstr contains similar code. */ 8950int 8951ix86_expand_movstr (dst, src, count_exp, align_exp) 8952 rtx dst, src, count_exp, align_exp; 8953{ 8954 rtx srcreg, destreg, countreg; 8955 enum machine_mode counter_mode; 8956 HOST_WIDE_INT align = 0; 8957 unsigned HOST_WIDE_INT count = 0; 8958 rtx insns; 8959 8960 start_sequence (); 8961 8962 if (GET_CODE (align_exp) == CONST_INT) 8963 align = INTVAL (align_exp); 8964 8965 /* This simple hack avoids all inlining code and simplifies code below. */ 8966 if (!TARGET_ALIGN_STRINGOPS) 8967 align = 64; 8968 8969 if (GET_CODE (count_exp) == CONST_INT) 8970 count = INTVAL (count_exp); 8971 8972 /* Figure out proper mode for counter. For 32bits it is always SImode, 8973 for 64bits use SImode when possible, otherwise DImode. 8974 Set count to number of bytes copied when known at compile time. */ 8975 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode 8976 || x86_64_zero_extended_value (count_exp)) 8977 counter_mode = SImode; 8978 else 8979 counter_mode = DImode; 8980 8981 if (counter_mode != SImode && counter_mode != DImode) 8982 abort (); 8983 8984 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0)); 8985 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0)); 8986 8987 emit_insn (gen_cld ()); 8988 8989 /* When optimizing for size emit simple rep ; movsb instruction for 8990 counts not divisible by 4. 
*/ 8991 8992 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03))) 8993 { 8994 countreg = ix86_zero_extend_to_Pmode (count_exp); 8995 if (TARGET_64BIT) 8996 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg, 8997 destreg, srcreg, countreg)); 8998 else 8999 emit_insn (gen_rep_movqi (destreg, srcreg, countreg, 9000 destreg, srcreg, countreg)); 9001 } 9002 9003 /* For constant aligned (or small unaligned) copies use rep movsl 9004 followed by code copying the rest. For PentiumPro ensure 8 byte 9005 alignment to allow rep movsl acceleration. */ 9006 9007 else if (count != 0 9008 && (align >= 8 9009 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4) 9010 || optimize_size || count < (unsigned int) 64)) 9011 { 9012 int size = TARGET_64BIT && !optimize_size ? 8 : 4; 9013 if (count & ~(size - 1)) 9014 { 9015 countreg = copy_to_mode_reg (counter_mode, 9016 GEN_INT ((count >> (size == 4 ? 2 : 3)) 9017 & (TARGET_64BIT ? -1 : 0x3fffffff))); 9018 countreg = ix86_zero_extend_to_Pmode (countreg); 9019 if (size == 4) 9020 { 9021 if (TARGET_64BIT) 9022 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg, 9023 destreg, srcreg, countreg)); 9024 else 9025 emit_insn (gen_rep_movsi (destreg, srcreg, countreg, 9026 destreg, srcreg, countreg)); 9027 } 9028 else 9029 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg, 9030 destreg, srcreg, countreg)); 9031 } 9032 if (size == 8 && (count & 0x04)) 9033 emit_insn (gen_strmovsi (destreg, srcreg)); 9034 if (count & 0x02) 9035 emit_insn (gen_strmovhi (destreg, srcreg)); 9036 if (count & 0x01) 9037 emit_insn (gen_strmovqi (destreg, srcreg)); 9038 } 9039 /* The generic code based on the glibc implementation: 9040 - align destination to 4 bytes (8 byte alignment is used for PentiumPro 9041 allowing accelerated copying there) 9042 - copy the data using rep movsl 9043 - copy the rest. 
*/ 9044 else 9045 { 9046 rtx countreg2; 9047 rtx label = NULL; 9048 9049 /* In case we don't know anything about the alignment, default to 9050 library version, since it is usually equally fast and result in 9051 shorter code. */ 9052 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD) 9053 { 9054 end_sequence (); 9055 return 0; 9056 } 9057 9058 if (TARGET_SINGLE_STRINGOP) 9059 emit_insn (gen_cld ()); 9060 9061 countreg2 = gen_reg_rtx (Pmode); 9062 countreg = copy_to_mode_reg (counter_mode, count_exp); 9063 9064 /* We don't use loops to align destination and to copy parts smaller 9065 than 4 bytes, because gcc is able to optimize such code better (in 9066 the case the destination or the count really is aligned, gcc is often 9067 able to predict the branches) and also it is friendlier to the 9068 hardware branch prediction. 9069 9070 Using loops is benefical for generic case, because we can 9071 handle small counts using the loops. Many CPUs (such as Athlon) 9072 have large REP prefix setup costs. 9073 9074 This is quite costy. Maybe we can revisit this decision later or 9075 add some customizability to this code. */ 9076 9077 if (count == 0 9078 && align < (TARGET_PENTIUMPRO && (count == 0 9079 || count >= (unsigned int) 260) 9080 ? 
8 : UNITS_PER_WORD)) 9081 { 9082 label = gen_label_rtx (); 9083 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1), 9084 LEU, 0, counter_mode, 1, label); 9085 } 9086 if (align <= 1) 9087 { 9088 rtx label = ix86_expand_aligntest (destreg, 1); 9089 emit_insn (gen_strmovqi (destreg, srcreg)); 9090 ix86_adjust_counter (countreg, 1); 9091 emit_label (label); 9092 LABEL_NUSES (label) = 1; 9093 } 9094 if (align <= 2) 9095 { 9096 rtx label = ix86_expand_aligntest (destreg, 2); 9097 emit_insn (gen_strmovhi (destreg, srcreg)); 9098 ix86_adjust_counter (countreg, 2); 9099 emit_label (label); 9100 LABEL_NUSES (label) = 1; 9101 } 9102 if (align <= 4 9103 && ((TARGET_PENTIUMPRO && (count == 0 9104 || count >= (unsigned int) 260)) 9105 || TARGET_64BIT)) 9106 { 9107 rtx label = ix86_expand_aligntest (destreg, 4); 9108 emit_insn (gen_strmovsi (destreg, srcreg)); 9109 ix86_adjust_counter (countreg, 4); 9110 emit_label (label); 9111 LABEL_NUSES (label) = 1; 9112 } 9113 9114 if (!TARGET_SINGLE_STRINGOP) 9115 emit_insn (gen_cld ()); 9116 if (TARGET_64BIT) 9117 { 9118 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg), 9119 GEN_INT (3))); 9120 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2, 9121 destreg, srcreg, countreg2)); 9122 } 9123 else 9124 { 9125 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2))); 9126 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2, 9127 destreg, srcreg, countreg2)); 9128 } 9129 9130 if (label) 9131 { 9132 emit_label (label); 9133 LABEL_NUSES (label) = 1; 9134 } 9135 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4)) 9136 emit_insn (gen_strmovsi (destreg, srcreg)); 9137 if ((align <= 4 || count == 0) && TARGET_64BIT) 9138 { 9139 rtx label = ix86_expand_aligntest (countreg, 4); 9140 emit_insn (gen_strmovsi (destreg, srcreg)); 9141 emit_label (label); 9142 LABEL_NUSES (label) = 1; 9143 } 9144 if (align > 2 && count != 0 && (count & 2)) 9145 emit_insn (gen_strmovhi (destreg, srcreg)); 9146 if 
(align <= 2 || count == 0) 9147 { 9148 rtx label = ix86_expand_aligntest (countreg, 2); 9149 emit_insn (gen_strmovhi (destreg, srcreg)); 9150 emit_label (label); 9151 LABEL_NUSES (label) = 1; 9152 } 9153 if (align > 1 && count != 0 && (count & 1)) 9154 emit_insn (gen_strmovqi (destreg, srcreg)); 9155 if (align <= 1 || count == 0) 9156 { 9157 rtx label = ix86_expand_aligntest (countreg, 1); 9158 emit_insn (gen_strmovqi (destreg, srcreg)); 9159 emit_label (label); 9160 LABEL_NUSES (label) = 1; 9161 } 9162 } 9163 9164 insns = get_insns (); 9165 end_sequence (); 9166 9167 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg); 9168 emit_insns (insns); 9169 return 1; 9170} 9171 9172/* Expand string clear operation (bzero). Use i386 string operations when 9173 profitable. expand_movstr contains similar code. */ 9174int 9175ix86_expand_clrstr (src, count_exp, align_exp) 9176 rtx src, count_exp, align_exp; 9177{ 9178 rtx destreg, zeroreg, countreg; 9179 enum machine_mode counter_mode; 9180 HOST_WIDE_INT align = 0; 9181 unsigned HOST_WIDE_INT count = 0; 9182 9183 if (GET_CODE (align_exp) == CONST_INT) 9184 align = INTVAL (align_exp); 9185 9186 /* This simple hack avoids all inlining code and simplifies code below. */ 9187 if (!TARGET_ALIGN_STRINGOPS) 9188 align = 32; 9189 9190 if (GET_CODE (count_exp) == CONST_INT) 9191 count = INTVAL (count_exp); 9192 /* Figure out proper mode for counter. For 32bits it is always SImode, 9193 for 64bits use SImode when possible, otherwise DImode. 9194 Set count to number of bytes copied when known at compile time. */ 9195 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode 9196 || x86_64_zero_extended_value (count_exp)) 9197 counter_mode = SImode; 9198 else 9199 counter_mode = DImode; 9200 9201 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0)); 9202 9203 emit_insn (gen_cld ()); 9204 9205 /* When optimizing for size emit simple rep ; movsb instruction for 9206 counts not divisible by 4. 
*/ 9207 9208 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03))) 9209 { 9210 countreg = ix86_zero_extend_to_Pmode (count_exp); 9211 zeroreg = copy_to_mode_reg (QImode, const0_rtx); 9212 if (TARGET_64BIT) 9213 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg, 9214 destreg, countreg)); 9215 else 9216 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg, 9217 destreg, countreg)); 9218 } 9219 else if (count != 0 9220 && (align >= 8 9221 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4) 9222 || optimize_size || count < (unsigned int) 64)) 9223 { 9224 int size = TARGET_64BIT && !optimize_size ? 8 : 4; 9225 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx); 9226 if (count & ~(size - 1)) 9227 { 9228 countreg = copy_to_mode_reg (counter_mode, 9229 GEN_INT ((count >> (size == 4 ? 2 : 3)) 9230 & (TARGET_64BIT ? -1 : 0x3fffffff))); 9231 countreg = ix86_zero_extend_to_Pmode (countreg); 9232 if (size == 4) 9233 { 9234 if (TARGET_64BIT) 9235 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg, 9236 destreg, countreg)); 9237 else 9238 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg, 9239 destreg, countreg)); 9240 } 9241 else 9242 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg, 9243 destreg, countreg)); 9244 } 9245 if (size == 8 && (count & 0x04)) 9246 emit_insn (gen_strsetsi (destreg, 9247 gen_rtx_SUBREG (SImode, zeroreg, 0))); 9248 if (count & 0x02) 9249 emit_insn (gen_strsethi (destreg, 9250 gen_rtx_SUBREG (HImode, zeroreg, 0))); 9251 if (count & 0x01) 9252 emit_insn (gen_strsetqi (destreg, 9253 gen_rtx_SUBREG (QImode, zeroreg, 0))); 9254 } 9255 else 9256 { 9257 rtx countreg2; 9258 rtx label = NULL; 9259 9260 /* In case we don't know anything about the alignment, default to 9261 library version, since it is usually equally fast and result in 9262 shorter code. 
*/ 9263 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD) 9264 return 0; 9265 9266 if (TARGET_SINGLE_STRINGOP) 9267 emit_insn (gen_cld ()); 9268 9269 countreg2 = gen_reg_rtx (Pmode); 9270 countreg = copy_to_mode_reg (counter_mode, count_exp); 9271 zeroreg = copy_to_mode_reg (Pmode, const0_rtx); 9272 9273 if (count == 0 9274 && align < (TARGET_PENTIUMPRO && (count == 0 9275 || count >= (unsigned int) 260) 9276 ? 8 : UNITS_PER_WORD)) 9277 { 9278 label = gen_label_rtx (); 9279 emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1), 9280 LEU, 0, counter_mode, 1, label); 9281 } 9282 if (align <= 1) 9283 { 9284 rtx label = ix86_expand_aligntest (destreg, 1); 9285 emit_insn (gen_strsetqi (destreg, 9286 gen_rtx_SUBREG (QImode, zeroreg, 0))); 9287 ix86_adjust_counter (countreg, 1); 9288 emit_label (label); 9289 LABEL_NUSES (label) = 1; 9290 } 9291 if (align <= 2) 9292 { 9293 rtx label = ix86_expand_aligntest (destreg, 2); 9294 emit_insn (gen_strsethi (destreg, 9295 gen_rtx_SUBREG (HImode, zeroreg, 0))); 9296 ix86_adjust_counter (countreg, 2); 9297 emit_label (label); 9298 LABEL_NUSES (label) = 1; 9299 } 9300 if (align <= 4 && TARGET_PENTIUMPRO && (count == 0 9301 || count >= (unsigned int) 260)) 9302 { 9303 rtx label = ix86_expand_aligntest (destreg, 4); 9304 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT 9305 ? 
gen_rtx_SUBREG (SImode, zeroreg, 0) 9306 : zeroreg))); 9307 ix86_adjust_counter (countreg, 4); 9308 emit_label (label); 9309 LABEL_NUSES (label) = 1; 9310 } 9311 9312 if (!TARGET_SINGLE_STRINGOP) 9313 emit_insn (gen_cld ()); 9314 if (TARGET_64BIT) 9315 { 9316 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg), 9317 GEN_INT (3))); 9318 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg, 9319 destreg, countreg2)); 9320 } 9321 else 9322 { 9323 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2))); 9324 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg, 9325 destreg, countreg2)); 9326 } 9327 9328 if (label) 9329 { 9330 emit_label (label); 9331 LABEL_NUSES (label) = 1; 9332 } 9333 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4)) 9334 emit_insn (gen_strsetsi (destreg, 9335 gen_rtx_SUBREG (SImode, zeroreg, 0))); 9336 if (TARGET_64BIT && (align <= 4 || count == 0)) 9337 { 9338 rtx label = ix86_expand_aligntest (destreg, 2); 9339 emit_insn (gen_strsetsi (destreg, 9340 gen_rtx_SUBREG (SImode, zeroreg, 0))); 9341 emit_label (label); 9342 LABEL_NUSES (label) = 1; 9343 } 9344 if (align > 2 && count != 0 && (count & 2)) 9345 emit_insn (gen_strsethi (destreg, 9346 gen_rtx_SUBREG (HImode, zeroreg, 0))); 9347 if (align <= 2 || count == 0) 9348 { 9349 rtx label = ix86_expand_aligntest (destreg, 2); 9350 emit_insn (gen_strsethi (destreg, 9351 gen_rtx_SUBREG (HImode, zeroreg, 0))); 9352 emit_label (label); 9353 LABEL_NUSES (label) = 1; 9354 } 9355 if (align > 1 && count != 0 && (count & 1)) 9356 emit_insn (gen_strsetqi (destreg, 9357 gen_rtx_SUBREG (QImode, zeroreg, 0))); 9358 if (align <= 1 || count == 0) 9359 { 9360 rtx label = ix86_expand_aligntest (destreg, 1); 9361 emit_insn (gen_strsetqi (destreg, 9362 gen_rtx_SUBREG (QImode, zeroreg, 0))); 9363 emit_label (label); 9364 LABEL_NUSES (label) = 1; 9365 } 9366 } 9367 return 1; 9368} 9369/* Expand strlen. 
*/ 9370int 9371ix86_expand_strlen (out, src, eoschar, align) 9372 rtx out, src, eoschar, align; 9373{ 9374 rtx addr, scratch1, scratch2, scratch3, scratch4; 9375 9376 /* The generic case of strlen expander is long. Avoid it's 9377 expanding unless TARGET_INLINE_ALL_STRINGOPS. */ 9378 9379 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 9380 && !TARGET_INLINE_ALL_STRINGOPS 9381 && !optimize_size 9382 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4)) 9383 return 0; 9384 9385 addr = force_reg (Pmode, XEXP (src, 0)); 9386 scratch1 = gen_reg_rtx (Pmode); 9387 9388 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 9389 && !optimize_size) 9390 { 9391 /* Well it seems that some optimizer does not combine a call like 9392 foo(strlen(bar), strlen(bar)); 9393 when the move and the subtraction is done here. It does calculate 9394 the length just once when these instructions are done inside of 9395 output_strlen_unroll(). But I think since &bar[strlen(bar)] is 9396 often used and I use one fewer register for the lifetime of 9397 output_strlen_unroll() this is better. */ 9398 9399 emit_move_insn (out, addr); 9400 9401 ix86_expand_strlensi_unroll_1 (out, align); 9402 9403 /* strlensi_unroll_1 returns the address of the zero at the end of 9404 the string, like memchr(), so compute the length by subtracting 9405 the start address. 
*/ 9406 if (TARGET_64BIT) 9407 emit_insn (gen_subdi3 (out, out, addr)); 9408 else 9409 emit_insn (gen_subsi3 (out, out, addr)); 9410 } 9411 else 9412 { 9413 scratch2 = gen_reg_rtx (Pmode); 9414 scratch3 = gen_reg_rtx (Pmode); 9415 scratch4 = force_reg (Pmode, constm1_rtx); 9416 9417 emit_move_insn (scratch3, addr); 9418 eoschar = force_reg (QImode, eoschar); 9419 9420 emit_insn (gen_cld ()); 9421 if (TARGET_64BIT) 9422 { 9423 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar, 9424 align, scratch4, scratch3)); 9425 emit_insn (gen_one_cmpldi2 (scratch2, scratch1)); 9426 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx)); 9427 } 9428 else 9429 { 9430 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar, 9431 align, scratch4, scratch3)); 9432 emit_insn (gen_one_cmplsi2 (scratch2, scratch1)); 9433 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx)); 9434 } 9435 } 9436 return 1; 9437} 9438 9439/* Expand the appropriate insns for doing strlen if not just doing 9440 repnz; scasb 9441 9442 out = result, initialized with the start address 9443 align_rtx = alignment of the address. 9444 scratch = scratch register, initialized with the startaddress when 9445 not aligned, otherwise undefined 9446 9447 This is just the body. It needs the initialisations mentioned above and 9448 some address computing at the end. These things are done in i386.md. */ 9449 9450static void 9451ix86_expand_strlensi_unroll_1 (out, align_rtx) 9452 rtx out, align_rtx; 9453{ 9454 int align; 9455 rtx tmp; 9456 rtx align_2_label = NULL_RTX; 9457 rtx align_3_label = NULL_RTX; 9458 rtx align_4_label = gen_label_rtx (); 9459 rtx end_0_label = gen_label_rtx (); 9460 rtx mem; 9461 rtx tmpreg = gen_reg_rtx (SImode); 9462 rtx scratch = gen_reg_rtx (SImode); 9463 9464 align = 0; 9465 if (GET_CODE (align_rtx) == CONST_INT) 9466 align = INTVAL (align_rtx); 9467 9468 /* Loop to check 1..3 bytes for null to get an aligned pointer. */ 9469 9470 /* Is there a known alignment and is it less than 4? 
*/ 9471 if (align < 4) 9472 { 9473 rtx scratch1 = gen_reg_rtx (Pmode); 9474 emit_move_insn (scratch1, out); 9475 /* Is there a known alignment and is it not 2? */ 9476 if (align != 2) 9477 { 9478 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */ 9479 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */ 9480 9481 /* Leave just the 3 lower bits. */ 9482 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3), 9483 NULL_RTX, 0, OPTAB_WIDEN); 9484 9485 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, 9486 Pmode, 1, align_4_label); 9487 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL, 9488 Pmode, 1, align_2_label); 9489 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL, 9490 Pmode, 1, align_3_label); 9491 } 9492 else 9493 { 9494 /* Since the alignment is 2, we have to check 2 or 0 bytes; 9495 check if is aligned to 4 - byte. */ 9496 9497 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2), 9498 NULL_RTX, 0, OPTAB_WIDEN); 9499 9500 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, 9501 Pmode, 1, align_4_label); 9502 } 9503 9504 mem = gen_rtx_MEM (QImode, out); 9505 9506 /* Now compare the bytes. */ 9507 9508 /* Compare the first n unaligned byte on a byte per byte basis. */ 9509 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, 9510 QImode, 1, end_0_label); 9511 9512 /* Increment the address. 
*/ 9513 if (TARGET_64BIT) 9514 emit_insn (gen_adddi3 (out, out, const1_rtx)); 9515 else 9516 emit_insn (gen_addsi3 (out, out, const1_rtx)); 9517 9518 /* Not needed with an alignment of 2 */ 9519 if (align != 2) 9520 { 9521 emit_label (align_2_label); 9522 9523 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, 9524 end_0_label); 9525 9526 if (TARGET_64BIT) 9527 emit_insn (gen_adddi3 (out, out, const1_rtx)); 9528 else 9529 emit_insn (gen_addsi3 (out, out, const1_rtx)); 9530 9531 emit_label (align_3_label); 9532 } 9533 9534 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, 9535 end_0_label); 9536 9537 if (TARGET_64BIT) 9538 emit_insn (gen_adddi3 (out, out, const1_rtx)); 9539 else 9540 emit_insn (gen_addsi3 (out, out, const1_rtx)); 9541 } 9542 9543 /* Generate loop to check 4 bytes at a time. It is not a good idea to 9544 align this loop. It gives only huge programs, but does not help to 9545 speed up. */ 9546 emit_label (align_4_label); 9547 9548 mem = gen_rtx_MEM (SImode, out); 9549 emit_move_insn (scratch, mem); 9550 if (TARGET_64BIT) 9551 emit_insn (gen_adddi3 (out, out, GEN_INT (4))); 9552 else 9553 emit_insn (gen_addsi3 (out, out, GEN_INT (4))); 9554 9555 /* This formula yields a nonzero result iff one of the bytes is zero. 9556 This saves three branches inside loop and many cycles. */ 9557 9558 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101))); 9559 emit_insn (gen_one_cmplsi2 (scratch, scratch)); 9560 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch)); 9561 emit_insn (gen_andsi3 (tmpreg, tmpreg, 9562 GEN_INT (trunc_int_for_mode 9563 (0x80808080, SImode)))); 9564 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 9565 align_4_label); 9566 9567 if (TARGET_CMOVE) 9568 { 9569 rtx reg = gen_reg_rtx (SImode); 9570 rtx reg2 = gen_reg_rtx (Pmode); 9571 emit_move_insn (reg, tmpreg); 9572 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16))); 9573 9574 /* If zero is not in the first two bytes, move two bytes forward. 
*/ 9575 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); 9576 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 9577 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); 9578 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, 9579 gen_rtx_IF_THEN_ELSE (SImode, tmp, 9580 reg, 9581 tmpreg))); 9582 /* Emit lea manually to avoid clobbering of flags. */ 9583 emit_insn (gen_rtx_SET (SImode, reg2, 9584 gen_rtx_PLUS (Pmode, out, GEN_INT (2)))); 9585 9586 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 9587 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); 9588 emit_insn (gen_rtx_SET (VOIDmode, out, 9589 gen_rtx_IF_THEN_ELSE (Pmode, tmp, 9590 reg2, 9591 out))); 9592 9593 } 9594 else 9595 { 9596 rtx end_2_label = gen_label_rtx (); 9597 /* Is zero in the first two bytes? */ 9598 9599 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); 9600 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 9601 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx); 9602 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 9603 gen_rtx_LABEL_REF (VOIDmode, end_2_label), 9604 pc_rtx); 9605 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); 9606 JUMP_LABEL (tmp) = end_2_label; 9607 9608 /* Not in the first two. Move two bytes forward. */ 9609 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16))); 9610 if (TARGET_64BIT) 9611 emit_insn (gen_adddi3 (out, out, GEN_INT (2))); 9612 else 9613 emit_insn (gen_addsi3 (out, out, GEN_INT (2))); 9614 9615 emit_label (end_2_label); 9616 9617 } 9618 9619 /* Avoid branch in fixing the byte. */ 9620 tmpreg = gen_lowpart (QImode, tmpreg); 9621 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg)); 9622 if (TARGET_64BIT) 9623 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3))); 9624 else 9625 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3))); 9626 9627 emit_label (end_0_label); 9628} 9629 9630/* Clear stack slot assignments remembered from previous functions. 9631 This is called from INIT_EXPANDERS once before RTL is emitted for each 9632 function. 
*/ 9633 9634static void 9635ix86_init_machine_status (p) 9636 struct function *p; 9637{ 9638 p->machine = (struct machine_function *) 9639 xcalloc (1, sizeof (struct machine_function)); 9640} 9641 9642/* Mark machine specific bits of P for GC. */ 9643static void 9644ix86_mark_machine_status (p) 9645 struct function *p; 9646{ 9647 struct machine_function *machine = p->machine; 9648 enum machine_mode mode; 9649 int n; 9650 9651 if (! machine) 9652 return; 9653 9654 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE; 9655 mode = (enum machine_mode) ((int) mode + 1)) 9656 for (n = 0; n < MAX_386_STACK_LOCALS; n++) 9657 ggc_mark_rtx (machine->stack_locals[(int) mode][n]); 9658} 9659 9660static void 9661ix86_free_machine_status (p) 9662 struct function *p; 9663{ 9664 free (p->machine); 9665 p->machine = NULL; 9666} 9667 9668/* Return a MEM corresponding to a stack slot with mode MODE. 9669 Allocate a new slot if necessary. 9670 9671 The RTL for a function can have several slots available: N is 9672 which slot to use. */ 9673 9674rtx 9675assign_386_stack_local (mode, n) 9676 enum machine_mode mode; 9677 int n; 9678{ 9679 if (n < 0 || n >= MAX_386_STACK_LOCALS) 9680 abort (); 9681 9682 if (ix86_stack_locals[(int) mode][n] == NULL_RTX) 9683 ix86_stack_locals[(int) mode][n] 9684 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0); 9685 9686 return ix86_stack_locals[(int) mode][n]; 9687} 9688 9689/* Calculate the length of the memory address in the instruction 9690 encoding. Does not include the one-byte modrm, opcode, or prefix. */ 9691 9692static int 9693memory_address_length (addr) 9694 rtx addr; 9695{ 9696 struct ix86_address parts; 9697 rtx base, index, disp; 9698 int len; 9699 9700 if (GET_CODE (addr) == PRE_DEC 9701 || GET_CODE (addr) == POST_INC 9702 || GET_CODE (addr) == PRE_MODIFY 9703 || GET_CODE (addr) == POST_MODIFY) 9704 return 0; 9705 9706 if (! 
ix86_decompose_address (addr, &parts)) 9707 abort (); 9708 9709 base = parts.base; 9710 index = parts.index; 9711 disp = parts.disp; 9712 len = 0; 9713 9714 /* Register Indirect. */ 9715 if (base && !index && !disp) 9716 { 9717 /* Special cases: ebp and esp need the two-byte modrm form. */ 9718 if (addr == stack_pointer_rtx 9719 || addr == arg_pointer_rtx 9720 || addr == frame_pointer_rtx 9721 || addr == hard_frame_pointer_rtx) 9722 len = 1; 9723 } 9724 9725 /* Direct Addressing. */ 9726 else if (disp && !base && !index) 9727 len = 4; 9728 9729 else 9730 { 9731 /* Find the length of the displacement constant. */ 9732 if (disp) 9733 { 9734 if (GET_CODE (disp) == CONST_INT 9735 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')) 9736 len = 1; 9737 else 9738 len = 4; 9739 } 9740 9741 /* An index requires the two-byte modrm form. */ 9742 if (index) 9743 len += 1; 9744 } 9745 9746 return len; 9747} 9748 9749/* Compute default value for "length_immediate" attribute. When SHORTFORM is set 9750 expect that insn have 8bit immediate alternative. */ 9751int 9752ix86_attr_length_immediate_default (insn, shortform) 9753 rtx insn; 9754 int shortform; 9755{ 9756 int len = 0; 9757 int i; 9758 extract_insn_cached (insn); 9759 for (i = recog_data.n_operands - 1; i >= 0; --i) 9760 if (CONSTANT_P (recog_data.operand[i])) 9761 { 9762 if (len) 9763 abort (); 9764 if (shortform 9765 && GET_CODE (recog_data.operand[i]) == CONST_INT 9766 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K')) 9767 len = 1; 9768 else 9769 { 9770 switch (get_attr_mode (insn)) 9771 { 9772 case MODE_QI: 9773 len+=1; 9774 break; 9775 case MODE_HI: 9776 len+=2; 9777 break; 9778 case MODE_SI: 9779 len+=4; 9780 break; 9781 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */ 9782 case MODE_DI: 9783 len+=4; 9784 break; 9785 default: 9786 fatal_insn ("unknown insn mode", insn); 9787 } 9788 } 9789 } 9790 return len; 9791} 9792/* Compute default value for "length_address" attribute. 
*/ 9793int 9794ix86_attr_length_address_default (insn) 9795 rtx insn; 9796{ 9797 int i; 9798 extract_insn_cached (insn); 9799 for (i = recog_data.n_operands - 1; i >= 0; --i) 9800 if (GET_CODE (recog_data.operand[i]) == MEM) 9801 { 9802 return memory_address_length (XEXP (recog_data.operand[i], 0)); 9803 break; 9804 } 9805 return 0; 9806} 9807 9808/* Return the maximum number of instructions a cpu can issue. */ 9809 9810static int 9811ix86_issue_rate () 9812{ 9813 switch (ix86_cpu) 9814 { 9815 case PROCESSOR_PENTIUM: 9816 case PROCESSOR_K6: 9817 return 2; 9818 9819 case PROCESSOR_PENTIUMPRO: 9820 case PROCESSOR_PENTIUM4: 9821 case PROCESSOR_ATHLON: 9822 return 3; 9823 9824 default: 9825 return 1; 9826 } 9827} 9828 9829/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set 9830 by DEP_INSN and nothing set by DEP_INSN. */ 9831 9832static int 9833ix86_flags_dependant (insn, dep_insn, insn_type) 9834 rtx insn, dep_insn; 9835 enum attr_type insn_type; 9836{ 9837 rtx set, set2; 9838 9839 /* Simplify the test for uninteresting insns. */ 9840 if (insn_type != TYPE_SETCC 9841 && insn_type != TYPE_ICMOV 9842 && insn_type != TYPE_FCMOV 9843 && insn_type != TYPE_IBR) 9844 return 0; 9845 9846 if ((set = single_set (dep_insn)) != 0) 9847 { 9848 set = SET_DEST (set); 9849 set2 = NULL_RTX; 9850 } 9851 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL 9852 && XVECLEN (PATTERN (dep_insn), 0) == 2 9853 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET 9854 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET) 9855 { 9856 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 9857 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 9858 } 9859 else 9860 return 0; 9861 9862 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG) 9863 return 0; 9864 9865 /* This test is true if the dependent insn reads the flags but 9866 not any other potentially set register. 
 */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return 0;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return 0;

  return 1;
}

/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx addr;

  if (insn_type == TYPE_LEA
      && TARGET_PENTIUM)
    {
      /* For an LEA on Pentium, the "address" is the SET_SRC of the
	 pattern (LEA computes but does not dereference an address).  */
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
	;
      else if (GET_CODE (addr) == PARALLEL
	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      else
	abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      /* Otherwise look for the first MEM operand; no MEM means no
	 address generation dependency is possible.  */
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    goto found;
	  }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}

/* Scheduler hook: adjust COST of the dependency LINK between INSN and
   DEP_INSN according to the modeled CPU pipeline.  */

static int
ix86_adjust_cost (insn, link, dep_insn, cost)
     rtx insn, link, dep_insn;
     int cost;
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory, dep_memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.
 */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
	  && dep_insn_type != TYPE_FMOV
	  && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
	cost += 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && GET_CODE (SET_DEST (set2)) == MEM)
	cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.
 */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);
      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;

    case PROCESSOR_ATHLON:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
	{
	  if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
	    cost += 2;
	  else
	    cost += 3;
	}
      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.
*/ 10046 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 10047 && !ix86_agi_dependant (insn, dep_insn, insn_type)) 10048 { 10049 /* Claim moves to take one cycle, as core can issue one load 10050 at time and the next load can start cycle later. */ 10051 if (dep_insn_type == TYPE_IMOV 10052 || dep_insn_type == TYPE_FMOV) 10053 cost = 0; 10054 else if (cost >= 3) 10055 cost -= 3; 10056 else 10057 cost = 0; 10058 } 10059 10060 default: 10061 break; 10062 } 10063 10064 return cost; 10065} 10066 10067static union 10068{ 10069 struct ppro_sched_data 10070 { 10071 rtx decode[3]; 10072 int issued_this_cycle; 10073 } ppro; 10074} ix86_sched_data; 10075 10076static int 10077ix86_safe_length (insn) 10078 rtx insn; 10079{ 10080 if (recog_memoized (insn) >= 0) 10081 return get_attr_length (insn); 10082 else 10083 return 128; 10084} 10085 10086static int 10087ix86_safe_length_prefix (insn) 10088 rtx insn; 10089{ 10090 if (recog_memoized (insn) >= 0) 10091 return get_attr_length (insn); 10092 else 10093 return 0; 10094} 10095 10096static enum attr_memory 10097ix86_safe_memory (insn) 10098 rtx insn; 10099{ 10100 if (recog_memoized (insn) >= 0) 10101 return get_attr_memory (insn); 10102 else 10103 return MEMORY_UNKNOWN; 10104} 10105 10106static enum attr_pent_pair 10107ix86_safe_pent_pair (insn) 10108 rtx insn; 10109{ 10110 if (recog_memoized (insn) >= 0) 10111 return get_attr_pent_pair (insn); 10112 else 10113 return PENT_PAIR_NP; 10114} 10115 10116static enum attr_ppro_uops 10117ix86_safe_ppro_uops (insn) 10118 rtx insn; 10119{ 10120 if (recog_memoized (insn) >= 0) 10121 return get_attr_ppro_uops (insn); 10122 else 10123 return PPRO_UOPS_MANY; 10124} 10125 10126static void 10127ix86_dump_ppro_packet (dump) 10128 FILE *dump; 10129{ 10130 if (ix86_sched_data.ppro.decode[0]) 10131 { 10132 fprintf (dump, "PPRO packet: %d", 10133 INSN_UID (ix86_sched_data.ppro.decode[0])); 10134 if (ix86_sched_data.ppro.decode[1]) 10135 fprintf (dump, " %d", INSN_UID 
(ix86_sched_data.ppro.decode[1]));
      if (ix86_sched_data.ppro.decode[2])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
      fputc ('\n', dump);
    }
}

/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ix86_sched_init (dump, sched_verbose, veclen)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int veclen ATTRIBUTE_UNUSED;
{
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
}

/* Shift INSN to SLOT, and shift everything else down.  */

static void
ix86_reorder_insn (insnp, slot)
     rtx *insnp, *slot;
{
  /* Rotate: save *INSNP, slide the intervening entries down one,
     then drop the saved insn into SLOT.  */
  if (insnp != slot)
    {
      rtx insn = *insnp;
      do
	insnp[0] = insnp[1];
      while (++insnp != slot);
      *insnp = insn;
    }
}

/* Find an instruction with given pairability and minimal amount of cycles
   lost by the fact that the CPU waits for both pipelines to finish before
   reading next instructions.  Also take care that both instructions together
   can not exceed 7 bytes.
 */

static rtx *
ix86_pent_find_pair (e_ready, ready, type, first)
     rtx *e_ready;
     rtx *ready;
     enum attr_pent_pair type;
     rtx first;
{
  int mincycles, cycles;
  enum attr_pent_pair tmp;
  enum attr_memory memory;
  rtx *insnp, *bestinsnp = NULL;

  /* FIRST itself must fit in the 7-byte pairing window (prefix bytes
     not counted against the limit).  */
  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
    return NULL;

  memory = ix86_safe_memory (first);
  cycles = result_ready_cost (first);
  mincycles = INT_MAX;

  /* Scan the ready list (highest priority first) for a TYPE-pairable
     insn; the "&& mincycles" stops the scan once a perfect (0-cost)
     partner has been found.  */
  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
      {
	enum attr_memory second_memory;
	int secondcycles, currentcycles;

	second_memory = ix86_safe_memory (*insnp);
	secondcycles = result_ready_cost (*insnp);
	/* Base penalty: imbalance between the two pipes' latencies.  */
	currentcycles = abs (cycles - secondcycles);

	if (secondcycles >= 1 && cycles >= 1)
	  {
	    /* Two read/modify/write instructions together takes two
	       cycles longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
	      currentcycles += 2;

	    /* Read modify/write instruction followed by read/modify
	       takes one cycle longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
		&& tmp != PENT_PAIR_UV
		&& ix86_safe_pent_pair (first) != PENT_PAIR_UV)
	      currentcycles += 1;
	  }
	if (currentcycles < mincycles)
	  bestinsnp = insnp, mincycles = currentcycles;
      }

  return bestinsnp;
}

/* Subroutines of ix86_sched_reorder.  */

static void
ix86_sched_reorder_pentium (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  enum attr_pent_pair pair1, pair2;
  rtx *insnp;

  /* This wouldn't be necessary if Haifa knew that static insn ordering
     is important to which pipe an insn is issued to.
So we have to make
     some minor rearrangements.  */

  pair1 = ix86_safe_pent_pair (*e_ready);

  /* If the first insn is non-pairable, let it be.  */
  if (pair1 == PENT_PAIR_NP)
    return;

  pair2 = PENT_PAIR_NP;
  insnp = 0;

  /* If the first insn is UV or PV pairable, search for a PU
     insn to go with.  */
  if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PU, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PU;
    }

  /* If the first insn is PU or UV pairable, search for a PV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP
      && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PV;
    }

  /* If the first insn is pairable, search for a UV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP)
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_UV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_UV;
    }

  if (pair2 == PENT_PAIR_NP)
    return;

  /* Found something!  Decide if we need to swap the order.  */
  if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
      || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
	  && ix86_safe_memory (*e_ready) == MEMORY_BOTH
	  && ix86_safe_memory (*insnp) == MEMORY_LOAD))
    ix86_reorder_insn (insnp, e_ready);
  else
    ix86_reorder_insn (insnp, e_ready - 1);
}

static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".
That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready >= e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.
 */
      }

 ppro_done:
  /* At least one insn is always considered issued this cycle.  */
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}

/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */
static int
ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *n_readyp;
     int clock_var ATTRIBUTE_UNUSED;
{
  int n_ready = *n_readyp;
  rtx *e_ready = ready + n_ready - 1;

  /* Nothing to reorder with fewer than two ready insns.  */
  if (n_ready < 2)
    goto out;

  switch (ix86_cpu)
    {
    default:
      break;

    case PROCESSOR_PENTIUM:
      ix86_sched_reorder_pentium (ready, e_ready);
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

out:
  return ix86_issue_rate ();
}

/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.
 */

static int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    /* A many-uop insn occupies all decoders by itself: dump
	       the pending packet, show INSN as its own packet, then
	       clear the decoder state again.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    /* A few-uop insn must go to decoder 0; flush the pending
	       packet and start a new one with INSN.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    /* One-uop insn: take the first free decoder slot.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    /* Filling slot 2 completes the packet; dump and reset.  */
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}

/* Walk through INSNS and look for MEM references whose address is DSTREG or
   SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
   appropriate.
 */

void
ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
     rtx insns;
     rtx dstref, srcref, dstreg, srcreg;
{
  rtx insn;

  for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
				 dstreg, srcreg);
}

/* Subroutine of above to actually do the updating by recursively walking
   the rtx.  */

static void
ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx x;
     rtx dstref, srcref, dstreg, srcreg;
{
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);
  int i, j;

  /* Note: pointer equality against DSTREG/SRCREG is intentional --
     only MEMs whose address is the exact shared register rtx match.  */
  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  /* Recurse into all rtx ('e') and rtx-vector ('E') operands.  */
  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
    {
      if (*format_ptr == 'e')
	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
				   dstreg, srcreg);
      else if (*format_ptr == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
				     dstreg, srcreg);
    }
}

/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.
 */

int
ix86_constant_alignment (exp, align)
     tree exp;
     int align;
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  /* Long string constants benefit from cache-line alignment.  */
  else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
	   && align < 256)
    return 256;

  return align;
}

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (type, align)
     tree type;
     int align;
{
  /* Large aggregates (>= 256 bits) get 256-bit alignment.  */
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.
 */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  /* Bump alignment for types whose (element/field) mode wants wider
     natural alignment than ALIGN provides.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}

/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (type, align)
     tree type;
     int align;
{
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.
 */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  /* Mirror of ix86_data_alignment's per-type-mode bumps, applied to
     locals.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
x86_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.
 */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      /* 32-bit trampoline: 0xb9 = mov $cxt, %ecx; 0xe9 = jmp rel32.  */
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      GEN_INT (trunc_int_for_mode (0xb9, QImode)));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      GEN_INT (trunc_int_for_mode (0xe9, QImode)));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but kernel does not use
	 trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  /* 0x41 0xbb = movl $imm32, %r11d (zero-extends).  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  GEN_INT (trunc_int_for_mode (0xbb41, HImode)))
;
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* 0x49 0xbb = movabs $imm64, %r11.  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load static chain using movabs to r10.
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      GEN_INT (trunc_int_for_mode (0xba49, HImode)));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump via r11: 0x49 0xff 0xe3 = jmp *%r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      GEN_INT (trunc_int_for_mode (0xff49, HImode)));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
		      GEN_INT (trunc_int_for_mode (0xe3, QImode)));
      offset += 3;
      /* Sanity-check that the emitted bytes fit the trampoline slot.  */
      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }
}

/* Register builtin NAME with TYPE and code CODE when the target flags
   in MASK are enabled.  */
#define def_builtin(MASK, NAME, TYPE, CODE) \
do { \
  if ((MASK) & target_flags) \
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
} while (0)

/* One table entry per builtin: enabling mask, expander insn, user-level
   name, builtin code, and (for compares) the rtx comparison plus a flag
   requesting swapped operands.  */
struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};

/* SSE (u)comis? builtins.  GT/GE are expressed as LT/LE with the
   operand-swap flag set.  */
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
  {
MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 }, 10756 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 }, 10757 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 } 10758}; 10759 10760static const struct builtin_description bdesc_2arg[] = 10761{ 10762 /* SSE */ 10763 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 }, 10764 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 }, 10765 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, 10766 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, 10767 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, 10768 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, 10769 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, 10770 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, 10771 10772 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, 10773 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, 10774 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, 10775 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 }, 10776 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 }, 10777 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, 10778 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 }, 10779 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 }, 10780 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", 
IX86_BUILTIN_CMPNLEPS, LE, 0 }, 10781 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 }, 10782 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 }, 10783 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 }, 10784 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, 10785 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, 10786 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, 10787 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 }, 10788 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 }, 10789 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, 10790 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 }, 10791 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 }, 10792 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 }, 10793 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 }, 10794 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 }, 10795 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 }, 10796 10797 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, 10798 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, 10799 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, 10800 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, 10801 10802 { MASK_SSE, CODE_FOR_sse_movss, 
"__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, 10803 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, 10804 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, 10805 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 }, 10806 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, 10807 10808 /* MMX */ 10809 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 }, 10810 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 }, 10811 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 }, 10812 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 }, 10813 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 }, 10814 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 }, 10815 10816 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 }, 10817 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 }, 10818 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 }, 10819 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 }, 10820 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 }, 10821 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 }, 10822 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 }, 10823 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 }, 10824 10825 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 }, 10826 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 }, 10827 { MASK_SSE | MASK_3DNOW_A, 
CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, 10828 10829 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 }, 10830 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 }, 10831 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 }, 10832 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 }, 10833 10834 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, 10835 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, 10836 10837 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 }, 10838 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 }, 10839 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 }, 10840 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 }, 10841 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 }, 10842 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 }, 10843 10844 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, 10845 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, 10846 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, 10847 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, 10848 10849 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 }, 10850 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 }, 10851 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 }, 10852 { MASK_MMX, 
CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 }, 10853 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 }, 10854 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 }, 10855 10856 /* Special. */ 10857 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 }, 10858 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 }, 10859 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 }, 10860 10861 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, 10862 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, 10863 10864 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, 10865 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, 10866 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 }, 10867 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 }, 10868 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 }, 10869 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 }, 10870 10871 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 }, 10872 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 }, 10873 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 }, 10874 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 }, 10875 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 }, 10876 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 }, 10877 10878 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 }, 10879 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 }, 10880 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, 10881 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 }, 10882 10883 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, 10884 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 } 10885 
};

/* One-operand builtins.  Entries with a zero name field get no
   user-visible builtin here; they are registered with hand-written
   types in ix86_init_mmx_sse_builtins and expanded via the generic
   unop paths in ix86_expand_builtin.  */
static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }

};

/* Target hook: register the ix86 builtin functions.  MMX is the
   baseline; without it no vector builtins exist at all.  */
void
ix86_init_builtins ()
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins (def_builtin filters on the mask field).  */
static void
ix86_init_mmx_sse_builtins ()
{
  const struct builtin_description * d;
  size_t i;
  tree endlink = void_list_node;

  /* Pointer types used by the load/store builtin signatures.  */
  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE,
						 V4SF_type_node,
						 endlink)));
  tree v4si_ftype_v4sf_v4sf
    = build_function_type (V4SI_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE,
						 V4SF_type_node,
						 endlink)));
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      endlink));
  tree int_ftype_v8qi
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));
  tree v4sf_ftype_v4sf_int
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v4sf_ftype_v4sf_v2si
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree int_ftype_v4hi_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_int_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v4hi_ftype_v2si_v2si
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v4hi_ftype_v8qi_v8qi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v2si_ftype_v4hi_v4hi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_di
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree v2si_ftype_v2si_di
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree void_ftype_void
    = build_function_type (void_type_node, endlink);
  tree void_ftype_unsigned
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, unsigned_type_node,
				      endlink));
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, endlink);
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree v4sf_ftype_void
    = build_function_type (V4SF_type_node, endlink);
  tree v2si_ftype_v4sf
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      endlink));
  /* Loads/stores.  */
  tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
				  tree_cons (NULL_TREE, V8QI_type_node,
					     tree_cons (NULL_TREE,
							pchar_type_node,
							endlink)));
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type (void_type_node, maskmovq_args);
  tree v4sf_ftype_pfloat
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, pfloat_type_node,
				      endlink));
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, pv2si_type_node,
						 endlink)));
  tree void_ftype_pv2si_v4sf
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pv2si_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 endlink)));
  tree void_ftype_pfloat_v4sf
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pfloat_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 endlink)));
  tree void_ftype_pdi_di
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pdi_type_node,
				      tree_cons (NULL_TREE,
						 long_long_unsigned_type_node,
						 endlink)));
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      endlink));

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 endlink)));
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
				      tree_cons (NULL_TREE,
						 long_long_unsigned_type_node,
						 endlink)));

  /* 3DNow! float<->int vector signatures.  */
  tree v2si_ftype_v2sf
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SF_type_node,
				      endlink));
  tree v2sf_ftype_v2si
    = build_function_type (V2SF_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree v2si_ftype_v2si
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree v2sf_ftype_v2sf
    = build_function_type (V2SF_type_node,
			   tree_cons (NULL_TREE, V2SF_type_node,
				      endlink));
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type (V2SF_type_node,
			   tree_cons (NULL_TREE, V2SF_type_node,
				      tree_cons (NULL_TREE,
						 V2SF_type_node,
						 endlink)));
  tree v2si_ftype_v2sf_v2sf
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SF_type_node,
				      tree_cons (NULL_TREE,
						 V2SF_type_node,
						 endlink)));

  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;
      mode = insn_data[d->icode].operand[1].mode;

      /* Pick the signature from the input-operand mode.  */
      switch (mode)
	{
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  abort ();
	}

      /* Override for comparisons: they take V4SF but produce a V4SI
	 mask.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
	  || d->icode == CODE_FOR_maskncmpv4sf3
	  || d->icode == CODE_FOR_vmmaskcmpv4sf3
	  || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	type = v4si_ftype_v4sf_v4sf;

      def_builtin (d->mask, d->name, type, d->code);
    }

  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  /* NOTE(review): ldmxcsr/stmxcsr manipulate the SSE control register
     but are registered under MASK_MMX here — confirm whether MASK_SSE
     was intended.  */
  def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
    def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);

  /* SSE logicals get a V4SF signature; see
     ix86_expand_timode_binop_builtin for why.  */
  def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
  def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
  def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
  def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (x, mode)
     rtx x;
     enum machine_mode mode;
{
  /* Anything other than the error sentinel passes through untouched.  */
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  /* Emit a clear of the fresh register; wrap in a SUBREG when the
     clear pattern's mode differs from MODE.  */
  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
			      : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
				: gen_rtx_SUBREG (V4SFmode, x, 0)));
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.
*/ 11302 11303static rtx 11304ix86_expand_binop_builtin (icode, arglist, target) 11305 enum insn_code icode; 11306 tree arglist; 11307 rtx target; 11308{ 11309 rtx pat; 11310 tree arg0 = TREE_VALUE (arglist); 11311 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11312 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11313 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11314 enum machine_mode tmode = insn_data[icode].operand[0].mode; 11315 enum machine_mode mode0 = insn_data[icode].operand[1].mode; 11316 enum machine_mode mode1 = insn_data[icode].operand[2].mode; 11317 11318 if (VECTOR_MODE_P (mode0)) 11319 op0 = safe_vector_operand (op0, mode0); 11320 if (VECTOR_MODE_P (mode1)) 11321 op1 = safe_vector_operand (op1, mode1); 11322 11323 if (! target 11324 || GET_MODE (target) != tmode 11325 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 11326 target = gen_reg_rtx (tmode); 11327 11328 /* In case the insn wants input operands in modes different from 11329 the result, abort. */ 11330 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1) 11331 abort (); 11332 11333 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 11334 op0 = copy_to_mode_reg (mode0, op0); 11335 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) 11336 op1 = copy_to_mode_reg (mode1, op1); 11337 11338 pat = GEN_FCN (icode) (target, op0, op1); 11339 if (! pat) 11340 return 0; 11341 emit_insn (pat); 11342 return target; 11343} 11344 11345/* In type_for_mode we restrict the ability to create TImode types 11346 to hosts with 64-bit H_W_I. So we've defined the SSE logicals 11347 to have a V4SFmode signature. Convert them in-place to TImode. 
*/ 11348 11349static rtx 11350ix86_expand_timode_binop_builtin (icode, arglist, target) 11351 enum insn_code icode; 11352 tree arglist; 11353 rtx target; 11354{ 11355 rtx pat; 11356 tree arg0 = TREE_VALUE (arglist); 11357 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11358 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11359 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11360 11361 op0 = gen_lowpart (TImode, op0); 11362 op1 = gen_lowpart (TImode, op1); 11363 target = gen_reg_rtx (TImode); 11364 11365 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode)) 11366 op0 = copy_to_mode_reg (TImode, op0); 11367 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode)) 11368 op1 = copy_to_mode_reg (TImode, op1); 11369 11370 pat = GEN_FCN (icode) (target, op0, op1); 11371 if (! pat) 11372 return 0; 11373 emit_insn (pat); 11374 11375 return gen_lowpart (V4SFmode, target); 11376} 11377 11378/* Subroutine of ix86_expand_builtin to take care of stores. */ 11379 11380static rtx 11381ix86_expand_store_builtin (icode, arglist) 11382 enum insn_code icode; 11383 tree arglist; 11384{ 11385 rtx pat; 11386 tree arg0 = TREE_VALUE (arglist); 11387 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11388 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11389 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11390 enum machine_mode mode0 = insn_data[icode].operand[0].mode; 11391 enum machine_mode mode1 = insn_data[icode].operand[1].mode; 11392 11393 if (VECTOR_MODE_P (mode1)) 11394 op1 = safe_vector_operand (op1, mode1); 11395 11396 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); 11397 pat = GEN_FCN (icode) (op0, op1); 11398 if (pat) 11399 emit_insn (pat); 11400 return 0; 11401} 11402 11403/* Subroutine of ix86_expand_builtin to take care of unop insns. 
*/

static rtx
ix86_expand_unop_builtin (icode, arglist, target, do_load)
     enum insn_code icode;
     tree arglist;
     rtx target;
     int do_load;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  /* Reuse TARGET only if it has the right mode and satisfies the
     output predicate; otherwise allocate a fresh register.  */
  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  /* When DO_LOAD, the argument is a pointer: dereference it.  */
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      /* Guard against error-recovery const0_rtx operands.  */
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  These patterns take two inputs (merging the
   upper elements from one), so the single argument is passed twice.  */

static rtx
ix86_expand_unop1_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  /* Same operand supplied twice: op0 is both the scalar input and the
     source of the pass-through upper elements.  */
  pat = GEN_FCN (icode) (target, op0, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware (the d->flag entries: GT/GE done as LT/LE with swapped
     operands).  NOTE(review): op1 is copied through a fresh register
     before the swap — presumably needed for the insn's operand
     constraints; confirm before simplifying.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* The mask-compare patterns take the comparison rtx itself as a
     third operand.  */
  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comi insns.
*/ 11526 11527static rtx 11528ix86_expand_sse_comi (d, arglist, target) 11529 const struct builtin_description *d; 11530 tree arglist; 11531 rtx target; 11532{ 11533 rtx pat; 11534 tree arg0 = TREE_VALUE (arglist); 11535 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11536 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11537 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11538 rtx op2; 11539 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; 11540 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; 11541 enum rtx_code comparison = d->comparison; 11542 11543 if (VECTOR_MODE_P (mode0)) 11544 op0 = safe_vector_operand (op0, mode0); 11545 if (VECTOR_MODE_P (mode1)) 11546 op1 = safe_vector_operand (op1, mode1); 11547 11548 /* Swap operands if we have a comparison that isn't available in 11549 hardware. */ 11550 if (d->flag) 11551 { 11552 rtx tmp = op1; 11553 op1 = op0; 11554 op0 = tmp; 11555 } 11556 11557 target = gen_reg_rtx (SImode); 11558 emit_move_insn (target, const0_rtx); 11559 target = gen_rtx_SUBREG (QImode, target, 0); 11560 11561 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0)) 11562 op0 = copy_to_mode_reg (mode0, op0); 11563 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1)) 11564 op1 = copy_to_mode_reg (mode1, op1); 11565 11566 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); 11567 pat = GEN_FCN (d->icode) (op0, op1, op2); 11568 if (! pat) 11569 return 0; 11570 emit_insn (pat); 11571 emit_insn (gen_rtx_SET (VOIDmode, 11572 gen_rtx_STRICT_LOW_PART (VOIDmode, target), 11573 gen_rtx_fmt_ee (comparison, QImode, 11574 gen_rtx_REG (CCmode, FLAGS_REG), 11575 const0_rtx))); 11576 11577 return SUBREG_REG (target); 11578} 11579 11580/* Expand an expression EXP that calls a built-in function, 11581 with result going to TARGET if that's convenient 11582 (and in mode MODE if that's convenient). 11583 SUBTARGET may be used as the target for computing one of EXP's operands. 
11584 IGNORE is nonzero if the value is to be ignored. */ 11585 11586rtx 11587ix86_expand_builtin (exp, target, subtarget, mode, ignore) 11588 tree exp; 11589 rtx target; 11590 rtx subtarget ATTRIBUTE_UNUSED; 11591 enum machine_mode mode ATTRIBUTE_UNUSED; 11592 int ignore ATTRIBUTE_UNUSED; 11593{ 11594 const struct builtin_description *d; 11595 size_t i; 11596 enum insn_code icode; 11597 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); 11598 tree arglist = TREE_OPERAND (exp, 1); 11599 tree arg0, arg1, arg2; 11600 rtx op0, op1, op2, pat; 11601 enum machine_mode tmode, mode0, mode1, mode2; 11602 unsigned int fcode = DECL_FUNCTION_CODE (fndecl); 11603 11604 switch (fcode) 11605 { 11606 case IX86_BUILTIN_EMMS: 11607 emit_insn (gen_emms ()); 11608 return 0; 11609 11610 case IX86_BUILTIN_SFENCE: 11611 emit_insn (gen_sfence ()); 11612 return 0; 11613 11614 case IX86_BUILTIN_PEXTRW: 11615 icode = CODE_FOR_mmx_pextrw; 11616 arg0 = TREE_VALUE (arglist); 11617 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11618 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11619 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11620 tmode = insn_data[icode].operand[0].mode; 11621 mode0 = insn_data[icode].operand[1].mode; 11622 mode1 = insn_data[icode].operand[2].mode; 11623 11624 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 11625 op0 = copy_to_mode_reg (mode0, op0); 11626 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) 11627 { 11628 /* @@@ better error message */ 11629 error ("selector must be an immediate"); 11630 return gen_reg_rtx (tmode); 11631 } 11632 if (target == 0 11633 || GET_MODE (target) != tmode 11634 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 11635 target = gen_reg_rtx (tmode); 11636 pat = GEN_FCN (icode) (target, op0, op1); 11637 if (! 
pat) 11638 return 0; 11639 emit_insn (pat); 11640 return target; 11641 11642 case IX86_BUILTIN_PINSRW: 11643 icode = CODE_FOR_mmx_pinsrw; 11644 arg0 = TREE_VALUE (arglist); 11645 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11646 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); 11647 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11648 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11649 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); 11650 tmode = insn_data[icode].operand[0].mode; 11651 mode0 = insn_data[icode].operand[1].mode; 11652 mode1 = insn_data[icode].operand[2].mode; 11653 mode2 = insn_data[icode].operand[3].mode; 11654 11655 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 11656 op0 = copy_to_mode_reg (mode0, op0); 11657 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) 11658 op1 = copy_to_mode_reg (mode1, op1); 11659 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) 11660 { 11661 /* @@@ better error message */ 11662 error ("selector must be an immediate"); 11663 return const0_rtx; 11664 } 11665 if (target == 0 11666 || GET_MODE (target) != tmode 11667 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 11668 target = gen_reg_rtx (tmode); 11669 pat = GEN_FCN (icode) (target, op0, op1, op2); 11670 if (! pat) 11671 return 0; 11672 emit_insn (pat); 11673 return target; 11674 11675 case IX86_BUILTIN_MASKMOVQ: 11676 icode = CODE_FOR_mmx_maskmovq; 11677 /* Note the arg order is different from the operand order. */ 11678 arg1 = TREE_VALUE (arglist); 11679 arg2 = TREE_VALUE (TREE_CHAIN (arglist)); 11680 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); 11681 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11682 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11683 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); 11684 mode0 = insn_data[icode].operand[0].mode; 11685 mode1 = insn_data[icode].operand[1].mode; 11686 mode2 = insn_data[icode].operand[2].mode; 11687 11688 if (! 
(*insn_data[icode].operand[1].predicate) (op0, mode0)) 11689 op0 = copy_to_mode_reg (mode0, op0); 11690 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) 11691 op1 = copy_to_mode_reg (mode1, op1); 11692 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2)) 11693 op2 = copy_to_mode_reg (mode2, op2); 11694 pat = GEN_FCN (icode) (op0, op1, op2); 11695 if (! pat) 11696 return 0; 11697 emit_insn (pat); 11698 return 0; 11699 11700 case IX86_BUILTIN_SQRTSS: 11701 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target); 11702 case IX86_BUILTIN_RSQRTSS: 11703 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target); 11704 case IX86_BUILTIN_RCPSS: 11705 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target); 11706 11707 case IX86_BUILTIN_ANDPS: 11708 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3, 11709 arglist, target); 11710 case IX86_BUILTIN_ANDNPS: 11711 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3, 11712 arglist, target); 11713 case IX86_BUILTIN_ORPS: 11714 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3, 11715 arglist, target); 11716 case IX86_BUILTIN_XORPS: 11717 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3, 11718 arglist, target); 11719 11720 case IX86_BUILTIN_LOADAPS: 11721 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1); 11722 11723 case IX86_BUILTIN_LOADUPS: 11724 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1); 11725 11726 case IX86_BUILTIN_STOREAPS: 11727 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist); 11728 case IX86_BUILTIN_STOREUPS: 11729 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist); 11730 11731 case IX86_BUILTIN_LOADSS: 11732 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1); 11733 11734 case IX86_BUILTIN_STORESS: 11735 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist); 11736 11737 case 
IX86_BUILTIN_LOADHPS: 11738 case IX86_BUILTIN_LOADLPS: 11739 icode = (fcode == IX86_BUILTIN_LOADHPS 11740 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps); 11741 arg0 = TREE_VALUE (arglist); 11742 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11743 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11744 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11745 tmode = insn_data[icode].operand[0].mode; 11746 mode0 = insn_data[icode].operand[1].mode; 11747 mode1 = insn_data[icode].operand[2].mode; 11748 11749 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 11750 op0 = copy_to_mode_reg (mode0, op0); 11751 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1)); 11752 if (target == 0 11753 || GET_MODE (target) != tmode 11754 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 11755 target = gen_reg_rtx (tmode); 11756 pat = GEN_FCN (icode) (target, op0, op1); 11757 if (! pat) 11758 return 0; 11759 emit_insn (pat); 11760 return target; 11761 11762 case IX86_BUILTIN_STOREHPS: 11763 case IX86_BUILTIN_STORELPS: 11764 icode = (fcode == IX86_BUILTIN_STOREHPS 11765 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps); 11766 arg0 = TREE_VALUE (arglist); 11767 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11768 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11769 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11770 mode0 = insn_data[icode].operand[1].mode; 11771 mode1 = insn_data[icode].operand[2].mode; 11772 11773 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); 11774 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) 11775 op1 = copy_to_mode_reg (mode1, op1); 11776 11777 pat = GEN_FCN (icode) (op0, op0, op1); 11778 if (! 
pat) 11779 return 0; 11780 emit_insn (pat); 11781 return 0; 11782 11783 case IX86_BUILTIN_MOVNTPS: 11784 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist); 11785 case IX86_BUILTIN_MOVNTQ: 11786 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist); 11787 11788 case IX86_BUILTIN_LDMXCSR: 11789 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0); 11790 target = assign_386_stack_local (SImode, 0); 11791 emit_move_insn (target, op0); 11792 emit_insn (gen_ldmxcsr (target)); 11793 return 0; 11794 11795 case IX86_BUILTIN_STMXCSR: 11796 target = assign_386_stack_local (SImode, 0); 11797 emit_insn (gen_stmxcsr (target)); 11798 return copy_to_mode_reg (SImode, target); 11799 11800 case IX86_BUILTIN_SHUFPS: 11801 icode = CODE_FOR_sse_shufps; 11802 arg0 = TREE_VALUE (arglist); 11803 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11804 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); 11805 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11806 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11807 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); 11808 tmode = insn_data[icode].operand[0].mode; 11809 mode0 = insn_data[icode].operand[1].mode; 11810 mode1 = insn_data[icode].operand[2].mode; 11811 mode2 = insn_data[icode].operand[3].mode; 11812 11813 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 11814 op0 = copy_to_mode_reg (mode0, op0); 11815 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) 11816 op1 = copy_to_mode_reg (mode1, op1); 11817 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) 11818 { 11819 /* @@@ better error message */ 11820 error ("mask must be an immediate"); 11821 return gen_reg_rtx (tmode); 11822 } 11823 if (target == 0 11824 || GET_MODE (target) != tmode 11825 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 11826 target = gen_reg_rtx (tmode); 11827 pat = GEN_FCN (icode) (target, op0, op1, op2); 11828 if (! 
pat) 11829 return 0; 11830 emit_insn (pat); 11831 return target; 11832 11833 case IX86_BUILTIN_PSHUFW: 11834 icode = CODE_FOR_mmx_pshufw; 11835 arg0 = TREE_VALUE (arglist); 11836 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11837 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11838 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11839 tmode = insn_data[icode].operand[0].mode; 11840 mode1 = insn_data[icode].operand[1].mode; 11841 mode2 = insn_data[icode].operand[2].mode; 11842 11843 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) 11844 op0 = copy_to_mode_reg (mode1, op0); 11845 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) 11846 { 11847 /* @@@ better error message */ 11848 error ("mask must be an immediate"); 11849 return const0_rtx; 11850 } 11851 if (target == 0 11852 || GET_MODE (target) != tmode 11853 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 11854 target = gen_reg_rtx (tmode); 11855 pat = GEN_FCN (icode) (target, op0, op1); 11856 if (! 
pat) 11857 return 0; 11858 emit_insn (pat); 11859 return target; 11860 11861 case IX86_BUILTIN_FEMMS: 11862 emit_insn (gen_femms ()); 11863 return NULL_RTX; 11864 11865 case IX86_BUILTIN_PAVGUSB: 11866 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target); 11867 11868 case IX86_BUILTIN_PF2ID: 11869 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0); 11870 11871 case IX86_BUILTIN_PFACC: 11872 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target); 11873 11874 case IX86_BUILTIN_PFADD: 11875 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target); 11876 11877 case IX86_BUILTIN_PFCMPEQ: 11878 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target); 11879 11880 case IX86_BUILTIN_PFCMPGE: 11881 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target); 11882 11883 case IX86_BUILTIN_PFCMPGT: 11884 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target); 11885 11886 case IX86_BUILTIN_PFMAX: 11887 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target); 11888 11889 case IX86_BUILTIN_PFMIN: 11890 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target); 11891 11892 case IX86_BUILTIN_PFMUL: 11893 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target); 11894 11895 case IX86_BUILTIN_PFRCP: 11896 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0); 11897 11898 case IX86_BUILTIN_PFRCPIT1: 11899 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target); 11900 11901 case IX86_BUILTIN_PFRCPIT2: 11902 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target); 11903 11904 case IX86_BUILTIN_PFRSQIT1: 11905 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target); 11906 11907 case IX86_BUILTIN_PFRSQRT: 11908 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0); 11909 11910 case IX86_BUILTIN_PFSUB: 11911 return ix86_expand_binop_builtin 
(CODE_FOR_subv2sf3, arglist, target); 11912 11913 case IX86_BUILTIN_PFSUBR: 11914 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target); 11915 11916 case IX86_BUILTIN_PI2FD: 11917 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0); 11918 11919 case IX86_BUILTIN_PMULHRW: 11920 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target); 11921 11922 case IX86_BUILTIN_PF2IW: 11923 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0); 11924 11925 case IX86_BUILTIN_PFNACC: 11926 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target); 11927 11928 case IX86_BUILTIN_PFPNACC: 11929 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target); 11930 11931 case IX86_BUILTIN_PI2FW: 11932 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0); 11933 11934 case IX86_BUILTIN_PSWAPDSI: 11935 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0); 11936 11937 case IX86_BUILTIN_PSWAPDSF: 11938 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0); 11939 11940 case IX86_BUILTIN_SSE_ZERO: 11941 target = gen_reg_rtx (V4SFmode); 11942 emit_insn (gen_sse_clrv4sf (target)); 11943 return target; 11944 11945 case IX86_BUILTIN_MMX_ZERO: 11946 target = gen_reg_rtx (DImode); 11947 emit_insn (gen_mmx_clrdi (target)); 11948 return target; 11949 11950 default: 11951 break; 11952 } 11953 11954 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++) 11955 if (d->code == fcode) 11956 { 11957 /* Compares are treated specially. 
*/ 11958 if (d->icode == CODE_FOR_maskcmpv4sf3 11959 || d->icode == CODE_FOR_vmmaskcmpv4sf3 11960 || d->icode == CODE_FOR_maskncmpv4sf3 11961 || d->icode == CODE_FOR_vmmaskncmpv4sf3) 11962 return ix86_expand_sse_compare (d, arglist, target); 11963 11964 return ix86_expand_binop_builtin (d->icode, arglist, target); 11965 } 11966 11967 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++) 11968 if (d->code == fcode) 11969 return ix86_expand_unop_builtin (d->icode, arglist, target, 0); 11970 11971 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++) 11972 if (d->code == fcode) 11973 return ix86_expand_sse_comi (d, arglist, target); 11974 11975 /* @@@ Should really do something sensible here. */ 11976 return 0; 11977} 11978 11979/* Store OPERAND to the memory after reload is completed. This means 11980 that we can't easily use assign_stack_local. */ 11981rtx 11982ix86_force_to_memory (mode, operand) 11983 enum machine_mode mode; 11984 rtx operand; 11985{ 11986 rtx result; 11987 if (!reload_completed) 11988 abort (); 11989 if (TARGET_64BIT && TARGET_RED_ZONE) 11990 { 11991 result = gen_rtx_MEM (mode, 11992 gen_rtx_PLUS (Pmode, 11993 stack_pointer_rtx, 11994 GEN_INT (-RED_ZONE_SIZE))); 11995 emit_move_insn (result, operand); 11996 } 11997 else if (TARGET_64BIT && !TARGET_RED_ZONE) 11998 { 11999 switch (mode) 12000 { 12001 case HImode: 12002 case SImode: 12003 operand = gen_lowpart (DImode, operand); 12004 /* FALLTHRU */ 12005 case DImode: 12006 emit_insn ( 12007 gen_rtx_SET (VOIDmode, 12008 gen_rtx_MEM (DImode, 12009 gen_rtx_PRE_DEC (DImode, 12010 stack_pointer_rtx)), 12011 operand)); 12012 break; 12013 default: 12014 abort (); 12015 } 12016 result = gen_rtx_MEM (mode, stack_pointer_rtx); 12017 } 12018 else 12019 { 12020 switch (mode) 12021 { 12022 case DImode: 12023 { 12024 rtx operands[2]; 12025 split_di (&operand, 1, operands, operands + 1); 12026 emit_insn ( 12027 gen_rtx_SET (VOIDmode, 12028 gen_rtx_MEM (SImode, 12029 
gen_rtx_PRE_DEC (Pmode, 12030 stack_pointer_rtx)), 12031 operands[1])); 12032 emit_insn ( 12033 gen_rtx_SET (VOIDmode, 12034 gen_rtx_MEM (SImode, 12035 gen_rtx_PRE_DEC (Pmode, 12036 stack_pointer_rtx)), 12037 operands[0])); 12038 } 12039 break; 12040 case HImode: 12041 /* It is better to store HImodes as SImodes. */ 12042 if (!TARGET_PARTIAL_REG_STALL) 12043 operand = gen_lowpart (SImode, operand); 12044 /* FALLTHRU */ 12045 case SImode: 12046 emit_insn ( 12047 gen_rtx_SET (VOIDmode, 12048 gen_rtx_MEM (GET_MODE (operand), 12049 gen_rtx_PRE_DEC (SImode, 12050 stack_pointer_rtx)), 12051 operand)); 12052 break; 12053 default: 12054 abort (); 12055 } 12056 result = gen_rtx_MEM (mode, stack_pointer_rtx); 12057 } 12058 return result; 12059} 12060 12061/* Free operand from the memory. */ 12062void 12063ix86_free_from_memory (mode) 12064 enum machine_mode mode; 12065{ 12066 if (!TARGET_64BIT || !TARGET_RED_ZONE) 12067 { 12068 int size; 12069 12070 if (mode == DImode || TARGET_64BIT) 12071 size = 8; 12072 else if (mode == HImode && TARGET_PARTIAL_REG_STALL) 12073 size = 2; 12074 else 12075 size = 4; 12076 /* Use LEA to deallocate stack space. In peephole2 it will be converted 12077 to pop or add instruction if registers are available. */ 12078 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, 12079 gen_rtx_PLUS (Pmode, stack_pointer_rtx, 12080 GEN_INT (size)))); 12081 } 12082} 12083 12084/* Put float CONST_DOUBLE in the constant pool instead of fp regs. 12085 QImode must go into class Q_REGS. 12086 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and 12087 movdf to do mem-to-mem moves through integer regs. */ 12088enum reg_class 12089ix86_preferred_reload_class (x, class) 12090 rtx x; 12091 enum reg_class class; 12092{ 12093 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode) 12094 { 12095 /* SSE can't load any constant directly yet. */ 12096 if (SSE_CLASS_P (class)) 12097 return NO_REGS; 12098 /* Floats can load 0 and 1. 
*/ 12099 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x)) 12100 { 12101 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */ 12102 if (MAYBE_SSE_CLASS_P (class)) 12103 return (reg_class_subset_p (class, GENERAL_REGS) 12104 ? GENERAL_REGS : FLOAT_REGS); 12105 else 12106 return class; 12107 } 12108 /* General regs can load everything. */ 12109 if (reg_class_subset_p (class, GENERAL_REGS)) 12110 return GENERAL_REGS; 12111 /* In case we haven't resolved FLOAT or SSE yet, give up. */ 12112 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)) 12113 return NO_REGS; 12114 } 12115 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x)) 12116 return NO_REGS; 12117 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS)) 12118 return Q_REGS; 12119 return class; 12120} 12121 12122/* If we are copying between general and FP registers, we need a memory 12123 location. The same is true for SSE and MMX registers. 12124 12125 The macro can't work reliably when one of the CLASSES is class containing 12126 registers from multiple units (SSE, MMX, integer). We avoid this by never 12127 combining those units in single alternative in the machine description. 12128 Ensure that this constraint holds to avoid unexpected surprises. 12129 12130 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not 12131 enforce these sanity checks. 
 */
int
ix86_secondary_memory_needed (class1, class2, mode, strict)
     enum reg_class class1, class2;
     enum machine_mode mode;
     int strict;
{
  /* A class mixing units (MAYBE_* true but the exact-class predicate false)
     violates the invariant documented above: abort under STRICT, otherwise
     conservatively report that memory is needed.  */
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
	abort ();
      else
	return 1;
    }
  /* Memory is needed when crossing the x87 boundary, or the SSE/MMX
     boundary for non-SImode values.  */
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
	  || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
	      && (mode) != SImode)
	  || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
	      && (mode) != SImode));
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */
int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In case of copying from general_purpose_register we may emit
     multiple stores followed by single load causing memory size mismatch
     stall.  Count this as arbitarily high cost of 20.  */
  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int add_cost = 0;
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	add_cost = 20;
      return (MEMORY_MOVE_COST (mode, class1, 0)
	      + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
    }
  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  /* Same-unit moves are priced from the per-processor cost tables.  */
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return VALID_SSE_REG_MODE (mode);
  if (MMX_REGNO_P (regno))
    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
  /* We handle both integer and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care for QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.
 */
int
ix86_memory_move_cost (mode, class, in)
     enum machine_mode mode;
     enum reg_class class;
     int in;
{
  /* x87 registers: index the fp_load/fp_store cost tables by operand
     width (SF/DF/XF-TF); anything else gets a prohibitive cost.  */
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	case TFmode:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  /* SSE registers: cost tables indexed by size 4/8/16 bytes.  */
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  /* MMX registers: cost tables indexed by size 4/8 bytes.  */
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  /* Integer registers, by operand size.  */
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      /* Byte loads into non-Q registers need a movzbl; byte stores from
	 them are modeled with a flat penalty of 4.  */
      if (in)
	return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
		: ix86_cost->movzbl_load);
      else
	return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
		: ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
	      * (int) GET_MODE_SIZE (mode) / 4);
    }
}

#ifdef DO_GLOBAL_CTORS_BODY
/* Emit an SVR3-style constructor record: push the constructor's address
   in the .init section.  PRIORITY is ignored.  */
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif