i386.c revision 108173
/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

/* $FreeBSD: head/contrib/gcc/config/i386/i386.c 108173 2002-12-22 05:57:53Z kan $ */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Processor costs (relative to an add).  One table per CPU the compiler
   can tune for; `ix86_cost' below points at the active table.  */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,					/* cost of an add instruction */
  3,					/* cost of a lea instruction */
  2,					/* variable shift costs */
  3,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  3,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  3,					/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  6,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  23,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  12,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  40,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
};

static const
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  11,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  25,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
};

static const
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  4,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  17,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
};

static const
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  18,					/* cost of a divide/mod */
  2,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
};

static const
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  5,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  42,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
};

static const
struct processor_costs pentium4_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  8,					/* variable shift costs */
  8,					/* constant shift costs */
  30,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  112,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
};

/* Active cost table; repointed by option handling when tuning for a
   specific CPU.  */
const struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  Each m_* bit selects one
   PROCESSOR_* value; the x86_* masks below say which CPUs want a given
   code-generation strategy.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)
#define m_PENT4  (1<<PROCESSOR_PENTIUM4)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_3dnow_a = m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
/* Branch hints were put in P4 based on simulation result. But
   after P4 was made, no performance benefit was observed with
   branch hints. It also increases the code size. As the result,
   icc never generates branch hints.  */
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_arch_always_fancy_math_387 = m_PENT|m_PPRO|m_ATHLON|m_PENT4;

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

/* A MEM in MODE addressed through the hard frame pointer.  */
#define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))

static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

/* GCC register numbers used for x86-64 integer argument passing and
   integer return values, in ABI order.  */
static int const x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
						      1 /*RDX*/, 2 /*RCX*/,
						      FIRST_REX_INT_REG /*R8 */,
						      FIRST_REX_INT_REG + 1 /*R9 */};
static int const x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDI*/, 5, 4};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  /* Per-mode cache of stack slots, see ix86_stack_locals below.  */
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  /* Nonzero if the varargs register save area must be set up
     (note: "varrargs" spelling is historical and used throughout).  */
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
		        )
   [va_arg registers]  (
		        > to_allocate	      <- FRAME_POINTER
   [frame]	       (
		        )
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};

/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;

/* which cpu are we scheduling for */
enum processor_type ix86_cpu;

/* which unit we are generating floating point math for */
enum fpmath_unit ix86_fpmath;

/* which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture  to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;

static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
				       int, int, FILE *));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					  rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_free_machine_status PARAMS ((struct function *));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
static void ix86_init_mmx_sse_builtins PARAMS ((void));

/* Decomposed form of an address operand: base + index * scale + disp.  */
struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
					 tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
					    tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
						     tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					  rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static int ix86_save_reg PARAMS ((int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
const struct attribute_spec ix86_attribute_table[];
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));

#ifdef DO_GLOBAL_CTORS_BODY
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					const int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
  static void ix86_osf_output_function_prologue PARAMS ((FILE *,
							 HOST_WIDE_INT));
#  undef TARGET_ASM_FUNCTION_PROLOGUE
#  define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
803#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG 804#ifdef ASM_QUAD 805#undef TARGET_ASM_ALIGNED_DI_OP 806#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD 807#endif 808 809#undef TARGET_ASM_UNALIGNED_HI_OP 810#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP 811#undef TARGET_ASM_UNALIGNED_SI_OP 812#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP 813#undef TARGET_ASM_UNALIGNED_DI_OP 814#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP 815 816#undef TARGET_SCHED_ADJUST_COST 817#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost 818#undef TARGET_SCHED_ISSUE_RATE 819#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate 820#undef TARGET_SCHED_VARIABLE_ISSUE 821#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue 822#undef TARGET_SCHED_INIT 823#define TARGET_SCHED_INIT ix86_sched_init 824#undef TARGET_SCHED_REORDER 825#define TARGET_SCHED_REORDER ix86_sched_reorder 826 827struct gcc_target targetm = TARGET_INITIALIZER; 828 829/* Sometimes certain combinations of command options do not make 830 sense on a particular target machine. You can define a macro 831 `OVERRIDE_OPTIONS' to take account of this. This macro, if 832 defined, is executed once just after all the command options have 833 been parsed. 834 835 Don't use this macro to turn on various extra optimizations for 836 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ 837 838void 839override_options () 840{ 841 int i; 842 /* Comes from final.c -- no real reason to change it. */ 843#define MAX_CODE_ALIGN 16 844 845 static struct ptt 846 { 847 const struct processor_costs *cost; /* Processor costs */ 848 const int target_enable; /* Target flags to enable. */ 849 const int target_disable; /* Target flags to disable. */ 850 const int align_loop; /* Default alignments. 
*/ 851 const int align_loop_max_skip; 852 const int align_jump; 853 const int align_jump_max_skip; 854 const int align_func; 855 const int branch_cost; 856 } 857 const processor_target_table[PROCESSOR_max] = 858 { 859 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1}, 860 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1}, 861 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1}, 862 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1}, 863 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1}, 864 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1}, 865 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1} 866 }; 867 868 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES; 869 static struct pta 870 { 871 const char *const name; /* processor name or nickname. */ 872 const enum processor_type processor; 873 const enum pta_flags 874 { 875 PTA_SSE = 1, 876 PTA_SSE2 = 2, 877 PTA_MMX = 4, 878 PTA_PREFETCH_SSE = 8, 879 PTA_3DNOW = 16, 880 PTA_3DNOW_A = 64 881 } flags; 882 } 883 const processor_alias_table[] = 884 { 885 {"i386", PROCESSOR_I386, 0}, 886 {"i486", PROCESSOR_I486, 0}, 887 {"i586", PROCESSOR_PENTIUM, 0}, 888 {"pentium", PROCESSOR_PENTIUM, 0}, 889 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX}, 890 {"i686", PROCESSOR_PENTIUMPRO, 0}, 891 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0}, 892 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX}, 893 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, 894 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | 895 PTA_MMX | PTA_PREFETCH_SSE}, 896 {"k6", PROCESSOR_K6, PTA_MMX}, 897 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 898 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 899 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 900 | PTA_3DNOW_A}, 901 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE 902 | PTA_3DNOW | PTA_3DNOW_A}, 903 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 904 | PTA_3DNOW_A | PTA_SSE}, 905 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 906 | PTA_3DNOW_A | PTA_SSE}, 907 
{"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 908 | PTA_3DNOW_A | PTA_SSE}, 909 }; 910 911 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta); 912 913 /* Set the default values for switches whose default depends on TARGET_64BIT 914 in case they weren't overwriten by command line options. */ 915 if (TARGET_64BIT) 916 { 917 if (flag_omit_frame_pointer == 2) 918 flag_omit_frame_pointer = 1; 919 if (flag_asynchronous_unwind_tables == 2) 920 flag_asynchronous_unwind_tables = 1; 921 if (flag_pcc_struct_return == 2) 922 flag_pcc_struct_return = 0; 923 } 924 else 925 { 926 if (flag_omit_frame_pointer == 2) 927 flag_omit_frame_pointer = 0; 928 if (flag_asynchronous_unwind_tables == 2) 929 flag_asynchronous_unwind_tables = 0; 930 if (flag_pcc_struct_return == 2) 931 flag_pcc_struct_return = 1; 932 } 933 934#ifdef SUBTARGET_OVERRIDE_OPTIONS 935 SUBTARGET_OVERRIDE_OPTIONS; 936#endif 937 938 if (!ix86_cpu_string && ix86_arch_string) 939 ix86_cpu_string = ix86_arch_string; 940 if (!ix86_cpu_string) 941 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT]; 942 if (!ix86_arch_string) 943 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386"; 944 945 if (ix86_cmodel_string != 0) 946 { 947 if (!strcmp (ix86_cmodel_string, "small")) 948 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 949 else if (flag_pic) 950 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string); 951 else if (!strcmp (ix86_cmodel_string, "32")) 952 ix86_cmodel = CM_32; 953 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic) 954 ix86_cmodel = CM_KERNEL; 955 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic) 956 ix86_cmodel = CM_MEDIUM; 957 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic) 958 ix86_cmodel = CM_LARGE; 959 else 960 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string); 961 } 962 else 963 { 964 ix86_cmodel = CM_32; 965 if (TARGET_64BIT) 966 ix86_cmodel = flag_pic ? 
CM_SMALL_PIC : CM_SMALL; 967 } 968 if (ix86_asm_string != 0) 969 { 970 if (!strcmp (ix86_asm_string, "intel")) 971 ix86_asm_dialect = ASM_INTEL; 972 else if (!strcmp (ix86_asm_string, "att")) 973 ix86_asm_dialect = ASM_ATT; 974 else 975 error ("bad value (%s) for -masm= switch", ix86_asm_string); 976 } 977 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32)) 978 error ("code model `%s' not supported in the %s bit mode", 979 ix86_cmodel_string, TARGET_64BIT ? "64" : "32"); 980 if (ix86_cmodel == CM_LARGE) 981 sorry ("code model `large' not supported yet"); 982 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0)) 983 sorry ("%i-bit mode not compiled in", 984 (target_flags & MASK_64BIT) ? 64 : 32); 985 986 for (i = 0; i < pta_size; i++) 987 if (! strcmp (ix86_arch_string, processor_alias_table[i].name)) 988 { 989 ix86_arch = processor_alias_table[i].processor; 990 /* Default cpu tuning to the architecture. */ 991 ix86_cpu = ix86_arch; 992 if (processor_alias_table[i].flags & PTA_MMX 993 && !(target_flags & MASK_MMX_SET)) 994 target_flags |= MASK_MMX; 995 if (processor_alias_table[i].flags & PTA_3DNOW 996 && !(target_flags & MASK_3DNOW_SET)) 997 target_flags |= MASK_3DNOW; 998 if (processor_alias_table[i].flags & PTA_3DNOW_A 999 && !(target_flags & MASK_3DNOW_A_SET)) 1000 target_flags |= MASK_3DNOW_A; 1001 if (processor_alias_table[i].flags & PTA_SSE 1002 && !(target_flags & MASK_SSE_SET)) 1003 target_flags |= MASK_SSE; 1004 if (processor_alias_table[i].flags & PTA_SSE2 1005 && !(target_flags & MASK_SSE2_SET)) 1006 target_flags |= MASK_SSE2; 1007 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) 1008 x86_prefetch_sse = true; 1009 break; 1010 } 1011 1012 if (i == pta_size) 1013 error ("bad value (%s) for -march= switch", ix86_arch_string); 1014 1015 for (i = 0; i < pta_size; i++) 1016 if (! 
strcmp (ix86_cpu_string, processor_alias_table[i].name)) 1017 { 1018 ix86_cpu = processor_alias_table[i].processor; 1019 break; 1020 } 1021 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) 1022 x86_prefetch_sse = true; 1023 if (i == pta_size) 1024 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string); 1025 1026 if (optimize_size) 1027 ix86_cost = &size_cost; 1028 else 1029 ix86_cost = processor_target_table[ix86_cpu].cost; 1030 target_flags |= processor_target_table[ix86_cpu].target_enable; 1031 target_flags &= ~processor_target_table[ix86_cpu].target_disable; 1032 1033 /* Arrange to set up i386_stack_locals for all functions. */ 1034 init_machine_status = ix86_init_machine_status; 1035 mark_machine_status = ix86_mark_machine_status; 1036 free_machine_status = ix86_free_machine_status; 1037 1038 /* Validate -mregparm= value. */ 1039 if (ix86_regparm_string) 1040 { 1041 i = atoi (ix86_regparm_string); 1042 if (i < 0 || i > REGPARM_MAX) 1043 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX); 1044 else 1045 ix86_regparm = i; 1046 } 1047 else 1048 if (TARGET_64BIT) 1049 ix86_regparm = REGPARM_MAX; 1050 1051 /* If the user has provided any of the -malign-* options, 1052 warn and use that value only if -falign-* is not set. 1053 Remove this code in GCC 3.2 or later. 
*/ 1054 if (ix86_align_loops_string) 1055 { 1056 warning ("-malign-loops is obsolete, use -falign-loops"); 1057 if (align_loops == 0) 1058 { 1059 i = atoi (ix86_align_loops_string); 1060 if (i < 0 || i > MAX_CODE_ALIGN) 1061 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1062 else 1063 align_loops = 1 << i; 1064 } 1065 } 1066 1067 if (ix86_align_jumps_string) 1068 { 1069 warning ("-malign-jumps is obsolete, use -falign-jumps"); 1070 if (align_jumps == 0) 1071 { 1072 i = atoi (ix86_align_jumps_string); 1073 if (i < 0 || i > MAX_CODE_ALIGN) 1074 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1075 else 1076 align_jumps = 1 << i; 1077 } 1078 } 1079 1080 if (ix86_align_funcs_string) 1081 { 1082 warning ("-malign-functions is obsolete, use -falign-functions"); 1083 if (align_functions == 0) 1084 { 1085 i = atoi (ix86_align_funcs_string); 1086 if (i < 0 || i > MAX_CODE_ALIGN) 1087 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1088 else 1089 align_functions = 1 << i; 1090 } 1091 } 1092 1093 /* Default align_* from the processor table. */ 1094 if (align_loops == 0) 1095 { 1096 align_loops = processor_target_table[ix86_cpu].align_loop; 1097 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip; 1098 } 1099 if (align_jumps == 0) 1100 { 1101 align_jumps = processor_target_table[ix86_cpu].align_jump; 1102 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip; 1103 } 1104 if (align_functions == 0) 1105 { 1106 align_functions = processor_target_table[ix86_cpu].align_func; 1107 } 1108 1109 /* Validate -mpreferred-stack-boundary= value, or provide default. 1110 The default of 128 bits is for Pentium III's SSE __m128, but we 1111 don't want additional code to keep the stack aligned when 1112 optimizing for code size. */ 1113 ix86_preferred_stack_boundary = (optimize_size 1114 ? TARGET_64BIT ? 
128 : 32 1115 : 128); 1116 if (ix86_preferred_stack_boundary_string) 1117 { 1118 i = atoi (ix86_preferred_stack_boundary_string); 1119 if (i < (TARGET_64BIT ? 4 : 2) || i > 12) 1120 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i, 1121 TARGET_64BIT ? 4 : 2); 1122 else 1123 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT; 1124 } 1125 1126 /* Validate -mbranch-cost= value, or provide default. */ 1127 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost; 1128 if (ix86_branch_cost_string) 1129 { 1130 i = atoi (ix86_branch_cost_string); 1131 if (i < 0 || i > 5) 1132 error ("-mbranch-cost=%d is not between 0 and 5", i); 1133 else 1134 ix86_branch_cost = i; 1135 } 1136 1137 /* Keep nonleaf frame pointers. */ 1138 if (TARGET_OMIT_LEAF_FRAME_POINTER) 1139 flag_omit_frame_pointer = 1; 1140 1141 /* If we're doing fast math, we don't care about comparison order 1142 wrt NaNs. This lets us use a shorter comparison sequence. */ 1143 if (flag_unsafe_math_optimizations) 1144 target_flags &= ~MASK_IEEE_FP; 1145 1146 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387, 1147 since the insns won't need emulation. */ 1148 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch)) 1149 target_flags &= ~MASK_NO_FANCY_MATH_387; 1150 1151 if (TARGET_64BIT) 1152 { 1153 if (TARGET_ALIGN_DOUBLE) 1154 error ("-malign-double makes no sense in the 64bit mode"); 1155 if (TARGET_RTD) 1156 error ("-mrtd calling convention not supported in the 64bit mode"); 1157 /* Enable by default the SSE and MMX builtins. */ 1158 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE); 1159 ix86_fpmath = FPMATH_SSE; 1160 } 1161 else 1162 ix86_fpmath = FPMATH_387; 1163 1164 if (ix86_fpmath_string != 0) 1165 { 1166 if (! strcmp (ix86_fpmath_string, "387")) 1167 ix86_fpmath = FPMATH_387; 1168 else if (! 
strcmp (ix86_fpmath_string, "sse")) 1169 { 1170 if (!TARGET_SSE) 1171 { 1172 warning ("SSE instruction set disabled, using 387 arithmetics"); 1173 ix86_fpmath = FPMATH_387; 1174 } 1175 else 1176 ix86_fpmath = FPMATH_SSE; 1177 } 1178 else if (! strcmp (ix86_fpmath_string, "387,sse") 1179 || ! strcmp (ix86_fpmath_string, "sse,387")) 1180 { 1181 if (!TARGET_SSE) 1182 { 1183 warning ("SSE instruction set disabled, using 387 arithmetics"); 1184 ix86_fpmath = FPMATH_387; 1185 } 1186 else if (!TARGET_80387) 1187 { 1188 warning ("387 instruction set disabled, using SSE arithmetics"); 1189 ix86_fpmath = FPMATH_SSE; 1190 } 1191 else 1192 ix86_fpmath = FPMATH_SSE | FPMATH_387; 1193 } 1194 else 1195 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string); 1196 } 1197 1198 /* It makes no sense to ask for just SSE builtins, so MMX is also turned 1199 on by -msse. */ 1200 if (TARGET_SSE) 1201 { 1202 target_flags |= MASK_MMX; 1203 x86_prefetch_sse = true; 1204 } 1205 1206 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */ 1207 if (TARGET_3DNOW) 1208 { 1209 target_flags |= MASK_MMX; 1210 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX 1211 extensions it adds. */ 1212 if (x86_3dnow_a & (1 << ix86_arch)) 1213 target_flags |= MASK_3DNOW_A; 1214 } 1215 if ((x86_accumulate_outgoing_args & CPUMASK) 1216 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET) 1217 && !optimize_size) 1218 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 1219 1220 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ 1221 { 1222 char *p; 1223 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); 1224 p = strchr (internal_label_prefix, 'X'); 1225 internal_label_prefix_len = p - internal_label_prefix; 1226 *p = '\0'; 1227 } 1228} 1229 1230void 1231optimization_options (level, size) 1232 int level; 1233 int size ATTRIBUTE_UNUSED; 1234{ 1235 /* For -O2 and beyond, turn off -fschedule-insns by default. 
It tends to 1236 make the problem with not enough registers even worse. */ 1237#ifdef INSN_SCHEDULING 1238 if (level > 1) 1239 flag_schedule_insns = 0; 1240#endif 1241 /* The default values of these switches depend on the TARGET_64BIT 1242 that is not known at this moment. Mark these values with 2 and 1243 let user the to override these. In case there is no command line option 1244 specifying them, we will set the defaults in override_options. */ 1245 if (optimize >= 1) 1246 flag_omit_frame_pointer = 2; 1247 flag_asynchronous_unwind_tables = 2; 1248} 1249 1250/* Table of valid machine attributes. */ 1251const struct attribute_spec ix86_attribute_table[] = 1252{ 1253 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ 1254 /* Stdcall attribute says callee is responsible for popping arguments 1255 if they are not variable. */ 1256 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute }, 1257 /* Cdecl attribute says the callee is a normal C declaration */ 1258 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute }, 1259 /* Regparm attribute specifies how many integer arguments are to be 1260 passed in registers. */ 1261 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute }, 1262#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES 1263 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute }, 1264 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute }, 1265 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute }, 1266#endif 1267 { NULL, 0, 0, false, false, false, NULL } 1268}; 1269 1270/* Handle a "cdecl" or "stdcall" attribute; 1271 arguments as in struct attribute_spec.handler. 
*/ 1272static tree 1273ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs) 1274 tree *node; 1275 tree name; 1276 tree args ATTRIBUTE_UNUSED; 1277 int flags ATTRIBUTE_UNUSED; 1278 bool *no_add_attrs; 1279{ 1280 if (TREE_CODE (*node) != FUNCTION_TYPE 1281 && TREE_CODE (*node) != METHOD_TYPE 1282 && TREE_CODE (*node) != FIELD_DECL 1283 && TREE_CODE (*node) != TYPE_DECL) 1284 { 1285 warning ("`%s' attribute only applies to functions", 1286 IDENTIFIER_POINTER (name)); 1287 *no_add_attrs = true; 1288 } 1289 1290 if (TARGET_64BIT) 1291 { 1292 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name)); 1293 *no_add_attrs = true; 1294 } 1295 1296 return NULL_TREE; 1297} 1298 1299/* Handle a "regparm" attribute; 1300 arguments as in struct attribute_spec.handler. */ 1301static tree 1302ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs) 1303 tree *node; 1304 tree name; 1305 tree args; 1306 int flags ATTRIBUTE_UNUSED; 1307 bool *no_add_attrs; 1308{ 1309 if (TREE_CODE (*node) != FUNCTION_TYPE 1310 && TREE_CODE (*node) != METHOD_TYPE 1311 && TREE_CODE (*node) != FIELD_DECL 1312 && TREE_CODE (*node) != TYPE_DECL) 1313 { 1314 warning ("`%s' attribute only applies to functions", 1315 IDENTIFIER_POINTER (name)); 1316 *no_add_attrs = true; 1317 } 1318 else 1319 { 1320 tree cst; 1321 1322 cst = TREE_VALUE (args); 1323 if (TREE_CODE (cst) != INTEGER_CST) 1324 { 1325 warning ("`%s' attribute requires an integer constant argument", 1326 IDENTIFIER_POINTER (name)); 1327 *no_add_attrs = true; 1328 } 1329 else if (compare_tree_int (cst, REGPARM_MAX) > 0) 1330 { 1331 warning ("argument to `%s' attribute larger than %d", 1332 IDENTIFIER_POINTER (name), REGPARM_MAX); 1333 *no_add_attrs = true; 1334 } 1335 } 1336 1337 return NULL_TREE; 1338} 1339 1340#if defined (OSF_OS) || defined (TARGET_OSF1ELF) 1341 1342/* Generate the assembly code for function entry. FILE is a stdio 1343 stream to output the code to. 
SIZE is an int: how many units of 1344 temporary storage to allocate. 1345 1346 Refer to the array `regs_ever_live' to determine which registers to 1347 save; `regs_ever_live[I]' is nonzero if register number I is ever 1348 used in the function. This function is responsible for knowing 1349 which registers should not be saved even if used. 1350 1351 We override it here to allow for the new profiling code to go before 1352 the prologue and the old mcount code to go after the prologue (and 1353 after %ebx has been set up for ELF shared library support). */ 1354 1355static void 1356ix86_osf_output_function_prologue (file, size) 1357 FILE *file; 1358 HOST_WIDE_INT size; 1359{ 1360 const char *prefix = ""; 1361 const char *const lprefix = LPREFIX; 1362 int labelno = current_function_profile_label_no; 1363 1364#ifdef OSF_OS 1365 1366 if (TARGET_UNDERSCORES) 1367 prefix = "_"; 1368 1369 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE) 1370 { 1371 if (!flag_pic && !HALF_PIC_P ()) 1372 { 1373 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno); 1374 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix); 1375 } 1376 1377 else if (HALF_PIC_P ()) 1378 { 1379 rtx symref; 1380 1381 HALF_PIC_EXTERNAL ("_mcount_ptr"); 1382 symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode, 1383 "_mcount_ptr")); 1384 1385 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno); 1386 fprintf (file, "\tmovl %s%s,%%eax\n", prefix, 1387 XSTR (symref, 0)); 1388 fprintf (file, "\tcall *(%%eax)\n"); 1389 } 1390 1391 else 1392 { 1393 static int call_no = 0; 1394 1395 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no); 1396 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no); 1397 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n", 1398 lprefix, call_no++); 1399 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n", 1400 lprefix, labelno); 1401 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n", 1402 prefix); 1403 fprintf (file, "\tcall *(%%eax)\n"); 1404 } 1405 } 
1406 1407#else /* !OSF_OS */ 1408 1409 if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE) 1410 { 1411 if (!flag_pic) 1412 { 1413 fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno); 1414 fprintf (file, "\tcall *%s_mcount_ptr\n", prefix); 1415 } 1416 1417 else 1418 { 1419 static int call_no = 0; 1420 1421 fprintf (file, "\tcall %sPc%d\n", lprefix, call_no); 1422 fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no); 1423 fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n", 1424 lprefix, call_no++); 1425 fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n", 1426 lprefix, labelno); 1427 fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n", 1428 prefix); 1429 fprintf (file, "\tcall *(%%eax)\n"); 1430 } 1431 } 1432#endif /* !OSF_OS */ 1433 1434 function_prologue (file, size); 1435} 1436 1437#endif /* OSF_OS || TARGET_OSF1ELF */ 1438 1439/* Return 0 if the attributes for two types are incompatible, 1 if they 1440 are compatible, and 2 if they are nearly compatible (which causes a 1441 warning to be generated). */ 1442 1443static int 1444ix86_comp_type_attributes (type1, type2) 1445 tree type1; 1446 tree type2; 1447{ 1448 /* Check for mismatch of non-default calling convention. */ 1449 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall"; 1450 1451 if (TREE_CODE (type1) != FUNCTION_TYPE) 1452 return 1; 1453 1454 /* Check for mismatched return types (cdecl vs stdcall). */ 1455 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1)) 1456 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2))) 1457 return 0; 1458 return 1; 1459} 1460 1461/* Value is the number of bytes of arguments automatically 1462 popped when returning from a subroutine call. 1463 FUNDECL is the declaration node of the function (as a tree), 1464 FUNTYPE is the data type of the function (as a tree), 1465 or for a library call it is an identifier node for the subroutine name. 1466 SIZE is the number of bytes of arguments passed on the stack. 
1467 1468 On the 80386, the RTD insn may be used to pop them if the number 1469 of args is fixed, but if the number is variable then the caller 1470 must pop them all. RTD can't be used for library calls now 1471 because the library is compiled with the Unix compiler. 1472 Use of RTD is a selectable option, since it is incompatible with 1473 standard Unix calling sequences. If the option is not selected, 1474 the caller must always pop the args. 1475 1476 The attribute stdcall is equivalent to RTD on a per module basis. */ 1477 1478int 1479ix86_return_pops_args (fundecl, funtype, size) 1480 tree fundecl; 1481 tree funtype; 1482 int size; 1483{ 1484 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE); 1485 1486 /* Cdecl functions override -mrtd, and never pop the stack. */ 1487 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) { 1488 1489 /* Stdcall functions will pop the stack if not variable args. */ 1490 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))) 1491 rtd = 1; 1492 1493 if (rtd 1494 && (TYPE_ARG_TYPES (funtype) == NULL_TREE 1495 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype))) 1496 == void_type_node))) 1497 return size; 1498 } 1499 1500 /* Lose any fake structure return argument if it is passed on the stack. */ 1501 if (aggregate_value_p (TREE_TYPE (funtype)) 1502 && !TARGET_64BIT) 1503 { 1504 int nregs = ix86_regparm; 1505 1506 if (funtype) 1507 { 1508 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype)); 1509 1510 if (attr) 1511 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); 1512 } 1513 1514 if (!nregs) 1515 return GET_MODE_SIZE (Pmode); 1516 } 1517 1518 return 0; 1519} 1520 1521/* Argument support functions. */ 1522 1523/* Return true when register may be used to pass function parameters. 
*/ 1524bool 1525ix86_function_arg_regno_p (regno) 1526 int regno; 1527{ 1528 int i; 1529 if (!TARGET_64BIT) 1530 return (regno < REGPARM_MAX 1531 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno])); 1532 if (SSE_REGNO_P (regno) && TARGET_SSE) 1533 return true; 1534 /* RAX is used as hidden argument to va_arg functions. */ 1535 if (!regno) 1536 return true; 1537 for (i = 0; i < REGPARM_MAX; i++) 1538 if (regno == x86_64_int_parameter_registers[i]) 1539 return true; 1540 return false; 1541} 1542 1543/* Initialize a variable CUM of type CUMULATIVE_ARGS 1544 for a call to a function whose data type is FNTYPE. 1545 For a library call, FNTYPE is 0. */ 1546 1547void 1548init_cumulative_args (cum, fntype, libname) 1549 CUMULATIVE_ARGS *cum; /* Argument info to initialize */ 1550 tree fntype; /* tree ptr for function decl */ 1551 rtx libname; /* SYMBOL_REF of library name or 0 */ 1552{ 1553 static CUMULATIVE_ARGS zero_cum; 1554 tree param, next_param; 1555 1556 if (TARGET_DEBUG_ARG) 1557 { 1558 fprintf (stderr, "\ninit_cumulative_args ("); 1559 if (fntype) 1560 fprintf (stderr, "fntype code = %s, ret code = %s", 1561 tree_code_name[(int) TREE_CODE (fntype)], 1562 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]); 1563 else 1564 fprintf (stderr, "no fntype"); 1565 1566 if (libname) 1567 fprintf (stderr, ", libname = %s", XSTR (libname, 0)); 1568 } 1569 1570 *cum = zero_cum; 1571 1572 /* Set up the number of registers to use for passing arguments. */ 1573 cum->nregs = ix86_regparm; 1574 cum->sse_nregs = SSE_REGPARM_MAX; 1575 if (fntype && !TARGET_64BIT) 1576 { 1577 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype)); 1578 1579 if (attr) 1580 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); 1581 } 1582 cum->maybe_vaarg = false; 1583 1584 /* Determine if this function has variable arguments. This is 1585 indicated by the last argument being 'void_type_mode' if there 1586 are no variable arguments. 
If there are variable arguments, then 1587 we won't pass anything in registers */ 1588 1589 if (cum->nregs) 1590 { 1591 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0; 1592 param != 0; param = next_param) 1593 { 1594 next_param = TREE_CHAIN (param); 1595 if (next_param == 0 && TREE_VALUE (param) != void_type_node) 1596 { 1597 if (!TARGET_64BIT) 1598 cum->nregs = 0; 1599 cum->maybe_vaarg = true; 1600 } 1601 } 1602 } 1603 if ((!fntype && !libname) 1604 || (fntype && !TYPE_ARG_TYPES (fntype))) 1605 cum->maybe_vaarg = 1; 1606 1607 if (TARGET_DEBUG_ARG) 1608 fprintf (stderr, ", nregs=%d )\n", cum->nregs); 1609 1610 return; 1611} 1612 1613/* x86-64 register passing impleemntation. See x86-64 ABI for details. Goal 1614 of this code is to classify each 8bytes of incoming argument by the register 1615 class and assign registers accordingly. */ 1616 1617/* Return the union class of CLASS1 and CLASS2. 1618 See the x86-64 PS ABI for details. */ 1619 1620static enum x86_64_reg_class 1621merge_classes (class1, class2) 1622 enum x86_64_reg_class class1, class2; 1623{ 1624 /* Rule #1: If both classes are equal, this is the resulting class. */ 1625 if (class1 == class2) 1626 return class1; 1627 1628 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is 1629 the other class. */ 1630 if (class1 == X86_64_NO_CLASS) 1631 return class2; 1632 if (class2 == X86_64_NO_CLASS) 1633 return class1; 1634 1635 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ 1636 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) 1637 return X86_64_MEMORY_CLASS; 1638 1639 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. 
*/ 1640 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) 1641 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) 1642 return X86_64_INTEGERSI_CLASS; 1643 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS 1644 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) 1645 return X86_64_INTEGER_CLASS; 1646 1647 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */ 1648 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS 1649 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS) 1650 return X86_64_MEMORY_CLASS; 1651 1652 /* Rule #6: Otherwise class SSE is used. */ 1653 return X86_64_SSE_CLASS; 1654} 1655 1656/* Classify the argument of type TYPE and mode MODE. 1657 CLASSES will be filled by the register class used to pass each word 1658 of the operand. The number of words is returned. In case the parameter 1659 should be passed in memory, 0 is returned. As a special case for zero 1660 sized containers, classes[0] will be NO_CLASS and 1 is returned. 1661 1662 BIT_OFFSET is used internally for handling records and specifies offset 1663 of the offset in bits modulo 256 to avoid overflow cases. 1664 1665 See the x86-64 PS ABI for details. 1666*/ 1667 1668static int 1669classify_argument (mode, type, classes, bit_offset) 1670 enum machine_mode mode; 1671 tree type; 1672 enum x86_64_reg_class classes[MAX_CLASSES]; 1673 int bit_offset; 1674{ 1675 int bytes = 1676 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 1677 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 1678 1679 /* Variable sized structures are always passed on the stack. 
*/ 1680 if (mode == BLKmode && type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) 1681 return 0; 1682 1683 if (type && AGGREGATE_TYPE_P (type)) 1684 { 1685 int i; 1686 tree field; 1687 enum x86_64_reg_class subclasses[MAX_CLASSES]; 1688 1689 /* On x86-64 we pass structures larger than 16 bytes on the stack. */ 1690 if (bytes > 16) 1691 return 0; 1692 1693 for (i = 0; i < words; i++) 1694 classes[i] = X86_64_NO_CLASS; 1695 1696 /* Zero sized arrays or structures are NO_CLASS. We return 0 to 1697 signalize memory class, so handle it as special case. */ 1698 if (!words) 1699 { 1700 classes[0] = X86_64_NO_CLASS; 1701 return 1; 1702 } 1703 1704 /* Classify each field of record and merge classes. */ 1705 if (TREE_CODE (type) == RECORD_TYPE) 1706 { 1707 /* For classes first merge in the field of the subclasses. */ 1708 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL) 1709 { 1710 tree bases = TYPE_BINFO_BASETYPES (type); 1711 int n_bases = TREE_VEC_LENGTH (bases); 1712 int i; 1713 1714 for (i = 0; i < n_bases; ++i) 1715 { 1716 tree binfo = TREE_VEC_ELT (bases, i); 1717 int num; 1718 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8; 1719 tree type = BINFO_TYPE (binfo); 1720 1721 num = classify_argument (TYPE_MODE (type), 1722 type, subclasses, 1723 (offset + bit_offset) % 256); 1724 if (!num) 1725 return 0; 1726 for (i = 0; i < num; i++) 1727 { 1728 int pos = (offset + bit_offset) / 8 / 8; 1729 classes[i + pos] = 1730 merge_classes (subclasses[i], classes[i + pos]); 1731 } 1732 } 1733 } 1734 /* And now merge the fields of structure. */ 1735 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 1736 { 1737 if (TREE_CODE (field) == FIELD_DECL) 1738 { 1739 int num; 1740 1741 /* Bitfields are always classified as integer. Handle them 1742 early, since later code would consider them to be 1743 misaligned integers. 
		     */
		  if (DECL_BIT_FIELD (field))
		    {
		      /* Mark every 8-byte word the bitfield overlaps as
			 INTEGER ( / 8 / 8 converts bits to words).  */
		      for (i = int_bit_position (field) / 8 / 8;
			   i < (int_bit_position (field)
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i] =
			  merge_classes (X86_64_INTEGER_CLASS,
					 classes[i]);
		    }
		  else
		    {
		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					       TREE_TYPE (field), subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      for (i = 0; i < num; i++)
			{
			  int pos =
			    (int_bit_position (field) + bit_offset) / 8 / 8;
			  classes[i + pos] =
			    merge_classes (subclasses[i], classes[i + pos]);
			}
		    }
		}
	    }
	}
      /* Arrays are handled as small records.  */
      else if (TREE_CODE (type) == ARRAY_TYPE)
	{
	  int num;
	  num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				   TREE_TYPE (type), subclasses, bit_offset);
	  if (!num)
	    return 0;

	  /* The partial classes are now full classes.  */
	  if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	    subclasses[0] = X86_64_SSE_CLASS;
	  if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
	    subclasses[0] = X86_64_INTEGER_CLASS;

	  /* Replicate the element classification across the array.  */
	  for (i = 0; i < words; i++)
	    classes[i] = subclasses[i % num];
	}
      /* Unions are similar to RECORD_TYPE but offset is always 0.  */
      else if (TREE_CODE (type) == UNION_TYPE
	       || TREE_CODE (type) == QUAL_UNION_TYPE)
	{
	  /* For classes first merge in the field of the subclasses.
*/ 1796 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL) 1797 { 1798 tree bases = TYPE_BINFO_BASETYPES (type); 1799 int n_bases = TREE_VEC_LENGTH (bases); 1800 int i; 1801 1802 for (i = 0; i < n_bases; ++i) 1803 { 1804 tree binfo = TREE_VEC_ELT (bases, i); 1805 int num; 1806 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8; 1807 tree type = BINFO_TYPE (binfo); 1808 1809 num = classify_argument (TYPE_MODE (type), 1810 type, subclasses, 1811 (offset + bit_offset) % 256); 1812 if (!num) 1813 return 0; 1814 for (i = 0; i < num; i++) 1815 { 1816 int pos = (offset + bit_offset) / 8 / 8; 1817 classes[i + pos] = 1818 merge_classes (subclasses[i], classes[i + pos]); 1819 } 1820 } 1821 } 1822 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 1823 { 1824 if (TREE_CODE (field) == FIELD_DECL) 1825 { 1826 int num; 1827 num = classify_argument (TYPE_MODE (TREE_TYPE (field)), 1828 TREE_TYPE (field), subclasses, 1829 bit_offset); 1830 if (!num) 1831 return 0; 1832 for (i = 0; i < num; i++) 1833 classes[i] = merge_classes (subclasses[i], classes[i]); 1834 } 1835 } 1836 } 1837 else 1838 abort (); 1839 1840 /* Final merger cleanup. */ 1841 for (i = 0; i < words; i++) 1842 { 1843 /* If one class is MEMORY, everything should be passed in 1844 memory. */ 1845 if (classes[i] == X86_64_MEMORY_CLASS) 1846 return 0; 1847 1848 /* The X86_64_SSEUP_CLASS should be always preceded by 1849 X86_64_SSE_CLASS. */ 1850 if (classes[i] == X86_64_SSEUP_CLASS 1851 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS)) 1852 classes[i] = X86_64_SSE_CLASS; 1853 1854 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */ 1855 if (classes[i] == X86_64_X87UP_CLASS 1856 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS)) 1857 classes[i] = X86_64_SSE_CLASS; 1858 } 1859 return words; 1860 } 1861 1862 /* Compute alignment needed. We align all types to natural boundaries with 1863 exception of XFmode that is aligned to 64bits. 
     */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* Classification of atomic types.  */
  switch (mode)
    {
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      /* Values fitting entirely in the low 32 bits get the narrower
	 INTEGERSI class.  */
      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      return 1;
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case CTImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      classes[2] = classes[3] = X86_64_INTEGER_CLASS;
      return 4;
    case SFmode:
      /* An 8-byte-aligned float is SSESF; otherwise it shares an SSE word.  */
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case TFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TCmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      classes[2] = X86_64_X87_CLASS;
      classes[3] = X86_64_X87UP_CLASS;
      return 4;
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case V4SFmode:
    case V4SImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;
    default:
      abort ();
    }
}

/*
   Examine the argument and return set number of register required in each
   class.  Return 0 iff parameter should be passed in memory.  */
static int
examine_argument (mode, type, in_return, int_nregs, sse_nregs)
     enum machine_mode mode;
     tree type;
     int *int_nregs, *sse_nregs;
     int in_return;
{
  enum x86_64_reg_class class[MAX_CLASSES];
  int n = classify_argument (mode, type, class, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  /* Count how many GP and SSE registers the classified words consume.  */
  for (n--; n >= 0; n--)
    switch (class[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	/* x87 classes are only usable for return values; as arguments
	   they force memory passing.  */
	if (!in_return)
	  return 0;
	break;
      case X86_64_MEMORY_CLASS:
	/* classify_argument already returns 0 for MEMORY.  */
	abort ();
      }
  return 1;
}
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */
static rtx
construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
     enum machine_mode mode;
     tree type;
     int in_return;
     int nintregs, nsseregs;
     const int *intreg;
     int sse_regno;
{
  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ?
int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class class[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, class, 0);
  if (TARGET_DEBUG_ARG)
    {
      if (!n)
	fprintf (stderr, "Memory class\n");
      else
	{
	  fprintf (stderr, "Classes:");
	  for (i = 0; i < n; i++)
	    {
	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
	    }
	  fprintf (stderr, "\n");
	}
    }
  /* NULL means: pass (or return) in memory.  */
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.
	   */
	return NULL;
      default:
	abort ();
      }
  /* Whole-value register pairs that have a single hard register
     representation.  */
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (TFmode, FIRST_STACK_REG);
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);
  if (n == 4
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
      && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (TCmode, FIRST_STACK_REG);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (class[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we don't have mode for.  Use DImode.
	     */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode, *intreg),
					     GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (SFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (DFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  /* SSE followed by SSEUP occupies one 16-byte register; note the
	     comma operator also skips the SSEUP word.  */
	  if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
	    tmpmode = TImode, i++;
	  else
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	default:
	  abort ();
	}
    }
  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;		/* type of the argument or 0 if lib support */
     int named;		/* whether or not the argument was named */
{
  int bytes =
    (mode == BLKmode) ?
int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
  if (TARGET_64BIT)
    {
      int int_nregs, sse_nregs;
      /* Memory-classed arguments consume stack words only.  */
      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
	cum->words += words;
      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
	{
	  /* Whole argument fits in the remaining registers.  */
	  cum->nregs -= int_nregs;
	  cum->sse_nregs -= sse_nregs;
	  cum->regno += int_nregs;
	  cum->sse_regno += sse_nregs;
	}
      else
	cum->words += words;
    }
  else
    {
      if (TARGET_SSE && mode == TImode)
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      else
	{
	  cum->words += words;
	  cum->nregs -= words;
	  cum->regno += words;

	  /* Once registers run out, stay out (negative counts reset).  */
	  if (cum->nregs <= 0)
	    {
	      cum->nregs = 0;
	      cum->regno = 0;
	    }
	}
    }
  return;
}

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).
   */

rtx
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;		/* type of the argument or 0 if lib support */
     int named;		/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Handle a hidden AL argument containing number of registers for varargs
     x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
     any AL settings.  */
  if (mode == VOIDmode)
    {
      if (TARGET_64BIT)
	return GEN_INT (cum->maybe_vaarg
			? (cum->sse_nregs < 0
			   ? SSE_REGPARM_MAX
			   : cum->sse_regno)
			: -1);
      else
	return constm1_rtx;
    }
  if (TARGET_64BIT)
    ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
			       &x86_64_int_parameter_registers [cum->regno],
			       cum->sse_regno);
  else
    switch (mode)
      {
	/* For now, pass fp/complex values on the stack.  */
      default:
	break;

      case BLKmode:
      case DImode:
      case SImode:
      case HImode:
      case QImode:
	if (words <= cum->nregs)
	  ret = gen_rtx_REG (mode, cum->regno);
	break;
      case TImode:
	if (cum->sse_nregs)
	  ret = gen_rtx_REG (mode, cum->sse_regno);
	break;
      }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}

/* Gives the alignment boundary, in bits, of an argument with the specified mode
   and type.
*/ 2270 2271int 2272ix86_function_arg_boundary (mode, type) 2273 enum machine_mode mode; 2274 tree type; 2275{ 2276 int align; 2277 if (!TARGET_64BIT) 2278 return PARM_BOUNDARY; 2279 if (type) 2280 align = TYPE_ALIGN (type); 2281 else 2282 align = GET_MODE_ALIGNMENT (mode); 2283 if (align < PARM_BOUNDARY) 2284 align = PARM_BOUNDARY; 2285 if (align > 128) 2286 align = 128; 2287 return align; 2288} 2289 2290/* Return true if N is a possible register number of function value. */ 2291bool 2292ix86_function_value_regno_p (regno) 2293 int regno; 2294{ 2295 if (!TARGET_64BIT) 2296 { 2297 return ((regno) == 0 2298 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387) 2299 || ((regno) == FIRST_SSE_REG && TARGET_SSE)); 2300 } 2301 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG 2302 || ((regno) == FIRST_SSE_REG && TARGET_SSE) 2303 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)); 2304} 2305 2306/* Define how to find the value returned by a function. 2307 VALTYPE is the data type of the value (as a tree). 2308 If the precise function being called is known, FUNC is its FUNCTION_DECL; 2309 otherwise, FUNC is 0. */ 2310rtx 2311ix86_function_value (valtype) 2312 tree valtype; 2313{ 2314 if (TARGET_64BIT) 2315 { 2316 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1, 2317 REGPARM_MAX, SSE_REGPARM_MAX, 2318 x86_64_int_return_registers, 0); 2319 /* For zero sized structures, construct_continer return NULL, but we need 2320 to keep rest of compiler happy by returning meaningfull value. */ 2321 if (!ret) 2322 ret = gen_rtx_REG (TYPE_MODE (valtype), 0); 2323 return ret; 2324 } 2325 else 2326 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype))); 2327} 2328 2329/* Return false iff type is returned in memory. 
   */
int
ix86_return_in_memory (type)
     tree type;
{
  int needed_intregs, needed_sseregs;
  if (TARGET_64BIT)
    {
      /* Memory iff the classification algorithm says so.  */
      return !examine_argument (TYPE_MODE (type), type, 1,
				&needed_intregs, &needed_sseregs);
    }
  else
    {
      /* 32-bit: aggregates without a scalar mode, 8-byte vectors, and
	 anything over 12 bytes that is not TImode/TFmode/vector go to
	 memory.  */
      if (TYPE_MODE (type) == BLKmode
	  || (VECTOR_MODE_P (TYPE_MODE (type))
	      && int_size_in_bytes (type) == 8)
	  || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
	      && TYPE_MODE (type) != TFmode
	      && !VECTOR_MODE_P (TYPE_MODE (type))))
	return 1;
      return 0;
    }
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */
rtx
ix86_libcall_value (mode)
     enum machine_mode mode;
{
  if (TARGET_64BIT)
    {
      switch (mode)
	{
	case SFmode:
	case SCmode:
	case DFmode:
	case DCmode:
	  return gen_rtx_REG (mode, FIRST_SSE_REG);
	case TFmode:
	case TCmode:
	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
	default:
	  return gen_rtx_REG (mode, 0);
	}
    }
  else
    return gen_rtx_REG (mode, VALUE_REGNO (mode));
}

/* Create the va_list data type.  */

tree
ix86_build_va_list ()
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.
     */
  if (!TARGET_64BIT)
    return build_pointer_type (char_type_node);

  /* Build the four-field __va_list_tag record mandated by the x86-64 ABI.  */
  record = make_lang_type (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}

/* Perform any needed actions needed for a function that is receiving a
   variable number of arguments.

   CUM is as above.

   MODE and TYPE are the mode and type of the current parameter.

   PRETEND_SIZE is a variable that should be set to the amount of stack
   that must be pushed by the prolog to pretend that our caller pushed
   it.

   Normally, this macro will push all remaining incoming registers on the
   stack and set PRETEND_SIZE to the length of the registers pushed.
   */

void
ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int *pretend_size ATTRIBUTE_UNUSED;
     int no_rtl;

{
  CUMULATIVE_ARGS next_cum;
  rtx save_area = NULL_RTX, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  int set;
  tree fntype;
  int stdarg_p;
  int i;

  /* Only the 64-bit ABI saves incoming registers for varargs.  */
  if (!TARGET_64BIT)
    return;

  /* Indicate to allocate space on the stack for varargs save area.  */
  ix86_save_varrargs_registers = 1;

  fntype = TREE_TYPE (current_function_decl);
  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		  != void_type_node));

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p)
    function_arg_advance (&next_cum, mode, type, 1);

  if (!no_rtl)
    save_area = frame_pointer_rtx;

  set = get_varargs_alias_set ();

  /* Save the remaining integer parameter registers below the frame.  */
  for (i = next_cum.regno; i < ix86_regparm; i++)
    {
      mem = gen_rtx_MEM (Pmode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
					x86_64_int_parameter_registers[i]));
    }

  if (next_cum.sse_nregs)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function.  We use
	 sse_prologue_save insn template that produces computed jump across
	 SSE saves.  We need some preparation work to get this working.
	 */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute address to jump to :
	 label - 5*eax + nnamed_sse_arguments*5  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      /* AL (hard reg 0, QImode) carries the SSE register count at calls.  */
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      gen_rtx_MULT (Pmode, nsse_reg,
					    GEN_INT (4))));
      if (next_cum.sse_regno)
	emit_move_insn
	  (nsse_reg,
	   gen_rtx_CONST (DImode,
			  gen_rtx_PLUS (DImode,
					label_ref,
					GEN_INT (next_cum.sse_regno * 4))));
      else
	emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute address of memory block we save into.  We always use pointer
	 pointing 127 bytes after first byte to store - this is needed to keep
	 instruction size limited by 4 bytes.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      plus_constant (save_area,
					     8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
					GEN_INT (next_cum.sse_regno), label));
    }

}

/* Implement va_start.  */

void
ix86_va_start (stdarg_p, valist, nextarg)
     int stdarg_p;
     tree valist;
     rtx nextarg;
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  /* Only 64bit target needs something special.
     */
  if (!TARGET_64BIT)
    {
      std_expand_builtin_va_start (stdarg_p, valist, nextarg);
      return;
    }

  /* Pick apart the four fields built by ix86_build_va_list.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
	     (int) words, (int) n_gpr, (int) n_fpr);

  /* gp_offset = bytes of GP registers already consumed by named args.  */
  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
	     build_int_2 (n_gpr * 8, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* fp_offset starts past the 8*REGPARM_MAX GP save slots.  */
  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
	     build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the overflow area.  */
  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
  if (words != 0)
    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
	       build_int_2 (words * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the register save area.
     Prologue of the function save it right above stack frame.
     */
  t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
  t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
  cfun->preferred_stack_boundary = 128;
}

/* Implement va_arg.  */
rtx
ix86_va_arg (valist, type)
     tree valist, type;
{
  /* The six integer parameter registers, in argument order.  */
  static int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  rtx lab_false, lab_over = NULL_RTX;
  rtx addr_rtx, r;
  rtx container;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      return std_expand_builtin_va_arg (valist, type);
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* NULL container means the value lives in the overflow (stack) area.  */
  container = construct_container (TYPE_MODE (type), type, 0,
				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
  /*
   * Pull the value out of the saved registers ...
   */

  addr_rtx = gen_reg_rtx (Pmode);

  if (container)
    {
      rtx int_addr_rtx, sse_addr_rtx;
      int needed_intregs, needed_sseregs;
      int need_temp;

      lab_over = gen_label_rtx ();
      lab_false = gen_label_rtx ();

      examine_argument (TYPE_MODE (type), type, 0,
			&needed_intregs, &needed_sseregs);


      /* Over-aligned values cannot be read directly from the save area.  */
      need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
		   || TYPE_ALIGN (type) > 128);

      /* In case we are passing structure, verify that it is consecutive block
	 on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr_rtx = addr_rtx;
	  sse_addr_rtx = addr_rtx;
	}
      else
	{
	  int_addr_rtx = gen_reg_rtx (Pmode);
	  sse_addr_rtx = gen_reg_rtx (Pmode);
	}
      /* First ensure that we fit completely in registers.
	 */
      /* Branch to the overflow path if gp_offset/fp_offset show the
	 remaining save-area slots cannot hold the value.  */
      if (needed_intregs)
	{
	  emit_cmp_and_jump_insns (expand_expr
				   (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
				   GEN_INT ((REGPARM_MAX - needed_intregs +
					     1) * 8), GE, const1_rtx, SImode,
				   1, lab_false);
	}
      if (needed_sseregs)
	{
	  emit_cmp_and_jump_insns (expand_expr
				   (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
				   GEN_INT ((SSE_REGPARM_MAX -
					     needed_sseregs + 1) * 16 +
					    REGPARM_MAX * 8), GE, const1_rtx,
				   SImode, 1, lab_false);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
	  r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
	  if (r != int_addr_rtx)
	    emit_move_insn (int_addr_rtx, r);
	}
      if (needed_sseregs)
	{
	  t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
	  r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
	  if (r != sse_addr_rtx)
	    emit_move_insn (sse_addr_rtx, r);
	}
      if (need_temp)
	{
	  int i;
	  rtx mem;

	  /* Never use the memory itself, as it has the alias set.
	     */
	  addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
	  mem = gen_rtx_MEM (BLKmode, addr_rtx);
	  set_mem_alias_set (mem, get_varargs_alias_set ());
	  set_mem_align (mem, BITS_PER_UNIT);

	  /* Copy each register slot from the save area into the temporary.  */
	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      rtx src_addr;
	      rtx src_mem;
	      int src_offset;
	      rtx dest_mem;

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  /* SSE slots are 16 bytes apart in the save area.  */
		  src_addr = sse_addr_rtx;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr_rtx;
		  src_offset = REGNO (reg) * 8;
		}
	      src_mem = gen_rtx_MEM (mode, src_addr);
	      set_mem_alias_set (src_mem, get_varargs_alias_set ());
	      src_mem = adjust_address (src_mem, mode, src_offset);
	      dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
	      emit_move_insn (dest_mem, src_mem);
	    }
	}

      /* Advance gp_offset / fp_offset past the registers just consumed.  */
      if (needed_intregs)
	{
	  t =
	    build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		   build_int_2 (needed_intregs * 8, 0));
	  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
	}
      if (needed_sseregs)
	{
	  t =
	    build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		   build_int_2 (needed_sseregs * 16, 0));
	  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
	}

      emit_jump_insn (gen_jump (lab_over));
      emit_barrier ();
      emit_label (lab_false);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.
*/ 2796 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64) 2797 t = ovf; 2798 else 2799 { 2800 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8; 2801 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0)); 2802 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1)); 2803 } 2804 t = save_expr (t); 2805 2806 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL); 2807 if (r != addr_rtx) 2808 emit_move_insn (addr_rtx, r); 2809 2810 t = 2811 build (PLUS_EXPR, TREE_TYPE (t), t, 2812 build_int_2 (rsize * UNITS_PER_WORD, 0)); 2813 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t); 2814 TREE_SIDE_EFFECTS (t) = 1; 2815 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 2816 2817 if (container) 2818 emit_label (lab_over); 2819 2820 return addr_rtx; 2821} 2822 2823/* Return nonzero if OP is general operand representable on x86_64. */ 2824 2825int 2826x86_64_general_operand (op, mode) 2827 rtx op; 2828 enum machine_mode mode; 2829{ 2830 if (!TARGET_64BIT) 2831 return general_operand (op, mode); 2832 if (nonimmediate_operand (op, mode)) 2833 return 1; 2834 return x86_64_sign_extended_value (op); 2835} 2836 2837/* Return nonzero if OP is general operand representable on x86_64 2838 as either sign extended or zero extended constant. */ 2839 2840int 2841x86_64_szext_general_operand (op, mode) 2842 rtx op; 2843 enum machine_mode mode; 2844{ 2845 if (!TARGET_64BIT) 2846 return general_operand (op, mode); 2847 if (nonimmediate_operand (op, mode)) 2848 return 1; 2849 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op); 2850} 2851 2852/* Return nonzero if OP is nonmemory operand representable on x86_64. 
*/ 2853 2854int 2855x86_64_nonmemory_operand (op, mode) 2856 rtx op; 2857 enum machine_mode mode; 2858{ 2859 if (!TARGET_64BIT) 2860 return nonmemory_operand (op, mode); 2861 if (register_operand (op, mode)) 2862 return 1; 2863 return x86_64_sign_extended_value (op); 2864} 2865 2866/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */ 2867 2868int 2869x86_64_movabs_operand (op, mode) 2870 rtx op; 2871 enum machine_mode mode; 2872{ 2873 if (!TARGET_64BIT || !flag_pic) 2874 return nonmemory_operand (op, mode); 2875 if (register_operand (op, mode) || x86_64_sign_extended_value (op)) 2876 return 1; 2877 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op)) 2878 return 1; 2879 return 0; 2880} 2881 2882/* Return nonzero if OP is nonmemory operand representable on x86_64. */ 2883 2884int 2885x86_64_szext_nonmemory_operand (op, mode) 2886 rtx op; 2887 enum machine_mode mode; 2888{ 2889 if (!TARGET_64BIT) 2890 return nonmemory_operand (op, mode); 2891 if (register_operand (op, mode)) 2892 return 1; 2893 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op); 2894} 2895 2896/* Return nonzero if OP is immediate operand representable on x86_64. */ 2897 2898int 2899x86_64_immediate_operand (op, mode) 2900 rtx op; 2901 enum machine_mode mode; 2902{ 2903 if (!TARGET_64BIT) 2904 return immediate_operand (op, mode); 2905 return x86_64_sign_extended_value (op); 2906} 2907 2908/* Return nonzero if OP is immediate operand representable on x86_64. */ 2909 2910int 2911x86_64_zext_immediate_operand (op, mode) 2912 rtx op; 2913 enum machine_mode mode ATTRIBUTE_UNUSED; 2914{ 2915 return x86_64_zero_extended_value (op); 2916} 2917 2918/* Return nonzero if OP is (const_int 1), else return zero. 
*/ 2919 2920int 2921const_int_1_operand (op, mode) 2922 rtx op; 2923 enum machine_mode mode ATTRIBUTE_UNUSED; 2924{ 2925 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1); 2926} 2927 2928/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand 2929 for shift & compare patterns, as shifting by 0 does not change flags), 2930 else return zero. */ 2931 2932int 2933const_int_1_31_operand (op, mode) 2934 rtx op; 2935 enum machine_mode mode ATTRIBUTE_UNUSED; 2936{ 2937 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31); 2938} 2939 2940/* Returns 1 if OP is either a symbol reference or a sum of a symbol 2941 reference and a constant. */ 2942 2943int 2944symbolic_operand (op, mode) 2945 register rtx op; 2946 enum machine_mode mode ATTRIBUTE_UNUSED; 2947{ 2948 switch (GET_CODE (op)) 2949 { 2950 case SYMBOL_REF: 2951 case LABEL_REF: 2952 return 1; 2953 2954 case CONST: 2955 op = XEXP (op, 0); 2956 if (GET_CODE (op) == SYMBOL_REF 2957 || GET_CODE (op) == LABEL_REF 2958 || (GET_CODE (op) == UNSPEC 2959 && (XINT (op, 1) == 6 2960 || XINT (op, 1) == 7 2961 || XINT (op, 1) == 15))) 2962 return 1; 2963 if (GET_CODE (op) != PLUS 2964 || GET_CODE (XEXP (op, 1)) != CONST_INT) 2965 return 0; 2966 2967 op = XEXP (op, 0); 2968 if (GET_CODE (op) == SYMBOL_REF 2969 || GET_CODE (op) == LABEL_REF) 2970 return 1; 2971 /* Only @GOTOFF gets offsets. */ 2972 if (GET_CODE (op) != UNSPEC 2973 || XINT (op, 1) != 7) 2974 return 0; 2975 2976 op = XVECEXP (op, 0, 0); 2977 if (GET_CODE (op) == SYMBOL_REF 2978 || GET_CODE (op) == LABEL_REF) 2979 return 1; 2980 return 0; 2981 2982 default: 2983 return 0; 2984 } 2985} 2986 2987/* Return true if the operand contains a @GOT or @GOTOFF reference. 
*/ 2988 2989int 2990pic_symbolic_operand (op, mode) 2991 register rtx op; 2992 enum machine_mode mode ATTRIBUTE_UNUSED; 2993{ 2994 if (GET_CODE (op) != CONST) 2995 return 0; 2996 op = XEXP (op, 0); 2997 if (TARGET_64BIT) 2998 { 2999 if (GET_CODE (XEXP (op, 0)) == UNSPEC) 3000 return 1; 3001 } 3002 else 3003 { 3004 if (GET_CODE (op) == UNSPEC) 3005 return 1; 3006 if (GET_CODE (op) != PLUS 3007 || GET_CODE (XEXP (op, 1)) != CONST_INT) 3008 return 0; 3009 op = XEXP (op, 0); 3010 if (GET_CODE (op) == UNSPEC) 3011 return 1; 3012 } 3013 return 0; 3014} 3015 3016/* Return true if OP is a symbolic operand that resolves locally. */ 3017 3018static int 3019local_symbolic_operand (op, mode) 3020 rtx op; 3021 enum machine_mode mode ATTRIBUTE_UNUSED; 3022{ 3023 if (GET_CODE (op) == LABEL_REF) 3024 return 1; 3025 3026 if (GET_CODE (op) == CONST 3027 && GET_CODE (XEXP (op, 0)) == PLUS 3028 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT) 3029 op = XEXP (XEXP (op, 0), 0); 3030 3031 if (GET_CODE (op) != SYMBOL_REF) 3032 return 0; 3033 3034 /* These we've been told are local by varasm and encode_section_info 3035 respectively. */ 3036 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op)) 3037 return 1; 3038 3039 /* There is, however, a not insubstantial body of code in the rest of 3040 the compiler that assumes it can just stick the results of 3041 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */ 3042 /* ??? This is a hack. Should update the body of the compiler to 3043 always create a DECL an invoke ENCODE_SECTION_INFO. */ 3044 if (strncmp (XSTR (op, 0), internal_label_prefix, 3045 internal_label_prefix_len) == 0) 3046 return 1; 3047 3048 return 0; 3049} 3050 3051/* Test for a valid operand for a call instruction. Don't allow the 3052 arg pointer register or virtual regs since they may decay into 3053 reg + const, which the patterns can't handle. 
*/ 3054 3055int 3056call_insn_operand (op, mode) 3057 rtx op; 3058 enum machine_mode mode ATTRIBUTE_UNUSED; 3059{ 3060 /* Disallow indirect through a virtual register. This leads to 3061 compiler aborts when trying to eliminate them. */ 3062 if (GET_CODE (op) == REG 3063 && (op == arg_pointer_rtx 3064 || op == frame_pointer_rtx 3065 || (REGNO (op) >= FIRST_PSEUDO_REGISTER 3066 && REGNO (op) <= LAST_VIRTUAL_REGISTER))) 3067 return 0; 3068 3069 /* Disallow `call 1234'. Due to varying assembler lameness this 3070 gets either rejected or translated to `call .+1234'. */ 3071 if (GET_CODE (op) == CONST_INT) 3072 return 0; 3073 3074 /* Explicitly allow SYMBOL_REF even if pic. */ 3075 if (GET_CODE (op) == SYMBOL_REF) 3076 return 1; 3077 3078 /* Half-pic doesn't allow anything but registers and constants. 3079 We've just taken care of the later. */ 3080 if (HALF_PIC_P ()) 3081 return register_operand (op, Pmode); 3082 3083 /* Otherwise we can allow any general_operand in the address. */ 3084 return general_operand (op, Pmode); 3085} 3086 3087int 3088constant_call_address_operand (op, mode) 3089 rtx op; 3090 enum machine_mode mode ATTRIBUTE_UNUSED; 3091{ 3092 if (GET_CODE (op) == CONST 3093 && GET_CODE (XEXP (op, 0)) == PLUS 3094 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT) 3095 op = XEXP (XEXP (op, 0), 0); 3096 return GET_CODE (op) == SYMBOL_REF; 3097} 3098 3099/* Match exactly zero and one. */ 3100 3101int 3102const0_operand (op, mode) 3103 register rtx op; 3104 enum machine_mode mode; 3105{ 3106 return op == CONST0_RTX (mode); 3107} 3108 3109int 3110const1_operand (op, mode) 3111 register rtx op; 3112 enum machine_mode mode ATTRIBUTE_UNUSED; 3113{ 3114 return op == const1_rtx; 3115} 3116 3117/* Match 2, 4, or 8. Used for leal multiplicands. 
*/ 3118 3119int 3120const248_operand (op, mode) 3121 register rtx op; 3122 enum machine_mode mode ATTRIBUTE_UNUSED; 3123{ 3124 return (GET_CODE (op) == CONST_INT 3125 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8)); 3126} 3127 3128/* True if this is a constant appropriate for an increment or decremenmt. */ 3129 3130int 3131incdec_operand (op, mode) 3132 register rtx op; 3133 enum machine_mode mode ATTRIBUTE_UNUSED; 3134{ 3135 /* On Pentium4, the inc and dec operations causes extra dependency on flag 3136 registers, since carry flag is not set. */ 3137 if (TARGET_PENTIUM4 && !optimize_size) 3138 return 0; 3139 return op == const1_rtx || op == constm1_rtx; 3140} 3141 3142/* Return nonzero if OP is acceptable as operand of DImode shift 3143 expander. */ 3144 3145int 3146shiftdi_operand (op, mode) 3147 rtx op; 3148 enum machine_mode mode ATTRIBUTE_UNUSED; 3149{ 3150 if (TARGET_64BIT) 3151 return nonimmediate_operand (op, mode); 3152 else 3153 return register_operand (op, mode); 3154} 3155 3156/* Return false if this is the stack pointer, or any other fake 3157 register eliminable to the stack pointer. Otherwise, this is 3158 a register operand. 3159 3160 This is used to prevent esp from being used as an index reg. 3161 Which would only happen in pathological cases. */ 3162 3163int 3164reg_no_sp_operand (op, mode) 3165 register rtx op; 3166 enum machine_mode mode; 3167{ 3168 rtx t = op; 3169 if (GET_CODE (t) == SUBREG) 3170 t = SUBREG_REG (t); 3171 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx) 3172 return 0; 3173 3174 return register_operand (op, mode); 3175} 3176 3177int 3178mmx_reg_operand (op, mode) 3179 register rtx op; 3180 enum machine_mode mode ATTRIBUTE_UNUSED; 3181{ 3182 return MMX_REG_P (op); 3183} 3184 3185/* Return false if this is any eliminable register. Otherwise 3186 general_operand. 
*/ 3187 3188int 3189general_no_elim_operand (op, mode) 3190 register rtx op; 3191 enum machine_mode mode; 3192{ 3193 rtx t = op; 3194 if (GET_CODE (t) == SUBREG) 3195 t = SUBREG_REG (t); 3196 if (t == arg_pointer_rtx || t == frame_pointer_rtx 3197 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx 3198 || t == virtual_stack_dynamic_rtx) 3199 return 0; 3200 if (REG_P (t) 3201 && REGNO (t) >= FIRST_VIRTUAL_REGISTER 3202 && REGNO (t) <= LAST_VIRTUAL_REGISTER) 3203 return 0; 3204 3205 return general_operand (op, mode); 3206} 3207 3208/* Return false if this is any eliminable register. Otherwise 3209 register_operand or const_int. */ 3210 3211int 3212nonmemory_no_elim_operand (op, mode) 3213 register rtx op; 3214 enum machine_mode mode; 3215{ 3216 rtx t = op; 3217 if (GET_CODE (t) == SUBREG) 3218 t = SUBREG_REG (t); 3219 if (t == arg_pointer_rtx || t == frame_pointer_rtx 3220 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx 3221 || t == virtual_stack_dynamic_rtx) 3222 return 0; 3223 3224 return GET_CODE (op) == CONST_INT || register_operand (op, mode); 3225} 3226 3227/* Return true if op is a Q_REGS class register. */ 3228 3229int 3230q_regs_operand (op, mode) 3231 register rtx op; 3232 enum machine_mode mode; 3233{ 3234 if (mode != VOIDmode && GET_MODE (op) != mode) 3235 return 0; 3236 if (GET_CODE (op) == SUBREG) 3237 op = SUBREG_REG (op); 3238 return ANY_QI_REG_P (op); 3239} 3240 3241/* Return true if op is a NON_Q_REGS class register. */ 3242 3243int 3244non_q_regs_operand (op, mode) 3245 register rtx op; 3246 enum machine_mode mode; 3247{ 3248 if (mode != VOIDmode && GET_MODE (op) != mode) 3249 return 0; 3250 if (GET_CODE (op) == SUBREG) 3251 op = SUBREG_REG (op); 3252 return NON_QI_REG_P (op); 3253} 3254 3255/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS 3256 insns. 
*/ 3257int 3258sse_comparison_operator (op, mode) 3259 rtx op; 3260 enum machine_mode mode ATTRIBUTE_UNUSED; 3261{ 3262 enum rtx_code code = GET_CODE (op); 3263 switch (code) 3264 { 3265 /* Operations supported directly. */ 3266 case EQ: 3267 case LT: 3268 case LE: 3269 case UNORDERED: 3270 case NE: 3271 case UNGE: 3272 case UNGT: 3273 case ORDERED: 3274 return 1; 3275 /* These are equivalent to ones above in non-IEEE comparisons. */ 3276 case UNEQ: 3277 case UNLT: 3278 case UNLE: 3279 case LTGT: 3280 case GE: 3281 case GT: 3282 return !TARGET_IEEE_FP; 3283 default: 3284 return 0; 3285 } 3286} 3287/* Return 1 if OP is a valid comparison operator in valid mode. */ 3288int 3289ix86_comparison_operator (op, mode) 3290 register rtx op; 3291 enum machine_mode mode; 3292{ 3293 enum machine_mode inmode; 3294 enum rtx_code code = GET_CODE (op); 3295 if (mode != VOIDmode && GET_MODE (op) != mode) 3296 return 0; 3297 if (GET_RTX_CLASS (code) != '<') 3298 return 0; 3299 inmode = GET_MODE (XEXP (op, 0)); 3300 3301 if (inmode == CCFPmode || inmode == CCFPUmode) 3302 { 3303 enum rtx_code second_code, bypass_code; 3304 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 3305 return (bypass_code == NIL && second_code == NIL); 3306 } 3307 switch (code) 3308 { 3309 case EQ: case NE: 3310 return 1; 3311 case LT: case GE: 3312 if (inmode == CCmode || inmode == CCGCmode 3313 || inmode == CCGOCmode || inmode == CCNOmode) 3314 return 1; 3315 return 0; 3316 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU: 3317 if (inmode == CCmode) 3318 return 1; 3319 return 0; 3320 case GT: case LE: 3321 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode) 3322 return 1; 3323 return 0; 3324 default: 3325 return 0; 3326 } 3327} 3328 3329/* Return 1 if OP is a comparison operator that can be issued by fcmov. 
*/ 3330 3331int 3332fcmov_comparison_operator (op, mode) 3333 register rtx op; 3334 enum machine_mode mode; 3335{ 3336 enum machine_mode inmode; 3337 enum rtx_code code = GET_CODE (op); 3338 if (mode != VOIDmode && GET_MODE (op) != mode) 3339 return 0; 3340 if (GET_RTX_CLASS (code) != '<') 3341 return 0; 3342 inmode = GET_MODE (XEXP (op, 0)); 3343 if (inmode == CCFPmode || inmode == CCFPUmode) 3344 { 3345 enum rtx_code second_code, bypass_code; 3346 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 3347 if (bypass_code != NIL || second_code != NIL) 3348 return 0; 3349 code = ix86_fp_compare_code_to_integer (code); 3350 } 3351 /* i387 supports just limited amount of conditional codes. */ 3352 switch (code) 3353 { 3354 case LTU: case GTU: case LEU: case GEU: 3355 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode) 3356 return 1; 3357 return 0; 3358 case ORDERED: case UNORDERED: 3359 case EQ: case NE: 3360 return 1; 3361 default: 3362 return 0; 3363 } 3364} 3365 3366/* Return 1 if OP is a binary operator that can be promoted to wider mode. */ 3367 3368int 3369promotable_binary_operator (op, mode) 3370 register rtx op; 3371 enum machine_mode mode ATTRIBUTE_UNUSED; 3372{ 3373 switch (GET_CODE (op)) 3374 { 3375 case MULT: 3376 /* Modern CPUs have same latency for HImode and SImode multiply, 3377 but 386 and 486 do HImode multiply faster. */ 3378 return ix86_cpu > PROCESSOR_I486; 3379 case PLUS: 3380 case AND: 3381 case IOR: 3382 case XOR: 3383 case ASHIFT: 3384 return 1; 3385 default: 3386 return 0; 3387 } 3388} 3389 3390/* Nearly general operand, but accept any const_double, since we wish 3391 to be able to drop them into memory rather than have them get pulled 3392 into registers. 
*/ 3393 3394int 3395cmp_fp_expander_operand (op, mode) 3396 register rtx op; 3397 enum machine_mode mode; 3398{ 3399 if (mode != VOIDmode && mode != GET_MODE (op)) 3400 return 0; 3401 if (GET_CODE (op) == CONST_DOUBLE) 3402 return 1; 3403 return general_operand (op, mode); 3404} 3405 3406/* Match an SI or HImode register for a zero_extract. */ 3407 3408int 3409ext_register_operand (op, mode) 3410 register rtx op; 3411 enum machine_mode mode ATTRIBUTE_UNUSED; 3412{ 3413 int regno; 3414 if ((!TARGET_64BIT || GET_MODE (op) != DImode) 3415 && GET_MODE (op) != SImode && GET_MODE (op) != HImode) 3416 return 0; 3417 3418 if (!register_operand (op, VOIDmode)) 3419 return 0; 3420 3421 /* Be curefull to accept only registers having upper parts. */ 3422 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op)); 3423 return (regno > LAST_VIRTUAL_REGISTER || regno < 4); 3424} 3425 3426/* Return 1 if this is a valid binary floating-point operation. 3427 OP is the expression matched, and MODE is its mode. 
*/ 3428 3429int 3430binary_fp_operator (op, mode) 3431 register rtx op; 3432 enum machine_mode mode; 3433{ 3434 if (mode != VOIDmode && mode != GET_MODE (op)) 3435 return 0; 3436 3437 switch (GET_CODE (op)) 3438 { 3439 case PLUS: 3440 case MINUS: 3441 case MULT: 3442 case DIV: 3443 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT; 3444 3445 default: 3446 return 0; 3447 } 3448} 3449 3450int 3451mult_operator (op, mode) 3452 register rtx op; 3453 enum machine_mode mode ATTRIBUTE_UNUSED; 3454{ 3455 return GET_CODE (op) == MULT; 3456} 3457 3458int 3459div_operator (op, mode) 3460 register rtx op; 3461 enum machine_mode mode ATTRIBUTE_UNUSED; 3462{ 3463 return GET_CODE (op) == DIV; 3464} 3465 3466int 3467arith_or_logical_operator (op, mode) 3468 rtx op; 3469 enum machine_mode mode; 3470{ 3471 return ((mode == VOIDmode || GET_MODE (op) == mode) 3472 && (GET_RTX_CLASS (GET_CODE (op)) == 'c' 3473 || GET_RTX_CLASS (GET_CODE (op)) == '2')); 3474} 3475 3476/* Returns 1 if OP is memory operand with a displacement. */ 3477 3478int 3479memory_displacement_operand (op, mode) 3480 register rtx op; 3481 enum machine_mode mode; 3482{ 3483 struct ix86_address parts; 3484 3485 if (! memory_operand (op, mode)) 3486 return 0; 3487 3488 if (! ix86_decompose_address (XEXP (op, 0), &parts)) 3489 abort (); 3490 3491 return parts.disp != NULL_RTX; 3492} 3493 3494/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0, 3495 re-recognize the operand to avoid a copy_to_mode_reg that will fail. 3496 3497 ??? It seems likely that this will only work because cmpsi is an 3498 expander, and no actual insns use this. 
*/ 3499 3500int 3501cmpsi_operand (op, mode) 3502 rtx op; 3503 enum machine_mode mode; 3504{ 3505 if (nonimmediate_operand (op, mode)) 3506 return 1; 3507 3508 if (GET_CODE (op) == AND 3509 && GET_MODE (op) == SImode 3510 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT 3511 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT 3512 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT 3513 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8 3514 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8 3515 && GET_CODE (XEXP (op, 1)) == CONST_INT) 3516 return 1; 3517 3518 return 0; 3519} 3520 3521/* Returns 1 if OP is memory operand that can not be represented by the 3522 modRM array. */ 3523 3524int 3525long_memory_operand (op, mode) 3526 register rtx op; 3527 enum machine_mode mode; 3528{ 3529 if (! memory_operand (op, mode)) 3530 return 0; 3531 3532 return memory_address_length (op) != 0; 3533} 3534 3535/* Return nonzero if the rtx is known aligned. */ 3536 3537int 3538aligned_operand (op, mode) 3539 rtx op; 3540 enum machine_mode mode; 3541{ 3542 struct ix86_address parts; 3543 3544 if (!general_operand (op, mode)) 3545 return 0; 3546 3547 /* Registers and immediate operands are always "aligned". */ 3548 if (GET_CODE (op) != MEM) 3549 return 1; 3550 3551 /* Don't even try to do any aligned optimizations with volatiles. */ 3552 if (MEM_VOLATILE_P (op)) 3553 return 0; 3554 3555 op = XEXP (op, 0); 3556 3557 /* Pushes and pops are only valid on the stack pointer. */ 3558 if (GET_CODE (op) == PRE_DEC 3559 || GET_CODE (op) == POST_INC) 3560 return 1; 3561 3562 /* Decode the address. */ 3563 if (! ix86_decompose_address (op, &parts)) 3564 abort (); 3565 3566 /* Look for some component that isn't known to be aligned. 
*/ 3567 if (parts.index) 3568 { 3569 if (parts.scale < 4 3570 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32) 3571 return 0; 3572 } 3573 if (parts.base) 3574 { 3575 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32) 3576 return 0; 3577 } 3578 if (parts.disp) 3579 { 3580 if (GET_CODE (parts.disp) != CONST_INT 3581 || (INTVAL (parts.disp) & 3) != 0) 3582 return 0; 3583 } 3584 3585 /* Didn't find one -- this must be an aligned address. */ 3586 return 1; 3587} 3588 3589/* Return true if the constant is something that can be loaded with 3590 a special instruction. Only handle 0.0 and 1.0; others are less 3591 worthwhile. */ 3592 3593int 3594standard_80387_constant_p (x) 3595 rtx x; 3596{ 3597 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x))) 3598 return -1; 3599 /* Note that on the 80387, other constants, such as pi, that we should support 3600 too. On some machines, these are much slower to load as standard constant, 3601 than to load from doubles in memory. */ 3602 if (x == CONST0_RTX (GET_MODE (x))) 3603 return 1; 3604 if (x == CONST1_RTX (GET_MODE (x))) 3605 return 2; 3606 return 0; 3607} 3608 3609/* Return 1 if X is FP constant we can load to SSE register w/o using memory. 
3610 */ 3611int 3612standard_sse_constant_p (x) 3613 rtx x; 3614{ 3615 if (GET_CODE (x) != CONST_DOUBLE) 3616 return -1; 3617 return (x == CONST0_RTX (GET_MODE (x))); 3618} 3619 3620/* Returns 1 if OP contains a symbol reference */ 3621 3622int 3623symbolic_reference_mentioned_p (op) 3624 rtx op; 3625{ 3626 register const char *fmt; 3627 register int i; 3628 3629 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) 3630 return 1; 3631 3632 fmt = GET_RTX_FORMAT (GET_CODE (op)); 3633 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) 3634 { 3635 if (fmt[i] == 'E') 3636 { 3637 register int j; 3638 3639 for (j = XVECLEN (op, i) - 1; j >= 0; j--) 3640 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) 3641 return 1; 3642 } 3643 3644 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) 3645 return 1; 3646 } 3647 3648 return 0; 3649} 3650 3651/* Return 1 if it is appropriate to emit `ret' instructions in the 3652 body of a function. Do this only if the epilogue is simple, needing a 3653 couple of insns. Prior to reloading, we can't tell how many registers 3654 must be saved, so return 0 then. Return 0 if there is no frame 3655 marker to de-allocate. 3656 3657 If NON_SAVING_SETJMP is defined and true, then it is not possible 3658 for the epilogue to be simple, so return 0. This is a special case 3659 since NON_SAVING_SETJMP will not cause regs_ever_live to change 3660 until final, but jump_optimize may need to know sooner if a 3661 `return' is OK. */ 3662 3663int 3664ix86_can_use_return_insn_p () 3665{ 3666 struct ix86_frame frame; 3667 3668#ifdef NON_SAVING_SETJMP 3669 if (NON_SAVING_SETJMP && current_function_calls_setjmp) 3670 return 0; 3671#endif 3672 3673 if (! reload_completed || frame_pointer_needed) 3674 return 0; 3675 3676 /* Don't allow more than 32 pop, since that's all we can do 3677 with one instruction. 
*/ 3678 if (current_function_pops_args 3679 && current_function_args_size >= 32768) 3680 return 0; 3681 3682 ix86_compute_frame_layout (&frame); 3683 return frame.to_allocate == 0 && frame.nregs == 0; 3684} 3685 3686/* Return 1 if VALUE can be stored in the sign extended immediate field. */ 3687int 3688x86_64_sign_extended_value (value) 3689 rtx value; 3690{ 3691 switch (GET_CODE (value)) 3692 { 3693 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known 3694 to be at least 32 and this all acceptable constants are 3695 represented as CONST_INT. */ 3696 case CONST_INT: 3697 if (HOST_BITS_PER_WIDE_INT == 32) 3698 return 1; 3699 else 3700 { 3701 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode); 3702 return trunc_int_for_mode (val, SImode) == val; 3703 } 3704 break; 3705 3706 /* For certain code models, the symbolic references are known to fit. */ 3707 case SYMBOL_REF: 3708 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL; 3709 3710 /* For certain code models, the code is near as well. */ 3711 case LABEL_REF: 3712 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC; 3713 3714 /* We also may accept the offsetted memory references in certain special 3715 cases. */ 3716 case CONST: 3717 if (GET_CODE (XEXP (value, 0)) == UNSPEC 3718 && XVECLEN (XEXP (value, 0), 0) == 1 3719 && XINT (XEXP (value, 0), 1) == 15) 3720 return 1; 3721 else if (GET_CODE (XEXP (value, 0)) == PLUS) 3722 { 3723 rtx op1 = XEXP (XEXP (value, 0), 0); 3724 rtx op2 = XEXP (XEXP (value, 0), 1); 3725 HOST_WIDE_INT offset; 3726 3727 if (ix86_cmodel == CM_LARGE) 3728 return 0; 3729 if (GET_CODE (op2) != CONST_INT) 3730 return 0; 3731 offset = trunc_int_for_mode (INTVAL (op2), DImode); 3732 switch (GET_CODE (op1)) 3733 { 3734 case SYMBOL_REF: 3735 /* For CM_SMALL assume that latest object is 1MB before 3736 end of 31bits boundary. We may also accept pretty 3737 large negative constants knowing that all objects are 3738 in the positive half of address space. 
*/ 3739 if (ix86_cmodel == CM_SMALL 3740 && offset < 1024*1024*1024 3741 && trunc_int_for_mode (offset, SImode) == offset) 3742 return 1; 3743 /* For CM_KERNEL we know that all object resist in the 3744 negative half of 32bits address space. We may not 3745 accept negative offsets, since they may be just off 3746 and we may accept pretty large positive ones. */ 3747 if (ix86_cmodel == CM_KERNEL 3748 && offset > 0 3749 && trunc_int_for_mode (offset, SImode) == offset) 3750 return 1; 3751 break; 3752 case LABEL_REF: 3753 /* These conditions are similar to SYMBOL_REF ones, just the 3754 constraints for code models differ. */ 3755 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM) 3756 && offset < 1024*1024*1024 3757 && trunc_int_for_mode (offset, SImode) == offset) 3758 return 1; 3759 if (ix86_cmodel == CM_KERNEL 3760 && offset > 0 3761 && trunc_int_for_mode (offset, SImode) == offset) 3762 return 1; 3763 break; 3764 default: 3765 return 0; 3766 } 3767 } 3768 return 0; 3769 default: 3770 return 0; 3771 } 3772} 3773 3774/* Return 1 if VALUE can be stored in the zero extended immediate field. */ 3775int 3776x86_64_zero_extended_value (value) 3777 rtx value; 3778{ 3779 switch (GET_CODE (value)) 3780 { 3781 case CONST_DOUBLE: 3782 if (HOST_BITS_PER_WIDE_INT == 32) 3783 return (GET_MODE (value) == VOIDmode 3784 && !CONST_DOUBLE_HIGH (value)); 3785 else 3786 return 0; 3787 case CONST_INT: 3788 if (HOST_BITS_PER_WIDE_INT == 32) 3789 return INTVAL (value) >= 0; 3790 else 3791 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff); 3792 break; 3793 3794 /* For certain code models, the symbolic references are known to fit. */ 3795 case SYMBOL_REF: 3796 return ix86_cmodel == CM_SMALL; 3797 3798 /* For certain code models, the code is near as well. */ 3799 case LABEL_REF: 3800 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM; 3801 3802 /* We also may accept the offsetted memory references in certain special 3803 cases. 
*/ 3804 case CONST: 3805 if (GET_CODE (XEXP (value, 0)) == PLUS) 3806 { 3807 rtx op1 = XEXP (XEXP (value, 0), 0); 3808 rtx op2 = XEXP (XEXP (value, 0), 1); 3809 3810 if (ix86_cmodel == CM_LARGE) 3811 return 0; 3812 switch (GET_CODE (op1)) 3813 { 3814 case SYMBOL_REF: 3815 return 0; 3816 /* For small code model we may accept pretty large positive 3817 offsets, since one bit is available for free. Negative 3818 offsets are limited by the size of NULL pointer area 3819 specified by the ABI. */ 3820 if (ix86_cmodel == CM_SMALL 3821 && GET_CODE (op2) == CONST_INT 3822 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000 3823 && (trunc_int_for_mode (INTVAL (op2), SImode) 3824 == INTVAL (op2))) 3825 return 1; 3826 /* ??? For the kernel, we may accept adjustment of 3827 -0x10000000, since we know that it will just convert 3828 negative address space to positive, but perhaps this 3829 is not worthwhile. */ 3830 break; 3831 case LABEL_REF: 3832 /* These conditions are similar to SYMBOL_REF ones, just the 3833 constraints for code models differ. */ 3834 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM) 3835 && GET_CODE (op2) == CONST_INT 3836 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000 3837 && (trunc_int_for_mode (INTVAL (op2), SImode) 3838 == INTVAL (op2))) 3839 return 1; 3840 break; 3841 default: 3842 return 0; 3843 } 3844 } 3845 return 0; 3846 default: 3847 return 0; 3848 } 3849} 3850 3851/* Value should be nonzero if functions must have frame pointers. 3852 Zero means the frame pointer need not be set up (and parms may 3853 be accessed via the stack pointer) in functions that seem suitable. */ 3854 3855int 3856ix86_frame_pointer_required () 3857{ 3858 /* If we accessed previous frames, then the generated code expects 3859 to be able to access the saved ebp value in our frame. */ 3860 if (cfun->machine->accesses_prev_frame) 3861 return 1; 3862 3863 /* Several x86 os'es need a frame pointer for other reasons, 3864 usually pertaining to setjmp. 
*/ 3865 if (SUBTARGET_FRAME_POINTER_REQUIRED) 3866 return 1; 3867 3868 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off 3869 the frame pointer by default. Turn it back on now if we've not 3870 got a leaf function. */ 3871 if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ()) 3872 return 1; 3873 3874 return 0; 3875} 3876 3877/* Record that the current function accesses previous call frames. */ 3878 3879void 3880ix86_setup_frame_addresses () 3881{ 3882 cfun->machine->accesses_prev_frame = 1; 3883} 3884 3885static char pic_label_name[32]; 3886 3887/* This function generates code for -fpic that loads %ebx with 3888 the return address of the caller and then returns. */ 3889 3890void 3891ix86_asm_file_end (file) 3892 FILE *file; 3893{ 3894 rtx xops[2]; 3895 3896 if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0) 3897 return; 3898 3899 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related 3900 to updating relocations to a section being discarded such that this 3901 doesn't work. Ought to detect this at configure time. */ 3902#if 0 3903 /* The trick here is to create a linkonce section containing the 3904 pic label thunk, but to refer to it with an internal label. 3905 Because the label is internal, we don't have inter-dso name 3906 binding issues on hosts that don't support ".hidden". 3907 3908 In order to use these macros, however, we must create a fake 3909 function decl. */ 3910 if (targetm.have_named_sections) 3911 { 3912 tree decl = build_decl (FUNCTION_DECL, 3913 get_identifier ("i686.get_pc_thunk"), 3914 error_mark_node); 3915 DECL_ONE_ONLY (decl) = 1; 3916 UNIQUE_SECTION (decl, 0); 3917 named_section (decl, NULL); 3918 } 3919 else 3920#else 3921 text_section (); 3922#endif 3923 3924 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an 3925 internal (non-global) label that's being emitted, it didn't make 3926 sense to have .type information for local labels. 
This caused 3927 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving 3928 me debug info for a label that you're declaring non-global?) this 3929 was changed to call ASM_OUTPUT_LABEL() instead. */ 3930 3931 ASM_OUTPUT_LABEL (file, pic_label_name); 3932 3933 xops[0] = pic_offset_table_rtx; 3934 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx); 3935 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops); 3936 output_asm_insn ("ret", xops); 3937} 3938 3939void 3940load_pic_register () 3941{ 3942 rtx gotsym, pclab; 3943 3944 if (TARGET_64BIT) 3945 abort (); 3946 3947 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); 3948 3949 if (TARGET_DEEP_BRANCH_PREDICTION) 3950 { 3951 if (! pic_label_name[0]) 3952 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0); 3953 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name)); 3954 } 3955 else 3956 { 3957 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); 3958 } 3959 3960 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab)); 3961 3962 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab)); 3963} 3964 3965/* Generate an "push" pattern for input ARG. */ 3966 3967static rtx 3968gen_push (arg) 3969 rtx arg; 3970{ 3971 return gen_rtx_SET (VOIDmode, 3972 gen_rtx_MEM (Pmode, 3973 gen_rtx_PRE_DEC (Pmode, 3974 stack_pointer_rtx)), 3975 arg); 3976} 3977 3978/* Return 1 if we need to save REGNO. 
   MAYBE_EH_RETURN nonzero means the EH data registers also count as
   saved when the function calls eh_return.  */
static int
ix86_save_reg (regno, maybe_eh_return)
     int regno;
     int maybe_eh_return;
{
  /* The PIC register must be preserved whenever the GOT or the constant
     pool is referenced, or across an eh_return.  */
  if (regno == PIC_OFFSET_TABLE_REGNUM
      && (current_function_uses_pic_offset_table
	  || current_function_uses_const_pool
	  || current_function_calls_eh_return))
    return 1;

  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      /* Walk the EH data register list until the INVALID_REGNUM
	 terminator.  */
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == (unsigned) regno)
	    return 1;
	}
    }

  /* Otherwise: save call-saved registers that are live, except the hard
     frame pointer when it is in use as the frame pointer.  */
  return (regs_ever_live[regno]
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}

/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs ()
{
  int nregs = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}

/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.
*/ 4025 4026HOST_WIDE_INT 4027ix86_initial_elimination_offset (from, to) 4028 int from; 4029 int to; 4030{ 4031 struct ix86_frame frame; 4032 ix86_compute_frame_layout (&frame); 4033 4034 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 4035 return frame.hard_frame_pointer_offset; 4036 else if (from == FRAME_POINTER_REGNUM 4037 && to == HARD_FRAME_POINTER_REGNUM) 4038 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; 4039 else 4040 { 4041 if (to != STACK_POINTER_REGNUM) 4042 abort (); 4043 else if (from == ARG_POINTER_REGNUM) 4044 return frame.stack_pointer_offset; 4045 else if (from != FRAME_POINTER_REGNUM) 4046 abort (); 4047 else 4048 return frame.stack_pointer_offset - frame.frame_pointer_offset; 4049 } 4050} 4051 4052/* Fill structure ix86_frame about frame of currently computed function. */ 4053 4054static void 4055ix86_compute_frame_layout (frame) 4056 struct ix86_frame *frame; 4057{ 4058 HOST_WIDE_INT total_size; 4059 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT; 4060 int offset; 4061 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT; 4062 HOST_WIDE_INT size = get_frame_size (); 4063 4064 frame->nregs = ix86_nsaved_regs (); 4065 total_size = size; 4066 4067 /* Skip return value and save base pointer. */ 4068 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD; 4069 4070 frame->hard_frame_pointer_offset = offset; 4071 4072 /* Do some sanity checking of stack_alignment_needed and 4073 preferred_alignment, since i386 port is the only using those features 4074 that may break easily. 
*/ 4075 4076 if (size && !stack_alignment_needed) 4077 abort (); 4078 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT) 4079 abort (); 4080 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT) 4081 abort (); 4082 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT) 4083 abort (); 4084 4085 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT) 4086 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT; 4087 4088 /* Register save area */ 4089 offset += frame->nregs * UNITS_PER_WORD; 4090 4091 /* Va-arg area */ 4092 if (ix86_save_varrargs_registers) 4093 { 4094 offset += X86_64_VARARGS_SIZE; 4095 frame->va_arg_size = X86_64_VARARGS_SIZE; 4096 } 4097 else 4098 frame->va_arg_size = 0; 4099 4100 /* Align start of frame for local function. */ 4101 frame->padding1 = ((offset + stack_alignment_needed - 1) 4102 & -stack_alignment_needed) - offset; 4103 4104 offset += frame->padding1; 4105 4106 /* Frame pointer points here. */ 4107 frame->frame_pointer_offset = offset; 4108 4109 offset += size; 4110 4111 /* Add outgoing arguments area. Can be skipped if we eliminated 4112 all the function calls as dead code. */ 4113 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf) 4114 { 4115 offset += current_function_outgoing_args_size; 4116 frame->outgoing_arguments_size = current_function_outgoing_args_size; 4117 } 4118 else 4119 frame->outgoing_arguments_size = 0; 4120 4121 /* Align stack boundary. Only needed if we're calling another function 4122 or using alloca. */ 4123 if (!current_function_is_leaf || current_function_calls_alloca) 4124 frame->padding2 = ((offset + preferred_alignment - 1) 4125 & -preferred_alignment) - offset; 4126 else 4127 frame->padding2 = 0; 4128 4129 offset += frame->padding2; 4130 4131 /* We've reached end of stack frame. */ 4132 frame->stack_pointer_offset = offset; 4133 4134 /* Size prologue needs to allocate. 
 */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  /* A leaf 64-bit function may keep part of its frame in the red zone
     below the stack pointer instead of explicitly allocating it.  */
  if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
	   frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
#endif
}

/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs ()
{
  register int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit code to save registers using MOV insns.  First register
   is saved at POINTER + OFFSET.  */
static void
ix86_emit_save_regs_using_mov (pointer, offset)
     rtx pointer;
     HOST_WIDE_INT offset;
{
  int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
					       Pmode, offset),
			       gen_rtx_REG (Pmode, regno));
	RTX_FRAME_RELATED_P (insn) = 1;
	offset += UNITS_PER_WORD;
      }
}

/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue ()
{
  rtx insn;
  int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
				   || current_function_uses_const_pool)
		      && !TARGET_64BIT);
  struct ix86_frame frame;
  int use_mov = 0;
  HOST_WIDE_INT allocate;

  if (!optimize_size)
    {
      use_fast_prologue_epilogue
	= !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
      if (TARGET_PROLOGUE_USING_MOVE)
	use_mov = use_fast_prologue_epilogue;
    }
  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;
  /* In case we are dealing only with single register and empty frame,
     push is equivalent of the mov+add sequence.  */
  if (allocate == 0 && frame.nregs <= 1)
    use_mov = 0;

  if (!use_mov)
    ix86_emit_save_regs ();
  else
    /* Registers will be stored with MOVs below the stack pointer, so
       include their space in the single stack adjustment.  */
    allocate += frame.nregs * UNITS_PER_WORD;

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    {
      insn = emit_insn (gen_pro_epilogue_adjust_stack
			(stack_pointer_rtx, stack_pointer_rtx,
			 GEN_INT (-allocate)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* Large allocation with stack probing: call _alloca with the size
	 in %eax.  ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      if (TARGET_64BIT)
	abort ();

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (allocate));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));

      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));
    }
  if (use_mov)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  if (pic_reg_used)
    load_pic_register ();

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  However, if -fpic, the above call will have
     done that.  */
  if (current_function_profile && ! pic_reg_used)
    emit_insn (gen_blockage ());
}

/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.
   MAYBE_EH_RETURN selects which registers are included; see
   ix86_save_reg.  */
static void
ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
     rtx pointer;
     int offset;
     int maybe_eh_return;
{
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
	emit_move_insn (gen_rtx_REG (Pmode, regno),
			adjust_address (gen_rtx_MEM (Pmode, pointer),
					Pmode, offset));
	offset += UNITS_PER_WORD;
      }
}

/* Restore function stack, frame, and registers.  STYLE is 0 for a
   sibcall epilogue (no return insn emitted) and 2 for an eh_return
   epilogue, judging from how it is tested below.  */

void
ix86_expand_epilogue (style)
     int style;
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.

     The default code results in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && use_fast_prologue_epilogue && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      /* Compute the final stack address into the stackadj reg,
		 reload the saved frame pointer, then switch to the new
		 stack.  */
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      emit_insn (gen_pro_epilogue_adjust_stack
			 (stack_pointer_rtx, sa, const0_rtx));
	    }
	  else
	    {
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate
			     + frame.nregs * UNITS_PER_WORD)));
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx));
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset)));
	}
      else if (frame.to_allocate)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate)));

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in 64bit ABI.  */
	  if (TARGET_64BIT)
	    abort ();

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}

/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.
   */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
	{
	  if (code1 == REG || code1 == SUBREG)
	    index = op0, base = op1;	/* index + base */
	  else
	    base = op0, disp = op1;	/* base + displacement */
	}
      else if (code0 == MULT)
	{
	  index = XEXP (op0, 0);
	  scale_rtx = XEXP (op0, 1);
	  if (code1 == REG || code1 == SUBREG)
	    base = op1;			/* index*scale + base */
	  else
	    disp = op1;			/* index*scale + disp */
	}
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
	{
	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
	  scale_rtx = XEXP (XEXP (op0, 0), 1);
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else if (code0 == PLUS)
	{
	  index = XEXP (op0, 0);	/* index + base + disp
 */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return 0;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      /* Convert the shift count to the equivalent multiplier.  */
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is not scaling */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
	  || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return retval;
}

/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
int
ix86_address_cost (x)
     rtx x;
{
  struct ix86_address parts;
  int cost = 1;

  if (!ix86_decompose_address (x, &parts))
    abort ();

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.
 */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}

/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (x)
     rtx x;
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      /* Unspec 15 is the 64-bit GOT reference emitted by
	 legitimize_pic_address.  */
      if (GET_CODE (term) != UNSPEC
	  || XVECLEN (term, 0) != 1
	  || XINT (term, 1) != 15)
	return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
	  && GET_CODE (term) != LABEL_REF)
	return x;

      return term;
    }

  if (GET_CODE (x) != PLUS
      || XEXP (x, 0) != pic_offset_table_rtx
      || GET_CODE (XEXP (x, 1)) != CONST)
    return x;

  term = XEXP (XEXP (x, 1), 0);

  if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
    term = XEXP (term, 0);

  /* Unspec 7 is the @GOTOFF reference emitted by
     legitimize_pic_address.  */
  if (GET_CODE (term) != UNSPEC
      || XVECLEN (term, 0) != 1
      || XINT (term, 1) != 7)
    return x;

  term = XVECEXP (term, 0, 0);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}

/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (disp)
     register rtx disp;
{
  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx x = disp;
      if (GET_CODE (disp) == CONST)
	x = XEXP (disp, 0);
      /* ??? Handle PIC code models */
      if (GET_CODE (x) == PLUS
	  && (GET_CODE (XEXP (x, 1)) == CONST_INT
	      && ix86_cmodel == CM_SMALL_PIC
	      && INTVAL (XEXP (x, 1)) < 1024*1024*1024
	      && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
	x = XEXP (x, 0);
      if (local_symbolic_operand (x, Pmode))
	return 1;
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* We are unsafe to allow PLUS expressions.  This limit allowed distance
	 of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || XVECLEN (disp, 0) != 1
	  || XINT (disp, 1) != 15)
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  /* Strip an optional constant addend.  */
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
    }

  if (GET_CODE (disp) != UNSPEC
      || XVECLEN (disp, 0) != 1)
    return 0;

  /* Must be @GOT or @GOTOFF.  */
  switch (XINT (disp, 1))
    {
    case 6:	/* @GOT */
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;

    case 7:	/* @GOTOFF */
      return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}

/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.
 */

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (!CONSTANT_ADDRESS_P (disp))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}

      if (TARGET_64BIT)
	{
	  if (!x86_64_sign_extended_value (disp))
	    {
	      reason = "displacement is out of range";
	      goto report_error;
	    }
	}
      else
	{
	  if (GET_CODE (disp) == CONST_DOUBLE)
	    {
	      reason = "displacement is a const_double";
	      goto report_error;
	    }
	}

      if (flag_pic && SYMBOLIC_CONST (disp))
	{
	  if (TARGET_64BIT && (index || base))
	    {
	      reason = "non-constant pic memory reference";
	      goto report_error;
	    }
	  if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (HALF_PIC_P ())
	{
	  if (! HALF_PIC_ADDRESS_P (disp)
	      || (base != NULL_RTX || index != NULL_RTX))
	    {
	      reason = "displacement is an invalid half-pic reference";
	      goto report_error;
	    }
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}

/* Return a unique alias set for the GOT.  */

static HOST_WIDE_INT
ix86_GOT_alias_set ()
{
  /* Lazily created on first use and cached for the whole compilation.  */
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.
 */

rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

  if (local_symbolic_operand (addr, Pmode))
    {
      /* In 64bit mode we can address such objects directly.  */
      if (TARGET_64BIT)
	new = addr;
      else
	{
	  /* This symbol may be referenced via a displacement from the PIC
	     base address (@GOTOFF).  */

	  current_function_uses_pic_offset_table = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

	  if (reg != 0)
	    {
	      emit_move_insn (reg, new);
	      new = reg;
	    }
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
	{
	  /* 64-bit GOT load (unspec 15).  */
	  current_function_uses_pic_offset_table = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use directly gen_movsi, otherwise the address is loaded
	     into register for CSE.  We don't want to CSE this addresses,
	     instead we CSE addresses from the GOT table, so skip this.  */
	  emit_insn (gen_movsi (reg, new));
	  new = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  current_function_uses_pic_offset_table = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway...  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (local_symbolic_operand (op0, Pmode)
	      && GET_CODE (op1) == CONST_INT)
	    {
	      if (!TARGET_64BIT)
		{
		  current_function_uses_pic_offset_table = 1;
		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
		  new = gen_rtx_PLUS (Pmode, new, op1);
		  new = gen_rtx_CONST (Pmode, new);
		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new);
		      new = reg;
		    }
		}
	      else
		{
		  /* ??? We need to limit offsets here.  */
		}
	    }
	  else
	    {
	      /* Legitimize both operands recursively and recombine.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new = legitimize_pic_address (XEXP (addr, 1),
					    base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.
*/ 5188 5189rtx 5190legitimize_address (x, oldx, mode) 5191 register rtx x; 5192 register rtx oldx ATTRIBUTE_UNUSED; 5193 enum machine_mode mode; 5194{ 5195 int changed = 0; 5196 unsigned log; 5197 5198 if (TARGET_DEBUG_ADDR) 5199 { 5200 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n", 5201 GET_MODE_NAME (mode)); 5202 debug_rtx (x); 5203 } 5204 5205 if (flag_pic && SYMBOLIC_CONST (x)) 5206 return legitimize_pic_address (x, 0); 5207 5208 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */ 5209 if (GET_CODE (x) == ASHIFT 5210 && GET_CODE (XEXP (x, 1)) == CONST_INT 5211 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4) 5212 { 5213 changed = 1; 5214 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)), 5215 GEN_INT (1 << log)); 5216 } 5217 5218 if (GET_CODE (x) == PLUS) 5219 { 5220 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */ 5221 5222 if (GET_CODE (XEXP (x, 0)) == ASHIFT 5223 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT 5224 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4) 5225 { 5226 changed = 1; 5227 XEXP (x, 0) = gen_rtx_MULT (Pmode, 5228 force_reg (Pmode, XEXP (XEXP (x, 0), 0)), 5229 GEN_INT (1 << log)); 5230 } 5231 5232 if (GET_CODE (XEXP (x, 1)) == ASHIFT 5233 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT 5234 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4) 5235 { 5236 changed = 1; 5237 XEXP (x, 1) = gen_rtx_MULT (Pmode, 5238 force_reg (Pmode, XEXP (XEXP (x, 1), 0)), 5239 GEN_INT (1 << log)); 5240 } 5241 5242 /* Put multiply first if it isn't already. */ 5243 if (GET_CODE (XEXP (x, 1)) == MULT) 5244 { 5245 rtx tmp = XEXP (x, 0); 5246 XEXP (x, 0) = XEXP (x, 1); 5247 XEXP (x, 1) = tmp; 5248 changed = 1; 5249 } 5250 5251 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const))) 5252 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be 5253 created by virtual register instantiation, register elimination, and 5254 similar optimizations. 
*/ 5255 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS) 5256 { 5257 changed = 1; 5258 x = gen_rtx_PLUS (Pmode, 5259 gen_rtx_PLUS (Pmode, XEXP (x, 0), 5260 XEXP (XEXP (x, 1), 0)), 5261 XEXP (XEXP (x, 1), 1)); 5262 } 5263 5264 /* Canonicalize 5265 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const) 5266 into (plus (plus (mult (reg) (const)) (reg)) (const)). */ 5267 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS 5268 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT 5269 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS 5270 && CONSTANT_P (XEXP (x, 1))) 5271 { 5272 rtx constant; 5273 rtx other = NULL_RTX; 5274 5275 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 5276 { 5277 constant = XEXP (x, 1); 5278 other = XEXP (XEXP (XEXP (x, 0), 1), 1); 5279 } 5280 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT) 5281 { 5282 constant = XEXP (XEXP (XEXP (x, 0), 1), 1); 5283 other = XEXP (x, 1); 5284 } 5285 else 5286 constant = 0; 5287 5288 if (constant) 5289 { 5290 changed = 1; 5291 x = gen_rtx_PLUS (Pmode, 5292 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0), 5293 XEXP (XEXP (XEXP (x, 0), 1), 0)), 5294 plus_constant (other, INTVAL (constant))); 5295 } 5296 } 5297 5298 if (changed && legitimate_address_p (mode, x, FALSE)) 5299 return x; 5300 5301 if (GET_CODE (XEXP (x, 0)) == MULT) 5302 { 5303 changed = 1; 5304 XEXP (x, 0) = force_operand (XEXP (x, 0), 0); 5305 } 5306 5307 if (GET_CODE (XEXP (x, 1)) == MULT) 5308 { 5309 changed = 1; 5310 XEXP (x, 1) = force_operand (XEXP (x, 1), 0); 5311 } 5312 5313 if (changed 5314 && GET_CODE (XEXP (x, 1)) == REG 5315 && GET_CODE (XEXP (x, 0)) == REG) 5316 return x; 5317 5318 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1))) 5319 { 5320 changed = 1; 5321 x = legitimize_pic_address (x, 0); 5322 } 5323 5324 if (changed && legitimate_address_p (mode, x, FALSE)) 5325 return x; 5326 5327 if (GET_CODE (XEXP (x, 0)) == REG) 5328 { 5329 register rtx temp = gen_reg_rtx (Pmode); 5330 register rtx val = force_operand 
(XEXP (x, 1), temp); 5331 if (val != temp) 5332 emit_move_insn (temp, val); 5333 5334 XEXP (x, 1) = temp; 5335 return x; 5336 } 5337 5338 else if (GET_CODE (XEXP (x, 1)) == REG) 5339 { 5340 register rtx temp = gen_reg_rtx (Pmode); 5341 register rtx val = force_operand (XEXP (x, 0), temp); 5342 if (val != temp) 5343 emit_move_insn (temp, val); 5344 5345 XEXP (x, 0) = temp; 5346 return x; 5347 } 5348 } 5349 5350 return x; 5351} 5352 5353/* Print an integer constant expression in assembler syntax. Addition 5354 and subtraction are the only arithmetic that may appear in these 5355 expressions. FILE is the stdio stream to write to, X is the rtx, and 5356 CODE is the operand print code from the output string. */ 5357 5358static void 5359output_pic_addr_const (file, x, code) 5360 FILE *file; 5361 rtx x; 5362 int code; 5363{ 5364 char buf[256]; 5365 5366 switch (GET_CODE (x)) 5367 { 5368 case PC: 5369 if (flag_pic) 5370 putc ('.', file); 5371 else 5372 abort (); 5373 break; 5374 5375 case SYMBOL_REF: 5376 assemble_name (file, XSTR (x, 0)); 5377 if (code == 'P' && ! SYMBOL_REF_FLAG (x)) 5378 fputs ("@PLT", file); 5379 break; 5380 5381 case LABEL_REF: 5382 x = XEXP (x, 0); 5383 /* FALLTHRU */ 5384 case CODE_LABEL: 5385 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); 5386 assemble_name (asm_out_file, buf); 5387 break; 5388 5389 case CONST_INT: 5390 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); 5391 break; 5392 5393 case CONST: 5394 /* This used to output parentheses around the expression, 5395 but that does not work on the 386 (either ATT or BSD assembler). */ 5396 output_pic_addr_const (file, XEXP (x, 0), code); 5397 break; 5398 5399 case CONST_DOUBLE: 5400 if (GET_MODE (x) == VOIDmode) 5401 { 5402 /* We can use %d if the number is <32 bits and positive. 
*/ 5403 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0) 5404 fprintf (file, "0x%lx%08lx", 5405 (unsigned long) CONST_DOUBLE_HIGH (x), 5406 (unsigned long) CONST_DOUBLE_LOW (x)); 5407 else 5408 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x)); 5409 } 5410 else 5411 /* We can't handle floating point constants; 5412 PRINT_OPERAND must handle them. */ 5413 output_operand_lossage ("floating constant misused"); 5414 break; 5415 5416 case PLUS: 5417 /* Some assemblers need integer constants to appear first. */ 5418 if (GET_CODE (XEXP (x, 0)) == CONST_INT) 5419 { 5420 output_pic_addr_const (file, XEXP (x, 0), code); 5421 putc ('+', file); 5422 output_pic_addr_const (file, XEXP (x, 1), code); 5423 } 5424 else if (GET_CODE (XEXP (x, 1)) == CONST_INT) 5425 { 5426 output_pic_addr_const (file, XEXP (x, 1), code); 5427 putc ('+', file); 5428 output_pic_addr_const (file, XEXP (x, 0), code); 5429 } 5430 else 5431 abort (); 5432 break; 5433 5434 case MINUS: 5435 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file); 5436 output_pic_addr_const (file, XEXP (x, 0), code); 5437 putc ('-', file); 5438 output_pic_addr_const (file, XEXP (x, 1), code); 5439 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file); 5440 break; 5441 5442 case UNSPEC: 5443 if (XVECLEN (x, 0) != 1) 5444 abort (); 5445 output_pic_addr_const (file, XVECEXP (x, 0, 0), code); 5446 switch (XINT (x, 1)) 5447 { 5448 case 6: 5449 fputs ("@GOT", file); 5450 break; 5451 case 7: 5452 fputs ("@GOTOFF", file); 5453 break; 5454 case 8: 5455 fputs ("@PLT", file); 5456 break; 5457 case 15: 5458 fputs ("@GOTPCREL(%RIP)", file); 5459 break; 5460 default: 5461 output_operand_lossage ("invalid UNSPEC as operand"); 5462 break; 5463 } 5464 break; 5465 5466 default: 5467 output_operand_lossage ("invalid expression as operand"); 5468 } 5469} 5470 5471/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST. 5472 We need to handle our special PIC relocations. 
*/ 5473 5474void 5475i386_dwarf_output_addr_const (file, x) 5476 FILE *file; 5477 rtx x; 5478{ 5479#ifdef ASM_QUAD 5480 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG); 5481#else 5482 if (TARGET_64BIT) 5483 abort (); 5484 fprintf (file, "%s", ASM_LONG); 5485#endif 5486 if (flag_pic) 5487 output_pic_addr_const (file, x, '\0'); 5488 else 5489 output_addr_const (file, x); 5490 fputc ('\n', file); 5491} 5492 5493/* In the name of slightly smaller debug output, and to cater to 5494 general assembler losage, recognize PIC+GOTOFF and turn it back 5495 into a direct symbol reference. */ 5496 5497rtx 5498i386_simplify_dwarf_addr (orig_x) 5499 rtx orig_x; 5500{ 5501 rtx x = orig_x, y; 5502 5503 if (GET_CODE (x) == MEM) 5504 x = XEXP (x, 0); 5505 5506 if (TARGET_64BIT) 5507 { 5508 if (GET_CODE (x) != CONST 5509 || GET_CODE (XEXP (x, 0)) != UNSPEC 5510 || XINT (XEXP (x, 0), 1) != 15 5511 || GET_CODE (orig_x) != MEM) 5512 return orig_x; 5513 return XVECEXP (XEXP (x, 0), 0, 0); 5514 } 5515 5516 if (GET_CODE (x) != PLUS 5517 || GET_CODE (XEXP (x, 1)) != CONST) 5518 return orig_x; 5519 5520 if (GET_CODE (XEXP (x, 0)) == REG 5521 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM) 5522 /* %ebx + GOT/GOTOFF */ 5523 y = NULL; 5524 else if (GET_CODE (XEXP (x, 0)) == PLUS) 5525 { 5526 /* %ebx + %reg * scale + GOT/GOTOFF */ 5527 y = XEXP (x, 0); 5528 if (GET_CODE (XEXP (y, 0)) == REG 5529 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM) 5530 y = XEXP (y, 1); 5531 else if (GET_CODE (XEXP (y, 1)) == REG 5532 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM) 5533 y = XEXP (y, 0); 5534 else 5535 return orig_x; 5536 if (GET_CODE (y) != REG 5537 && GET_CODE (y) != MULT 5538 && GET_CODE (y) != ASHIFT) 5539 return orig_x; 5540 } 5541 else 5542 return orig_x; 5543 5544 x = XEXP (XEXP (x, 1), 0); 5545 if (GET_CODE (x) == UNSPEC 5546 && ((XINT (x, 1) == 6 && GET_CODE (orig_x) == MEM) 5547 || (XINT (x, 1) == 7 && GET_CODE (orig_x) != MEM))) 5548 { 5549 if (y) 5550 return gen_rtx_PLUS 
(Pmode, y, XVECEXP (x, 0, 0)); 5551 return XVECEXP (x, 0, 0); 5552 } 5553 5554 if (GET_CODE (x) == PLUS 5555 && GET_CODE (XEXP (x, 0)) == UNSPEC 5556 && GET_CODE (XEXP (x, 1)) == CONST_INT 5557 && ((XINT (XEXP (x, 0), 1) == 6 && GET_CODE (orig_x) == MEM) 5558 || (XINT (XEXP (x, 0), 1) == 7 && GET_CODE (orig_x) != MEM))) 5559 { 5560 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1)); 5561 if (y) 5562 return gen_rtx_PLUS (Pmode, y, x); 5563 return x; 5564 } 5565 5566 return orig_x; 5567} 5568 5569static void 5570put_condition_code (code, mode, reverse, fp, file) 5571 enum rtx_code code; 5572 enum machine_mode mode; 5573 int reverse, fp; 5574 FILE *file; 5575{ 5576 const char *suffix; 5577 5578 if (mode == CCFPmode || mode == CCFPUmode) 5579 { 5580 enum rtx_code second_code, bypass_code; 5581 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 5582 if (bypass_code != NIL || second_code != NIL) 5583 abort (); 5584 code = ix86_fp_compare_code_to_integer (code); 5585 mode = CCmode; 5586 } 5587 if (reverse) 5588 code = reverse_condition (code); 5589 5590 switch (code) 5591 { 5592 case EQ: 5593 suffix = "e"; 5594 break; 5595 case NE: 5596 suffix = "ne"; 5597 break; 5598 case GT: 5599 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode) 5600 abort (); 5601 suffix = "g"; 5602 break; 5603 case GTU: 5604 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers. 5605 Those same assemblers have the same but opposite losage on cmov. */ 5606 if (mode != CCmode) 5607 abort (); 5608 suffix = fp ? 
"nbe" : "a"; 5609 break; 5610 case LT: 5611 if (mode == CCNOmode || mode == CCGOCmode) 5612 suffix = "s"; 5613 else if (mode == CCmode || mode == CCGCmode) 5614 suffix = "l"; 5615 else 5616 abort (); 5617 break; 5618 case LTU: 5619 if (mode != CCmode) 5620 abort (); 5621 suffix = "b"; 5622 break; 5623 case GE: 5624 if (mode == CCNOmode || mode == CCGOCmode) 5625 suffix = "ns"; 5626 else if (mode == CCmode || mode == CCGCmode) 5627 suffix = "ge"; 5628 else 5629 abort (); 5630 break; 5631 case GEU: 5632 /* ??? As above. */ 5633 if (mode != CCmode) 5634 abort (); 5635 suffix = fp ? "nb" : "ae"; 5636 break; 5637 case LE: 5638 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode) 5639 abort (); 5640 suffix = "le"; 5641 break; 5642 case LEU: 5643 if (mode != CCmode) 5644 abort (); 5645 suffix = "be"; 5646 break; 5647 case UNORDERED: 5648 suffix = fp ? "u" : "p"; 5649 break; 5650 case ORDERED: 5651 suffix = fp ? "nu" : "np"; 5652 break; 5653 default: 5654 abort (); 5655 } 5656 fputs (suffix, file); 5657} 5658 5659void 5660print_reg (x, code, file) 5661 rtx x; 5662 int code; 5663 FILE *file; 5664{ 5665 if (REGNO (x) == ARG_POINTER_REGNUM 5666 || REGNO (x) == FRAME_POINTER_REGNUM 5667 || REGNO (x) == FLAGS_REG 5668 || REGNO (x) == FPSR_REG) 5669 abort (); 5670 5671 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0) 5672 putc ('%', file); 5673 5674 if (code == 'w' || MMX_REG_P (x)) 5675 code = 2; 5676 else if (code == 'b') 5677 code = 1; 5678 else if (code == 'k') 5679 code = 4; 5680 else if (code == 'q') 5681 code = 8; 5682 else if (code == 'y') 5683 code = 3; 5684 else if (code == 'h') 5685 code = 0; 5686 else 5687 code = GET_MODE_SIZE (GET_MODE (x)); 5688 5689 /* Irritatingly, AMD extended registers use different naming convention 5690 from the normal registers. 
*/ 5691 if (REX_INT_REG_P (x)) 5692 { 5693 if (!TARGET_64BIT) 5694 abort (); 5695 switch (code) 5696 { 5697 case 0: 5698 error ("extended registers have no high halves"); 5699 break; 5700 case 1: 5701 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8); 5702 break; 5703 case 2: 5704 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8); 5705 break; 5706 case 4: 5707 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8); 5708 break; 5709 case 8: 5710 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8); 5711 break; 5712 default: 5713 error ("unsupported operand size for extended register"); 5714 break; 5715 } 5716 return; 5717 } 5718 switch (code) 5719 { 5720 case 3: 5721 if (STACK_TOP_P (x)) 5722 { 5723 fputs ("st(0)", file); 5724 break; 5725 } 5726 /* FALLTHRU */ 5727 case 8: 5728 case 4: 5729 case 12: 5730 if (! ANY_FP_REG_P (x)) 5731 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file); 5732 /* FALLTHRU */ 5733 case 16: 5734 case 2: 5735 fputs (hi_reg_name[REGNO (x)], file); 5736 break; 5737 case 1: 5738 fputs (qi_reg_name[REGNO (x)], file); 5739 break; 5740 case 0: 5741 fputs (qi_high_reg_name[REGNO (x)], file); 5742 break; 5743 default: 5744 abort (); 5745 } 5746} 5747 5748/* Meaning of CODE: 5749 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. 5750 C -- print opcode suffix for set/cmov insn. 5751 c -- like C, but print reversed condition 5752 F,f -- likewise, but for floating-point. 5753 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise 5754 nothing 5755 R -- print the prefix for register names. 5756 z -- print the opcode suffix for the size of the current operand. 5757 * -- print a star (in certain assembler syntax) 5758 A -- print an absolute memory reference. 5759 w -- print the operand as if it's a "word" (HImode) even if it isn't. 5760 s -- print a shift double count, followed by the assemblers argument 5761 delimiter. 5762 b -- print the QImode name of the register for the indicated operand. 
5763 %b0 would print %al if operands[0] is reg 0. 5764 w -- likewise, print the HImode name of the register. 5765 k -- likewise, print the SImode name of the register. 5766 q -- likewise, print the DImode name of the register. 5767 h -- print the QImode name for a "high" register, either ah, bh, ch or dh. 5768 y -- print "st(0)" instead of "st" as a register. 5769 D -- print condition for SSE cmp instruction. 5770 P -- if PIC, print an @PLT suffix. 5771 X -- don't print any sort of PIC '@' suffix for a symbol. 5772 */ 5773 5774void 5775print_operand (file, x, code) 5776 FILE *file; 5777 rtx x; 5778 int code; 5779{ 5780 if (code) 5781 { 5782 switch (code) 5783 { 5784 case '*': 5785 if (ASSEMBLER_DIALECT == ASM_ATT) 5786 putc ('*', file); 5787 return; 5788 5789 case 'A': 5790 if (ASSEMBLER_DIALECT == ASM_ATT) 5791 putc ('*', file); 5792 else if (ASSEMBLER_DIALECT == ASM_INTEL) 5793 { 5794 /* Intel syntax. For absolute addresses, registers should not 5795 be surrounded by braces. */ 5796 if (GET_CODE (x) != REG) 5797 { 5798 putc ('[', file); 5799 PRINT_OPERAND (file, x, 0); 5800 putc (']', file); 5801 return; 5802 } 5803 } 5804 else 5805 abort (); 5806 5807 PRINT_OPERAND (file, x, 0); 5808 return; 5809 5810 5811 case 'L': 5812 if (ASSEMBLER_DIALECT == ASM_ATT) 5813 putc ('l', file); 5814 return; 5815 5816 case 'W': 5817 if (ASSEMBLER_DIALECT == ASM_ATT) 5818 putc ('w', file); 5819 return; 5820 5821 case 'B': 5822 if (ASSEMBLER_DIALECT == ASM_ATT) 5823 putc ('b', file); 5824 return; 5825 5826 case 'Q': 5827 if (ASSEMBLER_DIALECT == ASM_ATT) 5828 putc ('l', file); 5829 return; 5830 5831 case 'S': 5832 if (ASSEMBLER_DIALECT == ASM_ATT) 5833 putc ('s', file); 5834 return; 5835 5836 case 'T': 5837 if (ASSEMBLER_DIALECT == ASM_ATT) 5838 putc ('t', file); 5839 return; 5840 5841 case 'z': 5842 /* 387 opcodes don't get size suffixes if the operands are 5843 registers. */ 5844 if (STACK_REG_P (x)) 5845 return; 5846 5847 /* Likewise if using Intel opcodes. 
*/ 5848 if (ASSEMBLER_DIALECT == ASM_INTEL) 5849 return; 5850 5851 /* This is the size of op from size of operand. */ 5852 switch (GET_MODE_SIZE (GET_MODE (x))) 5853 { 5854 case 2: 5855#ifdef HAVE_GAS_FILDS_FISTS 5856 putc ('s', file); 5857#endif 5858 return; 5859 5860 case 4: 5861 if (GET_MODE (x) == SFmode) 5862 { 5863 putc ('s', file); 5864 return; 5865 } 5866 else 5867 putc ('l', file); 5868 return; 5869 5870 case 12: 5871 case 16: 5872 putc ('t', file); 5873 return; 5874 5875 case 8: 5876 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) 5877 { 5878#ifdef GAS_MNEMONICS 5879 putc ('q', file); 5880#else 5881 putc ('l', file); 5882 putc ('l', file); 5883#endif 5884 } 5885 else 5886 putc ('l', file); 5887 return; 5888 5889 default: 5890 abort (); 5891 } 5892 5893 case 'b': 5894 case 'w': 5895 case 'k': 5896 case 'q': 5897 case 'h': 5898 case 'y': 5899 case 'X': 5900 case 'P': 5901 break; 5902 5903 case 's': 5904 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT) 5905 { 5906 PRINT_OPERAND (file, x, 0); 5907 putc (',', file); 5908 } 5909 return; 5910 5911 case 'D': 5912 /* Little bit of braindamage here. The SSE compare instructions 5913 does use completely different names for the comparisons that the 5914 fp conditional moves. 
*/ 5915 switch (GET_CODE (x)) 5916 { 5917 case EQ: 5918 case UNEQ: 5919 fputs ("eq", file); 5920 break; 5921 case LT: 5922 case UNLT: 5923 fputs ("lt", file); 5924 break; 5925 case LE: 5926 case UNLE: 5927 fputs ("le", file); 5928 break; 5929 case UNORDERED: 5930 fputs ("unord", file); 5931 break; 5932 case NE: 5933 case LTGT: 5934 fputs ("neq", file); 5935 break; 5936 case UNGE: 5937 case GE: 5938 fputs ("nlt", file); 5939 break; 5940 case UNGT: 5941 case GT: 5942 fputs ("nle", file); 5943 break; 5944 case ORDERED: 5945 fputs ("ord", file); 5946 break; 5947 default: 5948 abort (); 5949 break; 5950 } 5951 return; 5952 case 'O': 5953#ifdef CMOV_SUN_AS_SYNTAX 5954 if (ASSEMBLER_DIALECT == ASM_ATT) 5955 { 5956 switch (GET_MODE (x)) 5957 { 5958 case HImode: putc ('w', file); break; 5959 case SImode: 5960 case SFmode: putc ('l', file); break; 5961 case DImode: 5962 case DFmode: putc ('q', file); break; 5963 default: abort (); 5964 } 5965 putc ('.', file); 5966 } 5967#endif 5968 return; 5969 case 'C': 5970 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file); 5971 return; 5972 case 'F': 5973#ifdef CMOV_SUN_AS_SYNTAX 5974 if (ASSEMBLER_DIALECT == ASM_ATT) 5975 putc ('.', file); 5976#endif 5977 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file); 5978 return; 5979 5980 /* Like above, but reverse condition */ 5981 case 'c': 5982 /* Check to see if argument to %c is really a constant 5983 and not a condition code which needs to be reversed. 
*/ 5984 if (GET_RTX_CLASS (GET_CODE (x)) != '<') 5985 { 5986 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'"); 5987 return; 5988 } 5989 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file); 5990 return; 5991 case 'f': 5992#ifdef CMOV_SUN_AS_SYNTAX 5993 if (ASSEMBLER_DIALECT == ASM_ATT) 5994 putc ('.', file); 5995#endif 5996 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file); 5997 return; 5998 case '+': 5999 { 6000 rtx x; 6001 6002 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS) 6003 return; 6004 6005 x = find_reg_note (current_output_insn, REG_BR_PROB, 0); 6006 if (x) 6007 { 6008 int pred_val = INTVAL (XEXP (x, 0)); 6009 6010 if (pred_val < REG_BR_PROB_BASE * 45 / 100 6011 || pred_val > REG_BR_PROB_BASE * 55 / 100) 6012 { 6013 int taken = pred_val > REG_BR_PROB_BASE / 2; 6014 int cputaken = final_forward_branch_p (current_output_insn) == 0; 6015 6016 /* Emit hints only in the case default branch prediction 6017 heruistics would fail. */ 6018 if (taken != cputaken) 6019 { 6020 /* We use 3e (DS) prefix for taken branches and 6021 2e (CS) prefix for not taken branches. */ 6022 if (taken) 6023 fputs ("ds ; ", file); 6024 else 6025 fputs ("cs ; ", file); 6026 } 6027 } 6028 } 6029 return; 6030 } 6031 default: 6032 output_operand_lossage ("invalid operand code `%c'", code); 6033 } 6034 } 6035 6036 if (GET_CODE (x) == REG) 6037 { 6038 PRINT_REG (x, code, file); 6039 } 6040 6041 else if (GET_CODE (x) == MEM) 6042 { 6043 /* No `byte ptr' prefix for call instructions. 
*/ 6044 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P') 6045 { 6046 const char * size; 6047 switch (GET_MODE_SIZE (GET_MODE (x))) 6048 { 6049 case 1: size = "BYTE"; break; 6050 case 2: size = "WORD"; break; 6051 case 4: size = "DWORD"; break; 6052 case 8: size = "QWORD"; break; 6053 case 12: size = "XWORD"; break; 6054 case 16: size = "XMMWORD"; break; 6055 default: 6056 abort (); 6057 } 6058 6059 /* Check for explicit size override (codes 'b', 'w' and 'k') */ 6060 if (code == 'b') 6061 size = "BYTE"; 6062 else if (code == 'w') 6063 size = "WORD"; 6064 else if (code == 'k') 6065 size = "DWORD"; 6066 6067 fputs (size, file); 6068 fputs (" PTR ", file); 6069 } 6070 6071 x = XEXP (x, 0); 6072 if (flag_pic && CONSTANT_ADDRESS_P (x)) 6073 output_pic_addr_const (file, x, code); 6074 /* Avoid (%rip) for call operands. */ 6075 else if (CONSTANT_ADDRESS_P (x) && code =='P' 6076 && GET_CODE (x) != CONST_INT) 6077 output_addr_const (file, x); 6078 else 6079 output_address (x); 6080 } 6081 6082 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode) 6083 { 6084 REAL_VALUE_TYPE r; 6085 long l; 6086 6087 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 6088 REAL_VALUE_TO_TARGET_SINGLE (r, l); 6089 6090 if (ASSEMBLER_DIALECT == ASM_ATT) 6091 putc ('$', file); 6092 fprintf (file, "0x%lx", l); 6093 } 6094 6095 /* These float cases don't actually occur as immediate operands. 
*/ 6096 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode) 6097 { 6098 REAL_VALUE_TYPE r; 6099 char dstr[30]; 6100 6101 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 6102 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr); 6103 fprintf (file, "%s", dstr); 6104 } 6105 6106 else if (GET_CODE (x) == CONST_DOUBLE 6107 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode)) 6108 { 6109 REAL_VALUE_TYPE r; 6110 char dstr[30]; 6111 6112 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 6113 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr); 6114 fprintf (file, "%s", dstr); 6115 } 6116 else 6117 { 6118 if (code != 'P') 6119 { 6120 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE) 6121 { 6122 if (ASSEMBLER_DIALECT == ASM_ATT) 6123 putc ('$', file); 6124 } 6125 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF 6126 || GET_CODE (x) == LABEL_REF) 6127 { 6128 if (ASSEMBLER_DIALECT == ASM_ATT) 6129 putc ('$', file); 6130 else 6131 fputs ("OFFSET FLAT:", file); 6132 } 6133 } 6134 if (GET_CODE (x) == CONST_INT) 6135 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); 6136 else if (flag_pic) 6137 output_pic_addr_const (file, x, code); 6138 else 6139 output_addr_const (file, x); 6140 } 6141} 6142 6143/* Print a memory operand whose address is ADDR. */ 6144 6145void 6146print_operand_address (file, addr) 6147 FILE *file; 6148 register rtx addr; 6149{ 6150 struct ix86_address parts; 6151 rtx base, index, disp; 6152 int scale; 6153 6154 if (! ix86_decompose_address (addr, &parts)) 6155 { 6156 output_operand_lossage ("Wrong address expression or operand constraint"); 6157 return; 6158 } 6159 6160 base = parts.base; 6161 index = parts.index; 6162 disp = parts.disp; 6163 scale = parts.scale; 6164 6165 if (!base && !index) 6166 { 6167 /* Displacement only requires special attention. 
*/ 6168 6169 if (GET_CODE (disp) == CONST_INT) 6170 { 6171 if (ASSEMBLER_DIALECT == ASM_INTEL) 6172 { 6173 if (USER_LABEL_PREFIX[0] == 0) 6174 putc ('%', file); 6175 fputs ("ds:", file); 6176 } 6177 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr)); 6178 } 6179 else if (flag_pic) 6180 output_pic_addr_const (file, addr, 0); 6181 else 6182 output_addr_const (file, addr); 6183 6184 /* Use one byte shorter RIP relative addressing for 64bit mode. */ 6185 if (GET_CODE (disp) != CONST_INT && TARGET_64BIT) 6186 fputs ("(%rip)", file); 6187 } 6188 else 6189 { 6190 if (ASSEMBLER_DIALECT == ASM_ATT) 6191 { 6192 if (disp) 6193 { 6194 if (flag_pic) 6195 output_pic_addr_const (file, disp, 0); 6196 else if (GET_CODE (disp) == LABEL_REF) 6197 output_asm_label (disp); 6198 else 6199 output_addr_const (file, disp); 6200 } 6201 6202 putc ('(', file); 6203 if (base) 6204 PRINT_REG (base, 0, file); 6205 if (index) 6206 { 6207 putc (',', file); 6208 PRINT_REG (index, 0, file); 6209 if (scale != 1) 6210 fprintf (file, ",%d", scale); 6211 } 6212 putc (')', file); 6213 } 6214 else 6215 { 6216 rtx offset = NULL_RTX; 6217 6218 if (disp) 6219 { 6220 /* Pull out the offset of a symbol; print any symbol itself. 
*/ 6221 if (GET_CODE (disp) == CONST 6222 && GET_CODE (XEXP (disp, 0)) == PLUS 6223 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT) 6224 { 6225 offset = XEXP (XEXP (disp, 0), 1); 6226 disp = gen_rtx_CONST (VOIDmode, 6227 XEXP (XEXP (disp, 0), 0)); 6228 } 6229 6230 if (flag_pic) 6231 output_pic_addr_const (file, disp, 0); 6232 else if (GET_CODE (disp) == LABEL_REF) 6233 output_asm_label (disp); 6234 else if (GET_CODE (disp) == CONST_INT) 6235 offset = disp; 6236 else 6237 output_addr_const (file, disp); 6238 } 6239 6240 putc ('[', file); 6241 if (base) 6242 { 6243 PRINT_REG (base, 0, file); 6244 if (offset) 6245 { 6246 if (INTVAL (offset) >= 0) 6247 putc ('+', file); 6248 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); 6249 } 6250 } 6251 else if (offset) 6252 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); 6253 else 6254 putc ('0', file); 6255 6256 if (index) 6257 { 6258 putc ('+', file); 6259 PRINT_REG (index, 0, file); 6260 if (scale != 1) 6261 fprintf (file, "*%d", scale); 6262 } 6263 putc (']', file); 6264 } 6265 } 6266} 6267 6268/* Split one or more DImode RTL references into pairs of SImode 6269 references. The RTL can be REG, offsettable MEM, integer constant, or 6270 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to 6271 split and "num" is its length. lo_half and hi_half are output arrays 6272 that parallel "operands". */ 6273 6274void 6275split_di (operands, num, lo_half, hi_half) 6276 rtx operands[]; 6277 int num; 6278 rtx lo_half[], hi_half[]; 6279{ 6280 while (num--) 6281 { 6282 rtx op = operands[num]; 6283 6284 /* simplify_subreg refuse to split volatile memory addresses, 6285 but we still have to handle it. */ 6286 if (GET_CODE (op) == MEM) 6287 { 6288 lo_half[num] = adjust_address (op, SImode, 0); 6289 hi_half[num] = adjust_address (op, SImode, 4); 6290 } 6291 else 6292 { 6293 lo_half[num] = simplify_gen_subreg (SImode, op, 6294 GET_MODE (op) == VOIDmode 6295 ? 
DImode : GET_MODE (op), 0); 6296 hi_half[num] = simplify_gen_subreg (SImode, op, 6297 GET_MODE (op) == VOIDmode 6298 ? DImode : GET_MODE (op), 4); 6299 } 6300 } 6301} 6302/* Split one or more TImode RTL references into pairs of SImode 6303 references. The RTL can be REG, offsettable MEM, integer constant, or 6304 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to 6305 split and "num" is its length. lo_half and hi_half are output arrays 6306 that parallel "operands". */ 6307 6308void 6309split_ti (operands, num, lo_half, hi_half) 6310 rtx operands[]; 6311 int num; 6312 rtx lo_half[], hi_half[]; 6313{ 6314 while (num--) 6315 { 6316 rtx op = operands[num]; 6317 6318 /* simplify_subreg refuse to split volatile memory addresses, but we 6319 still have to handle it. */ 6320 if (GET_CODE (op) == MEM) 6321 { 6322 lo_half[num] = adjust_address (op, DImode, 0); 6323 hi_half[num] = adjust_address (op, DImode, 8); 6324 } 6325 else 6326 { 6327 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0); 6328 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8); 6329 } 6330 } 6331} 6332 6333/* Output code to perform a 387 binary operation in INSN, one of PLUS, 6334 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] 6335 is the expression of the binary operation. The output may either be 6336 emitted here, or returned to the caller, like all output_* functions. 6337 6338 There is no guarantee that the operands are the same mode, as they 6339 might be within FLOAT or FLOAT_EXTEND expressions. */ 6340 6341#ifndef SYSV386_COMPAT 6342/* Set to 1 for compatibility with brain-damaged assemblers. No-one 6343 wants to fix the assemblers because that causes incompatibility 6344 with gcc. No-one wants to fix gcc because that causes 6345 incompatibility with assemblers... You can use the option of 6346 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. 
*/
#define SYSV386_COMPAT 1
#endif

/* Emit the assembler mnemonic for the 387 (or SSE scalar) binary op in
   OPERANDS[3].  Returns a static buffer holding the opcode plus operand
   template; the buffer is overwritten on each call.  */
const char *
output_387_binary_op (insn, operands)
     rtx insn;
     rtx *operands;
{
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else if (!is_sse)
    abort ();
#endif

  /* Select the base mnemonic; the "fi" forms are used when one source
     is an integer-mode memory operand.  */
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "div";
      break;

    default:
      abort ();
    }

  if (is_sse)
   {
      /* SSE scalar form: "ss" for SFmode, "sd" for DFmode.  */
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
	strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
	strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
   }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative ops: canonicalize so the register matching the
	 destination is operands[1].  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative ops additionally need the reversed ("r")
	 forms when the memory/stack operand order is swapped.  */
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}

/* Output code to initialize control word copies used by
   trunc?f?i patterns.  NORMAL is set to current control word, while ROUND_DOWN
   is set to control word rounding downwards.
*/ 6560void 6561emit_i387_cw_initialization (normal, round_down) 6562 rtx normal, round_down; 6563{ 6564 rtx reg = gen_reg_rtx (HImode); 6565 6566 emit_insn (gen_x86_fnstcw_1 (normal)); 6567 emit_move_insn (reg, normal); 6568 if (!TARGET_PARTIAL_REG_STALL && !optimize_size 6569 && !TARGET_64BIT) 6570 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc))); 6571 else 6572 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00))); 6573 emit_move_insn (round_down, reg); 6574} 6575 6576/* Output code for INSN to convert a float to a signed int. OPERANDS 6577 are the insn operands. The output may be [HSD]Imode and the input 6578 operand may be [SDX]Fmode. */ 6579 6580const char * 6581output_fix_trunc (insn, operands) 6582 rtx insn; 6583 rtx *operands; 6584{ 6585 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; 6586 int dimode_p = GET_MODE (operands[0]) == DImode; 6587 6588 /* Jump through a hoop or two for DImode, since the hardware has no 6589 non-popping instruction. We used to do this a different way, but 6590 that was somewhat fragile and broke with post-reload splitters. */ 6591 if (dimode_p && !stack_top_dies) 6592 output_asm_insn ("fld\t%y1", operands); 6593 6594 if (!STACK_TOP_P (operands[1])) 6595 abort (); 6596 6597 if (GET_CODE (operands[0]) != MEM) 6598 abort (); 6599 6600 output_asm_insn ("fldcw\t%3", operands); 6601 if (stack_top_dies || dimode_p) 6602 output_asm_insn ("fistp%z0\t%0", operands); 6603 else 6604 output_asm_insn ("fist%z0\t%0", operands); 6605 output_asm_insn ("fldcw\t%2", operands); 6606 6607 return ""; 6608} 6609 6610/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi 6611 should be used and 2 when fnstsw should be used. UNORDERED_P is true 6612 when fucom should be used. 
*/ 6613 6614const char * 6615output_fp_compare (insn, operands, eflags_p, unordered_p) 6616 rtx insn; 6617 rtx *operands; 6618 int eflags_p, unordered_p; 6619{ 6620 int stack_top_dies; 6621 rtx cmp_op0 = operands[0]; 6622 rtx cmp_op1 = operands[1]; 6623 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]); 6624 6625 if (eflags_p == 2) 6626 { 6627 cmp_op0 = cmp_op1; 6628 cmp_op1 = operands[2]; 6629 } 6630 if (is_sse) 6631 { 6632 if (GET_MODE (operands[0]) == SFmode) 6633 if (unordered_p) 6634 return "ucomiss\t{%1, %0|%0, %1}"; 6635 else 6636 return "comiss\t{%1, %0|%0, %y}"; 6637 else 6638 if (unordered_p) 6639 return "ucomisd\t{%1, %0|%0, %1}"; 6640 else 6641 return "comisd\t{%1, %0|%0, %y}"; 6642 } 6643 6644 if (! STACK_TOP_P (cmp_op0)) 6645 abort (); 6646 6647 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; 6648 6649 if (STACK_REG_P (cmp_op1) 6650 && stack_top_dies 6651 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1)) 6652 && REGNO (cmp_op1) != FIRST_STACK_REG) 6653 { 6654 /* If both the top of the 387 stack dies, and the other operand 6655 is also a stack register that dies, then this must be a 6656 `fcompp' float compare */ 6657 6658 if (eflags_p == 1) 6659 { 6660 /* There is no double popping fcomi variant. Fortunately, 6661 eflags is immune from the fstp's cc clobbering. */ 6662 if (unordered_p) 6663 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands); 6664 else 6665 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands); 6666 return "fstp\t%y0"; 6667 } 6668 else 6669 { 6670 if (eflags_p == 2) 6671 { 6672 if (unordered_p) 6673 return "fucompp\n\tfnstsw\t%0"; 6674 else 6675 return "fcompp\n\tfnstsw\t%0"; 6676 } 6677 else 6678 { 6679 if (unordered_p) 6680 return "fucompp"; 6681 else 6682 return "fcompp"; 6683 } 6684 } 6685 } 6686 else 6687 { 6688 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. 
*/ 6689 6690 static const char * const alt[24] = 6691 { 6692 "fcom%z1\t%y1", 6693 "fcomp%z1\t%y1", 6694 "fucom%z1\t%y1", 6695 "fucomp%z1\t%y1", 6696 6697 "ficom%z1\t%y1", 6698 "ficomp%z1\t%y1", 6699 NULL, 6700 NULL, 6701 6702 "fcomi\t{%y1, %0|%0, %y1}", 6703 "fcomip\t{%y1, %0|%0, %y1}", 6704 "fucomi\t{%y1, %0|%0, %y1}", 6705 "fucomip\t{%y1, %0|%0, %y1}", 6706 6707 NULL, 6708 NULL, 6709 NULL, 6710 NULL, 6711 6712 "fcom%z2\t%y2\n\tfnstsw\t%0", 6713 "fcomp%z2\t%y2\n\tfnstsw\t%0", 6714 "fucom%z2\t%y2\n\tfnstsw\t%0", 6715 "fucomp%z2\t%y2\n\tfnstsw\t%0", 6716 6717 "ficom%z2\t%y2\n\tfnstsw\t%0", 6718 "ficomp%z2\t%y2\n\tfnstsw\t%0", 6719 NULL, 6720 NULL 6721 }; 6722 6723 int mask; 6724 const char *ret; 6725 6726 mask = eflags_p << 3; 6727 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2; 6728 mask |= unordered_p << 1; 6729 mask |= stack_top_dies; 6730 6731 if (mask >= 24) 6732 abort (); 6733 ret = alt[mask]; 6734 if (ret == NULL) 6735 abort (); 6736 6737 return ret; 6738 } 6739} 6740 6741void 6742ix86_output_addr_vec_elt (file, value) 6743 FILE *file; 6744 int value; 6745{ 6746 const char *directive = ASM_LONG; 6747 6748 if (TARGET_64BIT) 6749 { 6750#ifdef ASM_QUAD 6751 directive = ASM_QUAD; 6752#else 6753 abort (); 6754#endif 6755 } 6756 6757 fprintf (file, "%s%s%d\n", directive, LPREFIX, value); 6758} 6759 6760void 6761ix86_output_addr_diff_elt (file, value, rel) 6762 FILE *file; 6763 int value, rel; 6764{ 6765 if (TARGET_64BIT) 6766 fprintf (file, "%s%s%d-.+(.-%s%d)\n", 6767 ASM_LONG, LPREFIX, value, LPREFIX, rel); 6768 else if (HAVE_AS_GOTOFF_IN_DATA) 6769 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value); 6770 else 6771 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n", 6772 ASM_LONG, LPREFIX, value); 6773} 6774 6775/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate 6776 for the target. 
*/

void
ix86_expand_clear (dest)
     rtx dest;
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  if (!reload_completed)
    abort ();

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
    {
      /* The xor form clobbers the flags register (hard reg 17).  */
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}

/* Expand a MODE move of OPERANDS[1] into OPERANDS[0], legitimizing PIC
   symbol references and forcing awkward operands into registers or the
   constant pool as needed before emitting the SET.  */
void
ix86_expand_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  int strict = (reload_in_progress || reload_completed);
  rtx insn;

  if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
    {
      /* Emit insns to move operands[1] into operands[0].  */

      if (GET_CODE (operands[0]) == MEM)
	operands[1] = force_reg (Pmode, operands[1]);
      else
	{
	  rtx temp = operands[0];
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (operands[1], temp);
	  /* legitimize_pic_address may already have produced the full
	     move, in which case there is nothing left to emit.  */
	  if (temp == operands[0])
	    return;
	  operands[1] = temp;
	}
    }
  else
    {
      /* Avoid mem->mem moves (except via push, which the hardware
	 supports directly).  */
      if (GET_CODE (operands[0]) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (operands[0], mode))
	  && GET_CODE (operands[1]) == MEM)
	operands[1] = force_reg (mode, operands[1]);

      if (push_operand (operands[0], mode)
	  && ! general_no_elim_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (operands[1], mode)
	  && !x86_64_zero_extended_value (operands[1])
	  && !register_operand (operands[0], mode)
	  && optimize && !reload_completed && !reload_in_progress)
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (operands[1]) == CONST_DOUBLE
		   && register_operand (operands[0], mode))
	    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
	}
    }

  insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);

  emit_insn (insn);
}

void
ix86_expand_vector_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (operands[0], mode)
      && CONSTANT_P (operands[1]))
    {
      rtx addr = gen_reg_rtx (Pmode);
      emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
      operands[1] = gen_rtx_MEM (mode, addr);
    }

  /* Make operand1 a register if it isn't already.  */
  if ((reload_in_progress | reload_completed) == 0
      && !register_operand (operands[0], mode)
      && !register_operand (operands[1], mode))
    {
      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
      emit_move_insn (operands[0], temp);
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
}

/* Attempt to expand a binary operator.
   Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      /* Keep the operand that matches the memory destination; force
	 the other one into a register.  */
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[3];
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutable, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
	    || (GET_RTX_CLASS (code) == 'c'
		&& rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutable and the source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != 'c'
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;
  return 1;
}

/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.
*/

void
ix86_expand_unary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  NOT does not touch the flags, so
         it needs no clobber.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (code, mode, operands)
     enum rtx_code code ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[2] ATTRIBUTE_UNUSED;
{
  /* If one of operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}

/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (insn, req_mode)
     rtx insn;
     enum machine_mode req_mode;
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  /* The cases fall through in decreasing order of strictness: a mode
     satisfies REQ_MODE if it is at least as constrained.  */
  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (code)
     enum rtx_code code ATTRIBUTE_UNUSED;
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}

/* Return the condition-code mode needed to test comparison CODE of
   OP0 against OP1.  */
enum machine_mode
ix86_cc_mode (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
	 mode.  */
    case USE:
      return CCmode;
    default:
      abort ();
    }
}

/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (code)
     enum rtx_code code ATTRIBUTE_UNUSED;
{
  enum rtx_code swapped_code = swap_condition (code);
  /* Use fcomi whenever it is at least as cheap as the best alternative,
     for the code as given or with its operands swapped.  */
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
	  || (ix86_fp_comparison_cost (swapped_code)
	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
}

/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (code, pop0, pop1)
     enum rtx_code code;
     rtx *pop0, *pop1;
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || op_mode == XFmode
	  || op_mode == TFmode
	  || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  /* Load-constant idioms (fldz/fld1/...) go in a register;
	     everything else goes through the constant pool.  */
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}

/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */
static enum rtx_code
ix86_fp_compare_code_to_integer (code)
     enum rtx_code code;
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
      break;
    case UNEQ:
      return EQ;
      break;
    case UNLT:
      return LTU;
      break;
    case UNLE:
      return LEU;
      break;
    case LTGT:
      return NE;
      break;
    default:
      return UNKNOWN;
    }
}

/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is comparison code for branch that will
   branch around FIRST_CODE and SECOND_CODE.  If some of branches
   is not required, set value to NIL.
   We never require more than two branches.
*/ 7358static void 7359ix86_fp_comparison_codes (code, bypass_code, first_code, second_code) 7360 enum rtx_code code, *bypass_code, *first_code, *second_code; 7361{ 7362 *first_code = code; 7363 *bypass_code = NIL; 7364 *second_code = NIL; 7365 7366 /* The fcomi comparison sets flags as follows: 7367 7368 cmp ZF PF CF 7369 > 0 0 0 7370 < 0 0 1 7371 = 1 0 0 7372 un 1 1 1 */ 7373 7374 switch (code) 7375 { 7376 case GT: /* GTU - CF=0 & ZF=0 */ 7377 case GE: /* GEU - CF=0 */ 7378 case ORDERED: /* PF=0 */ 7379 case UNORDERED: /* PF=1 */ 7380 case UNEQ: /* EQ - ZF=1 */ 7381 case UNLT: /* LTU - CF=1 */ 7382 case UNLE: /* LEU - CF=1 | ZF=1 */ 7383 case LTGT: /* EQ - ZF=0 */ 7384 break; 7385 case LT: /* LTU - CF=1 - fails on unordered */ 7386 *first_code = UNLT; 7387 *bypass_code = UNORDERED; 7388 break; 7389 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */ 7390 *first_code = UNLE; 7391 *bypass_code = UNORDERED; 7392 break; 7393 case EQ: /* EQ - ZF=1 - fails on unordered */ 7394 *first_code = UNEQ; 7395 *bypass_code = UNORDERED; 7396 break; 7397 case NE: /* NE - ZF=0 - fails on unordered */ 7398 *first_code = LTGT; 7399 *second_code = UNORDERED; 7400 break; 7401 case UNGE: /* GEU - CF=0 - fails on unordered */ 7402 *first_code = GE; 7403 *second_code = UNORDERED; 7404 break; 7405 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */ 7406 *first_code = GT; 7407 *second_code = UNORDERED; 7408 break; 7409 default: 7410 abort (); 7411 } 7412 if (!TARGET_IEEE_FP) 7413 { 7414 *second_code = NIL; 7415 *bypass_code = NIL; 7416 } 7417} 7418 7419/* Return cost of comparison done fcom + arithmetics operations on AX. 7420 All following functions do use number of instructions as an cost metrics. 7421 In future this should be tweaked to compute bytes for optimize_size and 7422 take into account performance of various instructions on various CPUs. 
*/ 7423static int 7424ix86_fp_comparison_arithmetics_cost (code) 7425 enum rtx_code code; 7426{ 7427 if (!TARGET_IEEE_FP) 7428 return 4; 7429 /* The cost of code output by ix86_expand_fp_compare. */ 7430 switch (code) 7431 { 7432 case UNLE: 7433 case UNLT: 7434 case LTGT: 7435 case GT: 7436 case GE: 7437 case UNORDERED: 7438 case ORDERED: 7439 case UNEQ: 7440 return 4; 7441 break; 7442 case LT: 7443 case NE: 7444 case EQ: 7445 case UNGE: 7446 return 5; 7447 break; 7448 case LE: 7449 case UNGT: 7450 return 6; 7451 break; 7452 default: 7453 abort (); 7454 } 7455} 7456 7457/* Return cost of comparison done using fcomi operation. 7458 See ix86_fp_comparison_arithmetics_cost for the metrics. */ 7459static int 7460ix86_fp_comparison_fcomi_cost (code) 7461 enum rtx_code code; 7462{ 7463 enum rtx_code bypass_code, first_code, second_code; 7464 /* Return arbitarily high cost when instruction is not supported - this 7465 prevents gcc from using it. */ 7466 if (!TARGET_CMOVE) 7467 return 1024; 7468 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 7469 return (bypass_code != NIL || second_code != NIL) + 2; 7470} 7471 7472/* Return cost of comparison done using sahf operation. 7473 See ix86_fp_comparison_arithmetics_cost for the metrics. */ 7474static int 7475ix86_fp_comparison_sahf_cost (code) 7476 enum rtx_code code; 7477{ 7478 enum rtx_code bypass_code, first_code, second_code; 7479 /* Return arbitarily high cost when instruction is not preferred - this 7480 avoids gcc from using it. */ 7481 if (!TARGET_USE_SAHF && !optimize_size) 7482 return 1024; 7483 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 7484 return (bypass_code != NIL || second_code != NIL) + 3; 7485} 7486 7487/* Compute cost of the comparison done using any method. 7488 See ix86_fp_comparison_arithmetics_cost for the metrics. 
*/
static int
ix86_fp_comparison_cost (code)
     enum rtx_code code;
{
  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
  int min;

  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
  sahf_cost = ix86_fp_comparison_sahf_cost (code);

  /* Take the minimum of the three available strategies.  */
  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
  if (min > sahf_cost)
    min = sahf_cost;
  if (min > fcomi_cost)
    min = fcomi_cost;
  return min;
}

/* Generate insn patterns to do a floating point compare of OPERANDS.

   SCRATCH may be NULL_RTX, in which case a fresh HImode pseudo is
   allocated when the fnstsw result needs a home.  *SECOND_TEST and
   *BYPASS_TEST receive the auxiliary flag tests required by IEEE
   semantics (NULL_RTX when not needed); passing NULL pointers forbids
   the strategies that would need them.  Returns the comparison rtx to
   put into the flags user (bcc, scc, or cmov).  */

static rtx
ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
     enum rtx_code code;
     rtx op0, op1, scratch;
     rtx *second_test;
     rtx *bypass_test;
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  */
  if ((bypass_code == NIL || bypass_test)
      && (second_code == NIL || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
	{
	  /* fcomi: compare directly into the flags register.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			     tmp);
	  emit_insn (tmp);
	}
      else
	{
	  /* fnstsw + sahf: copy the FP status word into AH, then into
	     the flags.  (Unspec 9 is the fnstsw pattern in i386.md.)  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
	  if (!scratch)
	    scratch = gen_reg_rtx (HImode);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
	  emit_insn (gen_x86_sahf_1 (scratch));
	}

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
      if (second_code != NIL)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.

	 The masks used below select x87 status-word bits as placed in
	 AH: 0x01 = C0 (carry analog), 0x04 = C2 (unordered), 0x40 = C3
	 (zero analog), 0x45 = C0|C2|C3.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	      break;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  abort ();
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}

/* Expand comparison CODE of ix86_compare_op0/op1, dispatching to the
   FP or integer expander as appropriate.  *SECOND_TEST and *BYPASS_TEST
   receive any auxiliary flag tests (see ix86_expand_fp_compare).  */
rtx
ix86_expand_compare (code, second_test, bypass_test)
     enum rtx_code code;
     rtx *second_test, *bypass_test;
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
				  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

/* Return true if the CODE will result in nontrivial jump sequence.  */
bool
ix86_fp_jump_nontrivial_p (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Without cmove the sahf/arith sequences are never single-jump.  */
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != NIL || second_code != NIL;
}

/* Expand a conditional branch on comparison CODE of ix86_compare_op0/op1
   to LABEL, splitting wide or FP comparisons as needed.  */
void
ix86_expand_branch (code, label)
     enum rtx_code code;
     rtx label;
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      /* Single compare + single conditional jump.  */
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
    case TFmode:
      {
	rtvec vec;
	int use_fcomi;
	enum rtx_code bypass_code, first_code, second_code;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

	/* Check whether we will use the natural sequence with one jump.
If so, we can expand jump early.  Otherwise delay expansion by
	   creating compound insn to not confuse optimizers.  */
	if (bypass_code == NIL && second_code == NIL
	    && TARGET_CMOVE)
	  {
	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx, NULL_RTX);
	  }
	else
	  {
	    /* Emit the branch as one opaque PARALLEL, clobbering the FP
	       status register (18) and the flags register (17), plus an
	       HImode scratch when the fnstsw path will be needed; the
	       splitter expands it after optimization.  */
	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
				  ix86_compare_op0, ix86_compare_op1);
	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
					gen_rtx_LABEL_REF (VOIDmode, label),
					pc_rtx);
	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	    use_fcomi = ix86_use_fcomi_compare (code);
	    vec = rtvec_alloc (3 + !use_fcomi);
	    RTVEC_ELT (vec, 0) = tmp;
	    RTVEC_ELT (vec, 1)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	    RTVEC_ELT (vec, 2)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	    if (! use_fcomi)
	      RTVEC_ELT (vec, 3)
		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	  }
	return;
      }

    case DImode:
      if (TARGET_64BIT)
	goto simple;
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	/* Canonicalize: keep the constant (if any) in operand 1.  */
	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    /* Recurse as a simple SImode compare against zero.  */
	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	/* code1: branch on high word to LABEL; code2: branch on high word
	   past the low-word test (to label2); code3: unsigned test on the
	   low word.  NIL means the corresponding branch is not needed.  */
	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = NIL; code2 = NE;  break;
	  case NE:   code2 = NIL; break;

	  default:
	    abort ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != NIL)
	  ix86_expand_branch (code1, label);
	if (code2 != NIL)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != NIL)
	  emit_label (label2);
	return;
      }

    default:
      abort ();
    }
}

/* Split branch based on floating point condition.  Emits a jump to
   TARGET1 when comparison CODE of OP1 and OP2 holds, else to TARGET2
   (one of which is pc_rtx for a fall-through).  TMP is an optional
   HImode scratch for the fnstsw result.  */
void
ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
     enum rtx_code code;
     rtx op1, op2, target1, target2, tmp;
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  /* Normalize so that TARGET2 is the fall-through (pc_rtx).  */
  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
	 Assume the BYPASS and SECOND to be always test
	 for UNORDERED.
*/
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
	 to be updated.  Later we may run some experiments and see
	 if unordered values are more frequent in practice.  */
      if (bypass)
	bypass_probability = 1;
      if (second)
	second_probability = 1;
    }
  /* A bypass test routes unordered results around the main branch.  */
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
						 bypass,
						 gen_rtx_LABEL_REF (VOIDmode,
								    label),
						 pc_rtx)));
      if (bypass_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (bypass_probability),
			       REG_NOTES (i));
    }
  /* The main conditional branch.  */
  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
			   GEN_INT (probability),
			   REG_NOTES (i));
  /* A second test also branches to TARGET1 when it holds.  */
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
						 target2)));
      if (second_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (second_probability),
			       REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}

/* Expand a setcc of comparison CODE into QImode DEST.  Returns 1 on
   success (DONE) or 0 when the comparison cannot be expanded this way
   (FAIL; e.g. DImode compare on 32-bit).  */
int
ix86_expand_setcc (code, dest)
     enum rtx_code code;
     rtx dest;
{
  rtx ret, tmp, tmpreg;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == DImode
      && !TARGET_64BIT)
    return 0; /* FAIL */

  if (GET_MODE (dest) != QImode)
    abort ();

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      /* Combine the auxiliary test: AND for a (reversed) bypass test,
	 OR for a second test.  */
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  if (second_test)
	    abort ();
	  test = bypass_test;
	  bypass = 1;
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  return 1; /* DONE */
}

/* Expand an integer conditional move:
   operands[0] = operands[1] (a comparison of ix86_compare_op0/op1)
		 ? operands[2] : operands[3].
   Returns 1 on success (DONE), 0 on failure (FAIL).  */
int
ix86_expand_int_movcc (operands)
     rtx operands[];
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* When the compare code is not LTU or GEU, we can not use sbbl case.
     In case comparison is done with immediate, we can convert it to LTU or
     GEU by altering the integer.  */

  if ((code == LEU || code == GTU)
      && GET_CODE (ix86_compare_op1) == CONST_INT
      && mode != HImode
      && INTVAL (ix86_compare_op1) != -1
      /* For x86-64, the immediate field in the instruction is 32-bit
	 signed, so we can't increment a DImode value above 0x7fffffff.  */
      && (!TARGET_64BIT
	  || GET_MODE (ix86_compare_op0) != DImode
	  || INTVAL (ix86_compare_op1) != 0x7fffffff)
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      /* x <= C  ==  x < C+1;  x > C  ==  x >= C+1.  */
      if (code == LEU)
	code = LTU;
      else
	code = GEU;
      ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
				       GET_MODE (ix86_compare_op0));
    }

  /* Expand the compare into a detached sequence so it can be emitted at
     the point each strategy below chooses.  */
  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = gen_sequence ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.
*/

  if (mode != HImode
      && (mode != DImode || TARGET_64BIT)
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      /* Both arms are constants: try branch-free sequences.  */
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      if ((compare_code == LTU || compare_code == GEU)
	  && !second_test && !bypass_test)
	{
	  /* sbb strategy: materialize 0 / -1 from the carry flag, then
	     massage it into ct / cf arithmetically.  */

	  /* Detect overlap between destination and compare sources.  */
	  rtx tmp = out;

	  /* To simplify rest of code, restrict to the GEU case.  */
	  if (compare_code == LTU)
	    {
	      int tmp = ct;
	      ct = cf;
	      cf = tmp;
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	  diff = ct - cf;

	  if (reg_overlap_mentioned_p (out, ix86_compare_op0)
	      || reg_overlap_mentioned_p (out, ix86_compare_op1))
	    tmp = gen_reg_rtx (mode);

	  emit_insn (compare_seq);
	  if (mode == DImode)
	    emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
	  else
	    emit_insn (gen_x86_movsicc_0_m1 (tmp));

	  if (diff == 1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [addl dest, ct]
	       *
	       * Size 5 - 8.
	       */
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (ct),
					   tmp, 1, OPTAB_DIRECT);
	    }
	  else if (cf == -1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * orl $ct, dest
	       *
	       * Size 8.
	       */
	      tmp = expand_simple_binop (mode, IOR,
					 tmp, GEN_INT (ct),
					 tmp, 1, OPTAB_DIRECT);
	    }
	  else if (diff == -1 && ct)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * xorl $-1, dest
	       * [addl dest, cf]
	       *
	       * Size 8 - 11.
	       */
	      tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
	      if (cf)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (cf),
					   tmp, 1, OPTAB_DIRECT);
	    }
	  else
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * andl cf - ct, dest
	       * [addl dest, ct]
	       *
	       * Size 8 - 11.
	       */
	      tmp = expand_simple_binop (mode, AND,
					 tmp,
					 GEN_INT (trunc_int_for_mode
						  (cf - ct, mode)),
					 tmp, 1, OPTAB_DIRECT);
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (ct),
					   tmp, 1, OPTAB_DIRECT);
	    }

	  if (tmp != out)
	    emit_move_insn (out, tmp);

	  return 1; /* DONE */
	}

      diff = ct - cf;
      if (diff < 0)
	{
	  /* Normalize to ct > cf by reversing the condition.  */
	  HOST_WIDE_INT tmp;
	  tmp = ct, ct = cf, cf = tmp;
	  diff = -diff;
	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
	    {
	      /* We may be reversing unordered compare to normal compare, that
		 is not valid in general (we may convert non-trapping condition
		 to trapping one), however on i386 we currently emit all
		 comparisons unordered.  */
	      compare_code = reverse_condition_maybe_unordered (compare_code);
	      code = reverse_condition_maybe_unordered (code);
	    }
	  else
	    {
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	}
      /* setcc + lea strategy: possible when diff matches a lea scale
	 (1,2,4,8) or scale+base (3,5,9).  */
      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
	   || diff == 3 || diff == 5 || diff == 9)
	  && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
	{
	  /*
	   * xorl dest,dest
	   * cmpl op1,op2
	   * setcc dest
	   * lea cf(dest*(ct-cf)),dest
	   *
	   * Size 14.
	   *
	   * This also catches the degenerate setcc-only case.
	   */

	  rtx tmp;
	  int nops;

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to get arithmetics done in proper mode to match.  */
	  if (diff == 1)
	    tmp = out;
	  else
	    {
	      rtx out1;
	      out1 = out;
	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
	      nops++;
	      if (diff & 1)
		{
		  tmp = gen_rtx_PLUS (mode, tmp, out1);
		  nops++;
		}
	    }
	  if (cf != 0)
	    {
	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
	      nops++;
	    }
	  if (tmp != out
	      && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
	    {
	      /* A single-op expression may not match lea; allow it to be
		 split into add/shift by also clobbering the flags.  */
	      if (nops == 1)
		{
		  rtx clob;

		  clob = gen_rtx_REG (CCmode, FLAGS_REG);
		  clob = gen_rtx_CLOBBER (VOIDmode, clob);

		  tmp = gen_rtx_SET (VOIDmode, out, tmp);
		  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, copy_rtx (tmp), clob));
		  emit_insn (tmp);
		}
	      else
		emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
	    }
	  if (out != operands[0])
	    emit_move_insn (operands[0], out);

	  return 1; /* DONE */
	}

      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * Size 20.			Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       *
       * %%% Parameterize branch_cost on the tuning architecture, then
       * use that.  The 80386 couldn't care less about mispredicts.
       */

      if (!optimize_size && !TARGET_CMOVE)
	{
	  if (ct == 0)
	    {
	      ct = cf;
	      cf = 0;
	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
		{
		  /* We may be reversing unordered compare to normal compare,
		     that is not valid in general (we may convert non-trapping
		     condition to trapping one), however on i386 we currently
		     emit all comparisons unordered.  */
		  compare_code = reverse_condition_maybe_unordered (compare_code);
		  code = reverse_condition_maybe_unordered (code);
		}
	      else
		{
		  compare_code = reverse_condition (compare_code);
		  code = reverse_condition (code);
		}
	    }

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  out = expand_simple_binop (mode, PLUS,
				     out, constm1_rtx,
				     out, 1, OPTAB_DIRECT);
	  out = expand_simple_binop (mode, AND,
				     out,
				     GEN_INT (trunc_int_for_mode
					      (cf - ct, mode)),
				     out, 1, OPTAB_DIRECT);
	  out = expand_simple_binop (mode, PLUS,
				     out, GEN_INT (ct),
				     out, 1, OPTAB_DIRECT);
	  if (out != operands[0])
	    emit_move_insn (operands[0], out);

	  return 1; /* DONE */
	}
    }

  if (!TARGET_CMOVE)
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (optimize_size)
	return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load a
	 constant with the above and mask it in with a logical operation.  */

      if (GET_CODE (operands[2]) == CONST_INT)
	{
	  var = operands[3];
	  if (INTVAL (operands[2]) == 0)
	    operands[3] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[2]) == -1)
	    operands[3] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else if (GET_CODE (operands[3]) == CONST_INT)
	{
	  var = operands[2];
	  if (INTVAL (operands[3]) == 0)
	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1)
	    operands[2] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else
	return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
	return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
			  OPTAB_WIDEN);
      if (out != orig_out)
	emit_move_insn (orig_out, out);

      return 1; /* DONE */
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  /* The auxiliary cmovs read the destination, so it must not overlap
     the arm they would overwrite.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }
  if (! register_operand (operands[2], VOIDmode)
      && ! register_operand (operands[3], VOIDmode))
    operands[2] = force_reg (mode, operands[2]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode,
						compare_op, operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode,
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode,
						  second_test,
						  operands[2],
						  operands[0])));

  return 1; /* DONE */
}

/* Expand a floating point conditional move:
   operands[0] = operands[1] ? operands[2] : operands[3].
   Uses SSE min/max or masked moves when profitable, else fcmov.
   Returns 1 on success.  */
int
ix86_expand_fp_movcc (operands)
     rtx operands[];
{
  enum rtx_code code;
  rtx tmp;
  rtx compare_op, second_test, bypass_test;

  /* For SF/DFmode conditional moves based on comparisons
     in same mode, we may want to use SSE min/max instructions.  */
  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
      /* The SSE comparisons does not support the LTGT/UNEQ pair.  */
      && (!TARGET_IEEE_FP
	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
      /* We may be called from the post-reload splitter.  */
      && (!REG_P (operands[0])
	  || SSE_REG_P (operands[0])
	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
    {
      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
      code = GET_CODE (operands[1]);

      /* See if we have (cross) match between comparison operands and
	 conditional move operands.  */
      if (rtx_equal_p (operands[2], op1))
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = reverse_condition_maybe_unordered (code);
	}
      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
	{
	  /* Check for min operation.  */
	  if (code == LT)
	    {
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_minsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_mindf3 (operands[0], op0, op1));
	      return 1;
	    }
	  /* Check for max operation.  */
	  if (code == GT)
	    {
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_maxsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_maxdf3 (operands[0], op0, op1));
	      return 1;
	    }
	}
      /* Manage condition to be sse_comparison_operator.  In case we are
	 in non-ieee mode, try to canonicalize the destination operand
	 to be first in the comparison - this helps reload to avoid extra
	 moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
	{
	  rtx tmp = ix86_compare_op0;
	  ix86_compare_op0 = ix86_compare_op1;
	  ix86_compare_op1 = tmp;
	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      /* Similarly try to manage result to be first operand of conditional
	 move.  We also don't support the NE comparison on SSE, so try to
	 avoid it.  */
      if ((rtx_equal_p (operands[0], operands[3])
	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
	{
	  rtx tmp = operands[2];
	  operands[2] = operands[3];
	  operands[3] = tmp;
	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
					  (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      if (GET_MODE (operands[0]) == SFmode)
	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      else
	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* The floating point conditional move instructions don't directly
     support signed integer comparisons.
*/

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      /* fcmov cannot use this flag test directly: materialize the
	 comparison result with setcc and test that byte against zero
	 instead.  */
      if (second_test != NULL || bypass_test != NULL)
	abort ();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  /* The auxiliary fcmovs read the destination, so it must not overlap
     the arm they would overwrite.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						compare_op,
						operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  second_test,
						  operands[2],
						  operands[0])));

  return 1;
}

/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and nonoffsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.
   Returns the number of parts (2 or 3) stored into PARTS.  */

static int
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  int size;

  /* Part size is 4 bytes on 32-bit, 8 bytes on 64-bit; TFmode (the
     12-byte extended type) needs three 4-byte parts on 32-bit.  */
  if (!TARGET_64BIT)
    size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp moves,
     that force all constants to memory to allow combining.  */

  if (GET_CODE (operand) == MEM
      && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
    operand = get_pool_constant (XEXP (operand, 0));

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Consecutive hard registers; only valid after reload.  */
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      /* Decompose the FP constant into its target word images.  */
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		case TFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = GEN_INT (trunc_int_for_mode (l[2], SImode));
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = GEN_INT (trunc_int_for_mode (l[1], SImode));
	      parts[0] = GEN_INT (trunc_int_for_mode (l[0], SImode));
	    }
	  else
	    abort ();
	}
    }
  else
    {
      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  /* 64-bit: one DImode part plus one SImode part.  */
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[3];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = GEN_INT (trunc_int_for_mode
			     ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
			      + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
			      DImode));
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);
	      parts[1] = GEN_INT (trunc_int_for_mode (l[2], SImode));
	    }
	  else
	    abort ();
	}
    }

  return size;
}

/* Emit insns to perform a move or push of DI, DF, and XF values.
   operands[0] is the destination, operands[1] the source; both are
   split into parts and moved piecewise in an order that copes with
   overlap.  (NOTE(review): the historical comment here described
   operands 2-7 and a bool return; the function visibly takes only
   operands 0-1 and returns void — the old text was stale.)  */

void
ix86_split_long_move (operands)
     rtx operands[];
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.
     By hiding the fact here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  collisions = 1;
	  emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
				  XEXP (part[1][0], 0)));
	  part[1][0] = change_address (part[1][0],
				       TARGET_64BIT ? DImode : SImode,
				       part[0][nparts - 1]);
	  part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
	  if (nparts == 3)
	    part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      /* We use only first 12 bytes of TFmode value, but for pushing we
		 are required to adjust stack as if we were pushing real 16byte
		 value.  */
	      if (mode == TFmode && !TARGET_64BIT)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				       GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these comes from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      if (GET_CODE (part[1][1]) == MEM)
		part[1][1] = adjust_address (part[1][1], DImode, 0);
	      else if (REG_P (part[1][1]))
		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
	      else
		abort ();
	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      /* Pushes are emitted highest part first so values land on the
	 stack in the right order.  */
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}

/* Split a DImode left shift (operands[0] = operands[1] << operands[2])
   into SImode instructions.  SCRATCH may serve as a zero temporary for
   the variable-count adjustment when no new pseudos may be created.  */
void
ix86_split_ashldi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di
	(operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* Shifting by 32 or more: low word becomes zero, old low word
	     (shifted by count - 32) becomes the high word.  */
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable count: shld/shl handle count mod 32; a conditional
	 adjustment fixes up counts of 32 or more.  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}

/* Split a DImode arithmetic right shift into SImode instructions.
   SCRATCH may hold the sign-extension word when no new pseudos may be
   created.  */
void
ix86_split_ashrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);

	  /* Before reload we may still combine the two sar insns; after
	     reload copy first so both operate on hard registers.  */
	  if (! reload_completed)
	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
	  else
	    {
	      emit_move_insn (high[0], low[0]);
	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
	    }

	  if (count > 32)
	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = gen_reg_rtx (SImode);
	  /* scratch = sign bit replicated across the word.  */
	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}

/* Split a DImode logical right shift into SImode instructions.  SCRATCH
   may serve as a zero temporary when no new pseudos may be created.  */
void
ix86_split_lshrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}

/* Helper function for the string operations below.  Test VARIABLE by
   AND-ing it with VALUE; the emitted code jumps to the returned label
   when the masked bits are zero (i.e. when VARIABLE is suitably
   aligned).  */
static rtx
ix86_expand_aligntest (variable, value)
     rtx variable;
     int value;
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  return label;
}

/* Adjust COUNTER by the VALUE (i.e. emit COUNTER -= VALUE).  */
static void
ix86_adjust_counter (countreg, value)
     rtx countreg;
     HOST_WIDE_INT value;
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend possibly SImode EXP to Pmode register.
   */
rtx
ix86_zero_extend_to_Pmode (exp)
     rtx exp;
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}

/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  Returns 1 when the expansion was done inline, 0 to
   fall back to a library call.  expand_clrstr contains similar code.  */
int
ix86_expand_movstr (dst, src, count_exp, align_exp)
     rtx dst, src, count_exp, align_exp;
{
  rtx srcreg, destreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;
  rtx insns;

  /* Collect the insns in a sequence so memory attributes can be fixed up
     before they are emitted for real.  */
  start_sequence ();

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    count = INTVAL (count_exp);

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      if (TARGET_64BIT)
	emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
					destreg, srcreg, countreg));
      else
	emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
				  destreg, srcreg, countreg));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  if (size == 4)
	    {
	      if (TARGET_64BIT)
		emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
						destreg, srcreg, countreg));
	      else
		emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
					  destreg, srcreg, countreg));
	    }
	  else
	    emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
					    destreg, srcreg, countreg));
	}
      /* Copy the remaining (count % size) bytes with single moves.  */
      if (size == 8 && (count & 0x04))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if (count & 0x02)
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (count & 0x01)
	emit_insn (gen_strmovqi (destreg, srcreg));
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
     allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.
     */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
	{
	  end_sequence ();
	  return 0;
	}

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
	 than 4 bytes, because gcc is able to optimize such code better (in
	 the case the destination or the count really is aligned, gcc is often
	 able to predict the branches) and also it is friendlier to the
	 hardware branch prediction.

	 Using loops is beneficial for generic case, because we can
	 handle small counts using the loops.  Many CPUs (such as Athlon)
	 have large REP prefix setup costs.

	 This is quite costly.  Maybe we can revisit this decision later or
	 add some customizability to this code.  */

      /* If the count is unknown and the desired alignment cannot be
	 proven, branch around the alignment code for small counts.  */
      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}
      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
					  destreg, srcreg, countreg2));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
	  emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
				    destreg, srcreg, countreg2));
	}

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      /* Copy the remaining bytes not handled by rep movs.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if ((align <= 4 || count == 0) && TARGET_64BIT)
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strmovqi (destreg, srcreg));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }

  insns = get_insns ();
  end_sequence ();

  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
  emit_insns (insns);
  return 1;
}

/* Expand string clear operation (bzero).  Use i386 string operations
   when profitable.  Returns 1 when expanded inline, 0 to fall back to a
   library call.  expand_movstr contains similar code.  */
int
ix86_expand_clrstr (src, count_exp, align_exp)
     rtx src, count_exp, align_exp;
{
  rtx destreg, zeroreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    count = INTVAL (count_exp);
  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.
     */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      if (TARGET_64BIT)
	emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
					 destreg, countreg));
      else
	emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
				   destreg, countreg));
    }
  /* For constant aligned (or small unaligned) clears use rep stos of
     word size followed by code storing the rest, mirroring movstr.  */
  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  if (size == 4)
	    {
	      if (TARGET_64BIT)
		emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
						 destreg, countreg));
	      else
		emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
					   destreg, countreg));
	    }
	  else
	    emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
					     destreg, countreg));
	}
      /* Store the remaining (count % size) bytes with single stores.  */
      if (size == 8 && (count & 0x04))
	emit_insn (gen_strsetsi (destreg,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (count & 0x02)
	emit_insn (gen_strsethi (destreg,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (count & 0x01)
	emit_insn (gen_strsetqi (destreg,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
    }
  else
    {
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.
	 */
      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);

      /* If the count is unknown and the desired alignment cannot be
	 proven, branch around the alignment code for small counts.  */
      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strsetqi (destreg,
				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strsethi (destreg,
				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
					     ? gen_rtx_SUBREG (SImode, zeroreg, 0)
					     : zeroreg)));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}

      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
					   destreg, countreg2));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
	  emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
				     destreg, countreg2));
	}
      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      /* Store the remaining bytes not handled by rep stos.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strsetsi (destreg,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strsetsi (destreg,
				   gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strsethi (destreg,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strsethi (destreg,
				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strsetqi (destreg,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strsetqi (destreg,
				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  return 1;
}

/* Expand strlen.  Returns 1 when expanded inline, 0 to fall back to a
   library call.  */
int
ix86_expand_strlen (out, src, eoschar, align)
     rtx out, src, eoschar, align;
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid its
     expanding unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.
	 */
      if (TARGET_64BIT)
	emit_insn (gen_subdi3 (out, out, addr));
      else
	emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      /* Use repnz; scasb via the strlenqi patterns: scan for EOSCHAR with
	 the count register preloaded to -1, then recover the length from
	 the remaining count (~count - 1).  */
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
					 align, scratch4, scratch3));
	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
	}
      else
	{
	  emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
				     align, scratch4, scratch3));
	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
	}
    }
  return 1;
}

/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
   not aligned, otherwise undefined

   This is just the body.  It needs the initialisations mentioned above
   and some address computing at the end.  These things are done in
   i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (out, align_rtx)
     rtx out, align_rtx;
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (out, out, const1_rtx));
	  else
	    emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 GEN_INT (trunc_int_for_mode
				  (0x80808080, SImode))));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
			      gen_rtx_PLUS (Pmode, out, GEN_INT (2))));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						    reg2,
						    out)));

    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
      else
	emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));

  emit_label (end_0_label);
}

/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.
*/

static void
ix86_init_machine_status (p)
     struct function *p;
{
  /* Allocate a zeroed per-function machine_function record.  */
  p->machine = (struct machine_function *)
    xcalloc (1, sizeof (struct machine_function));
}

/* Mark machine specific bits of P for GC.  */
static void
ix86_mark_machine_status (p)
     struct function *p;
{
  struct machine_function *machine = p->machine;
  enum machine_mode mode;
  int n;

  if (! machine)
    return;

  /* Keep every remembered stack-slot rtx (per mode and slot number)
     alive across garbage collection.  */
  for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
       mode = (enum machine_mode) ((int) mode + 1))
    for (n = 0; n < MAX_386_STACK_LOCALS; n++)
      ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
}

/* Free the machine-dependent state of P.  */
static void
ix86_free_machine_status (p)
     struct function *p;
{
  free (p->machine);
  p->machine = NULL;
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (mode, n)
     enum machine_mode mode;
     int n;
{
  if (n < 0 || n >= MAX_386_STACK_LOCALS)
    abort ();

  /* Slots are created lazily and cached per (mode, n).  */
  if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
    ix86_stack_locals[(int) mode][n]
      = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  return ix86_stack_locals[(int) mode][n];
}

/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

static int
memory_address_length (addr)
     rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  /* Autoincrement forms contribute no additional address bytes here.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* Special cases: ebp and esp need the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* 'K' accepts constants that fit in a signed 8-bit field.  */
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
	    len = 1;
	  else
	    len = 4;
	}

      /* An index requires the two-byte modrm form.  */
      if (index)
	len += 1;
    }

  return len;
}

/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */
int
ix86_attr_length_immediate_default (insn, shortform)
     rtx insn;
     int shortform;
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	/* At most one immediate operand is expected per insn.  */
	if (len)
	  abort ();
	if (shortform
	    && GET_CODE (recog_data.operand[i]) == CONST_INT
	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
	  len = 1;
	else
	  {
	    switch (get_attr_mode (insn))
	      {
	      case MODE_QI:
		len+=1;
		break;
	      case MODE_HI:
		len+=2;
		break;
	      case MODE_SI:
		len+=4;
		break;
	      /* Immediates for DImode instructions are encoded as 32bit sign extended values.  */
	      case MODE_DI:
		len+=4;
		break;
	      default:
		fatal_insn ("unknown insn mode", insn);
	      }
	  }
      }
  return len;
}
/* Compute default value for "length_address" attribute.
*/ 9994int 9995ix86_attr_length_address_default (insn) 9996 rtx insn; 9997{ 9998 int i; 9999 extract_insn_cached (insn); 10000 for (i = recog_data.n_operands - 1; i >= 0; --i) 10001 if (GET_CODE (recog_data.operand[i]) == MEM) 10002 { 10003 return memory_address_length (XEXP (recog_data.operand[i], 0)); 10004 break; 10005 } 10006 return 0; 10007} 10008 10009/* Return the maximum number of instructions a cpu can issue. */ 10010 10011static int 10012ix86_issue_rate () 10013{ 10014 switch (ix86_cpu) 10015 { 10016 case PROCESSOR_PENTIUM: 10017 case PROCESSOR_K6: 10018 return 2; 10019 10020 case PROCESSOR_PENTIUMPRO: 10021 case PROCESSOR_PENTIUM4: 10022 case PROCESSOR_ATHLON: 10023 return 3; 10024 10025 default: 10026 return 1; 10027 } 10028} 10029 10030/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set 10031 by DEP_INSN and nothing set by DEP_INSN. */ 10032 10033static int 10034ix86_flags_dependant (insn, dep_insn, insn_type) 10035 rtx insn, dep_insn; 10036 enum attr_type insn_type; 10037{ 10038 rtx set, set2; 10039 10040 /* Simplify the test for uninteresting insns. */ 10041 if (insn_type != TYPE_SETCC 10042 && insn_type != TYPE_ICMOV 10043 && insn_type != TYPE_FCMOV 10044 && insn_type != TYPE_IBR) 10045 return 0; 10046 10047 if ((set = single_set (dep_insn)) != 0) 10048 { 10049 set = SET_DEST (set); 10050 set2 = NULL_RTX; 10051 } 10052 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL 10053 && XVECLEN (PATTERN (dep_insn), 0) == 2 10054 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET 10055 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET) 10056 { 10057 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 10058 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 10059 } 10060 else 10061 return 0; 10062 10063 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG) 10064 return 0; 10065 10066 /* This test is true if the dependent insn reads the flags but 10067 not any other potentially set register. 
*/ 10068 if (!reg_overlap_mentioned_p (set, PATTERN (insn))) 10069 return 0; 10070 10071 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn))) 10072 return 0; 10073 10074 return 1; 10075} 10076 10077/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory 10078 address with operands set by DEP_INSN. */ 10079 10080static int 10081ix86_agi_dependant (insn, dep_insn, insn_type) 10082 rtx insn, dep_insn; 10083 enum attr_type insn_type; 10084{ 10085 rtx addr; 10086 10087 if (insn_type == TYPE_LEA 10088 && TARGET_PENTIUM) 10089 { 10090 addr = PATTERN (insn); 10091 if (GET_CODE (addr) == SET) 10092 ; 10093 else if (GET_CODE (addr) == PARALLEL 10094 && GET_CODE (XVECEXP (addr, 0, 0)) == SET) 10095 addr = XVECEXP (addr, 0, 0); 10096 else 10097 abort (); 10098 addr = SET_SRC (addr); 10099 } 10100 else 10101 { 10102 int i; 10103 extract_insn_cached (insn); 10104 for (i = recog_data.n_operands - 1; i >= 0; --i) 10105 if (GET_CODE (recog_data.operand[i]) == MEM) 10106 { 10107 addr = XEXP (recog_data.operand[i], 0); 10108 goto found; 10109 } 10110 return 0; 10111 found:; 10112 } 10113 10114 return modified_in_p (addr, dep_insn); 10115} 10116 10117static int 10118ix86_adjust_cost (insn, link, dep_insn, cost) 10119 rtx insn, link, dep_insn; 10120 int cost; 10121{ 10122 enum attr_type insn_type, dep_insn_type; 10123 enum attr_memory memory, dep_memory; 10124 rtx set, set2; 10125 int dep_insn_code_number; 10126 10127 /* Anti and output depenancies have zero cost on all CPUs. */ 10128 if (REG_NOTE_KIND (link) != 0) 10129 return 0; 10130 10131 dep_insn_code_number = recog_memoized (dep_insn); 10132 10133 /* If we can't recognize the insns, we can't really do anything. 
*/
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require value to be ready one cycle
	 earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
	  && dep_insn_type != TYPE_FMOV
	  && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
	cost += 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && GET_CODE (SET_DEST (set2)) == MEM)
	cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);
      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;

    case PROCESSOR_ATHLON:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
	{
	  if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
	    cost += 2;
	  else
	    cost += 3;
	}
      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 0;
	  else if (cost >= 3)
	    cost -= 3;
	  else
	    cost = 0;
	}
      /* NOTE(review): no `break' here -- control falls through into the
	 empty default case.  Harmless today, but looks unintentional.  */

    default:
      break;
    }

  return cost;
}

/* Per-basic-block scheduling state; reset by ix86_sched_init.  */
static union
{
  struct ppro_sched_data
  {
    rtx decode[3];
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;

/* Return the length of INSN, or a conservative 128 bytes when INSN
   is not recognized.  */
static int
ix86_safe_length (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_length (insn);
  else
    return 128;
}

/* NOTE(review): despite the name, this returns the full "length"
   attribute (same as ix86_safe_length) for recognized insns -- confirm
   whether a separate prefix-length attribute was intended.  Unrecognized
   insns count as 0 here rather than 128.  */
static int
ix86_safe_length_prefix (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_length (insn);
  else
    return 0;
}

/* Return the "memory" attribute of INSN, or MEMORY_UNKNOWN when INSN
   is not recognized.  */
static enum attr_memory
ix86_safe_memory (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_memory (insn);
  else
    return MEMORY_UNKNOWN;
}

/* Return the Pentium pairability of INSN, defaulting to not-pairable
   when INSN is not recognized.  */
static enum attr_pent_pair
ix86_safe_pent_pair (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_pent_pair (insn);
  else
    return PENT_PAIR_NP;
}

/* Return the PPro uop class of INSN, defaulting to the conservative
   "many uops" when INSN is not recognized.  */
static enum attr_ppro_uops
ix86_safe_ppro_uops (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_ppro_uops (insn);
  else
    return PPRO_UOPS_MANY;
}

/* Print the insn UIDs currently occupying the three PPro decoders
   to DUMP.  */
static void
ix86_dump_ppro_packet (dump)
     FILE *dump;
{
  if (ix86_sched_data.ppro.decode[0])
    {
      fprintf (dump, "PPRO packet: %d",
	       INSN_UID (ix86_sched_data.ppro.decode[0]));
      if (ix86_sched_data.ppro.decode[1])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
      if (ix86_sched_data.ppro.decode[2])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
      fputc ('\n', dump);
    }
}

/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ix86_sched_init (dump, sched_verbose, veclen)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int veclen ATTRIBUTE_UNUSED;
{
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
}

/* Shift INSN to SLOT, and shift everything else down.  */

static void
ix86_reorder_insn (insnp, slot)
     rtx *insnp, *slot;
{
  if (insnp != slot)
    {
      rtx insn = *insnp;
      do
	insnp[0] = insnp[1];
      while (++insnp != slot);
      *insnp = insn;
    }
}

/* Find an instruction with given pairability and minimal amount of cycles
   lost by the fact that the CPU waits for both pipelines to finish before
   reading next instructions.  Also take care that both instructions together
   can not exceed 7 bytes.
*/

static rtx *
ix86_pent_find_pair (e_ready, ready, type, first)
     rtx *e_ready;
     rtx *ready;
     enum attr_pent_pair type;
     rtx first;
{
  int mincycles, cycles;
  enum attr_pent_pair tmp;
  enum attr_memory memory;
  rtx *insnp, *bestinsnp = NULL;

  /* FIRST itself must fit the 7-byte pairing limit.  */
  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
    return NULL;

  memory = ix86_safe_memory (first);
  cycles = result_ready_cost (first);
  mincycles = INT_MAX;

  /* Scan the ready list from the end for a candidate of pairability
     TYPE; stop early once a zero-cost pairing is found.  */
  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
      {
	enum attr_memory second_memory;
	int secondcycles, currentcycles;

	second_memory = ix86_safe_memory (*insnp);
	secondcycles = result_ready_cost (*insnp);
	currentcycles = abs (cycles - secondcycles);

	if (secondcycles >= 1 && cycles >= 1)
	  {
	    /* Two read/modify/write instructions together takes two
	       cycles longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
	      currentcycles += 2;

	    /* Read modify/write instruction followed by read/modify
	       takes one cycle longer.  */
	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
		&& tmp != PENT_PAIR_UV
		&& ix86_safe_pent_pair (first) != PENT_PAIR_UV)
	      currentcycles += 1;
	  }
	if (currentcycles < mincycles)
	  bestinsnp = insnp, mincycles = currentcycles;
      }

  return bestinsnp;
}

/* Subroutines of ix86_sched_reorder.  */

static void
ix86_sched_reorder_pentium (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  enum attr_pent_pair pair1, pair2;
  rtx *insnp;

  /* This wouldn't be necessary if Haifa knew that static insn ordering
     is important to which pipe an insn is issued to.  So we have to make
     some minor rearrangements.  */

  pair1 = ix86_safe_pent_pair (*e_ready);

  /* If the first insn is non-pairable, let it be.  */
  if (pair1 == PENT_PAIR_NP)
    return;

  pair2 = PENT_PAIR_NP;
  insnp = 0;

  /* If the first insn is UV or PV pairable, search for a PU
     insn to go with.  */
  if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PU, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PU;
    }

  /* If the first insn is PU or UV pairable, search for a PV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP
      && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_PV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_PV;
    }

  /* If the first insn is pairable, search for a UV
     insn to go with.  */
  if (pair2 == PENT_PAIR_NP)
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
				   PENT_PAIR_UV, *e_ready);
      if (insnp)
	pair2 = PENT_PAIR_UV;
    }

  if (pair2 == PENT_PAIR_NP)
    return;

  /* Found something!  Decide if we need to swap the order.  */
  if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
      || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
	  && ix86_safe_memory (*e_ready) == MEMORY_BOTH
	  && ix86_safe_memory (*insnp) == MEMORY_LOAD))
    ix86_reorder_insn (insnp, e_ready);
  else
    ix86_reorder_insn (insnp, e_ready - 1);
}

static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready >= e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}

/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */
static int
ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *n_readyp;
     int clock_var ATTRIBUTE_UNUSED;
{
  int n_ready = *n_readyp;
  rtx *e_ready = ready + n_ready - 1;

  /* Nothing to rearrange with fewer than two ready insns.  */
  if (n_ready < 2)
    goto out;

  switch (ix86_cpu)
    {
    default:
      break;

    case PROCESSOR_PENTIUM:
      ix86_sched_reorder_pentium (ready, e_ready);
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

out:
  return ix86_issue_rate ();
}

/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.
*/

static int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    /* A many-uop insn occupies the whole decode group by itself:
	       dump any pending packet, show this insn alone, then clear.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    /* A 2-4 uop insn starts a new packet in decoder 0.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    /* Single-uop insn: place it in the first free decoder slot.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		/* Packet is now full; dump it and start a fresh one.  */
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}

/* Walk through INSNS and look for MEM references whose address is DSTREG or
   SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
   appropriate.  */

void
ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
     rtx insns;
     rtx dstref, srcref, dstreg, srcreg;
{
  rtx insn;

  for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
				 dstreg, srcreg);
}

/* Subroutine of above to actually do the updating by recursively walking
   the rtx.  */

static void
ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx x;
     rtx dstref, srcref, dstreg, srcreg;
{
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);
  int i, j;

  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  /* Recurse into every rtx ('e') and rtx-vector ('E') operand of X.  */
  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
    {
      if (*format_ptr == 'e')
	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
				   dstreg, srcreg);
      else if (*format_ptr == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
				     dstreg, srcreg);
    }
}

/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.
*/

int
ix86_constant_alignment (exp, align)
     tree exp;
     int align;
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  /* Long string constants get cache-line alignment unless disabled.  */
  else if (TREE_CODE (exp) == STRING_CST && !TARGET_NO_ALIGN_LONG_STRINGS
	   && TREE_STRING_LENGTH (exp) >= 31 && align < 256)
    return 256;

  return align;
}

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (type, align)
     tree type;
     int align;
{
  /* Aggregates of 256 bits or more get at least 256-bit alignment.
     (TREE_INT_CST_HIGH nonzero means the size exceeds the low word.)  */
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Alignment is driven by the mode of the first field.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}

/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (type, align)
     tree type;
     int align;
{
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Alignment is driven by the mode of the first field.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
x86_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.
*/ 10879 rtx disp = expand_binop (SImode, sub_optab, fnaddr, 10880 plus_constant (tramp, 10), 10881 NULL_RTX, 1, OPTAB_DIRECT); 10882 emit_move_insn (gen_rtx_MEM (QImode, tramp), 10883 GEN_INT (trunc_int_for_mode (0xb9, QImode))); 10884 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt); 10885 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)), 10886 GEN_INT (trunc_int_for_mode (0xe9, QImode))); 10887 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp); 10888 } 10889 else 10890 { 10891 int offset = 0; 10892 /* Try to load address using shorter movl instead of movabs. 10893 We may want to support movq for kernel mode, but kernel does not use 10894 trampolines at the moment. */ 10895 if (x86_64_zero_extended_value (fnaddr)) 10896 { 10897 fnaddr = copy_to_mode_reg (DImode, fnaddr); 10898 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 10899 GEN_INT (trunc_int_for_mode (0xbb41, HImode))); 10900 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)), 10901 gen_lowpart (SImode, fnaddr)); 10902 offset += 6; 10903 } 10904 else 10905 { 10906 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 10907 GEN_INT (trunc_int_for_mode (0xbb49, HImode))); 10908 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)), 10909 fnaddr); 10910 offset += 10; 10911 } 10912 /* Load static chain using movabs to r10. 
*/ 10913 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 10914 GEN_INT (trunc_int_for_mode (0xba49, HImode))); 10915 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)), 10916 cxt); 10917 offset += 10; 10918 /* Jump to the r11 */ 10919 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 10920 GEN_INT (trunc_int_for_mode (0xff49, HImode))); 10921 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)), 10922 GEN_INT (trunc_int_for_mode (0xe3, QImode))); 10923 offset += 3; 10924 if (offset > TRAMPOLINE_SIZE) 10925 abort (); 10926 } 10927} 10928 10929#define def_builtin(MASK, NAME, TYPE, CODE) \ 10930do { \ 10931 if ((MASK) & target_flags) \ 10932 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \ 10933} while (0) 10934 10935struct builtin_description 10936{ 10937 const unsigned int mask; 10938 const enum insn_code icode; 10939 const char *const name; 10940 const enum ix86_builtins code; 10941 const enum rtx_code comparison; 10942 const unsigned int flag; 10943}; 10944 10945static const struct builtin_description bdesc_comi[] = 10946{ 10947 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 }, 10948 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 }, 10949 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 }, 10950 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 }, 10951 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 }, 10952 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 }, 10953 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 }, 10954 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 }, 10955 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 }, 10956 { 
 MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
};

/* Two-operand builtins.  Entries with a zero name are registered by hand
   in ix86_init_mmx_sse_builtins because they need non-uniform types.  */
static const struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  /* GT/GE are implemented as LT/LE with operands swapped (flag == 1);
     N* variants use the negated-mask patterns.  */
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.
 */
  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },

  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }

};

/* One-operand builtins; zero-named entries are registered by hand.  */
static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS,
 0, 0 },
  { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }

};

/* Target hook: register the target-specific builtins.  All MMX/SSE
   builtins are gated on TARGET_MMX here.  */
void
ix86_init_builtins ()
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
static void
ix86_init_mmx_sse_builtins ()
{
  const struct builtin_description * d;
  size_t i;
  tree endlink = void_list_node;

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE,
						 V4SF_type_node,
						 endlink)));
  tree v4si_ftype_v4sf_v4sf
    = build_function_type (V4SI_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE,
						 V4SF_type_node,
						 endlink)));
  /* MMX/SSE/integer conversions.
 */
  tree int_ftype_v4sf
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      endlink));
  tree int_ftype_v8qi
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));
  tree v4sf_ftype_v4sf_int
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v4sf_ftype_v4sf_v2si
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree int_ftype_v4hi_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_int_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  /* Miscellaneous.
 */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v4hi_ftype_v2si_v2si
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v4hi_ftype_v8qi_v8qi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v2si_ftype_v4hi_v4hi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_di
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree v2si_ftype_v2si_di
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree void_ftype_void
    = build_function_type (void_type_node, endlink);
  tree void_ftype_unsigned
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, unsigned_type_node,
				      endlink));
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, endlink);
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree v4sf_ftype_void
    = build_function_type (V4SF_type_node, endlink);
  tree v2si_ftype_v4sf
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      endlink));
  /* Loads/stores.  */
  tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
				  tree_cons (NULL_TREE, V8QI_type_node,
					     tree_cons (NULL_TREE,
							pchar_type_node,
							endlink)));
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type (void_type_node, maskmovq_args);
  tree v4sf_ftype_pfloat
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, pfloat_type_node,
				      endlink));
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, pv2si_type_node,
						 endlink)));
  tree void_ftype_pv2si_v4sf
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pv2si_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 endlink)));
  tree void_ftype_pfloat_v4sf
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pfloat_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 endlink)));
  tree void_ftype_pdi_di
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, pdi_type_node,
				      tree_cons (NULL_TREE,
						 long_long_unsigned_type_node,
						 endlink)));
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      endlink));

  /* Normal vector binops.
 */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type (V4SF_type_node,
			   tree_cons (NULL_TREE, V4SF_type_node,
				      tree_cons (NULL_TREE, V4SF_type_node,
						 endlink)));
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
				      tree_cons (NULL_TREE,
						 long_long_unsigned_type_node,
						 endlink)));

  tree v2si_ftype_v2sf
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SF_type_node,
				      endlink));
  tree v2sf_ftype_v2si
    = build_function_type (V2SF_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree v2si_ftype_v2si
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree v2sf_ftype_v2sf
    = build_function_type (V2SF_type_node,
			   tree_cons (NULL_TREE, V2SF_type_node,
				      endlink));
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type (V2SF_type_node,
			   tree_cons (NULL_TREE, V2SF_type_node,
				      tree_cons (NULL_TREE,
						 V2SF_type_node,
						 endlink)));
  tree v2si_ftype_v2sf_v2sf
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SF_type_node,
				      tree_cons (NULL_TREE,
						 V2SF_type_node,
						 endlink)));

  /* Add all builtins that are more or less simple operations on two
     operands.
 */
  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  abort ();
	}

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
	  || d->icode == CODE_FOR_maskncmpv4sf3
	  || d->icode == CODE_FOR_vmmaskcmpv4sf3
	  || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	type = v4si_ftype_v4sf_v4sf;

      def_builtin (d->mask, d->name, type, d->code);
    }

  /* Add the remaining MMX insns with somewhat more complicated types.
 */
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.
 */
  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
    def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);

  def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
  def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
  def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
  def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat,
 IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);

  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (x, mode)
     rtx x;
     enum machine_mode mode;
{
  if (x != const0_rtx)
    return x;
  /* Materialize a zeroed vector register in MODE's register class.  */
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
			      : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
				: gen_rtx_SUBREG (V4SFmode, x, 0)));
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.
*/ 11499 11500static rtx 11501ix86_expand_binop_builtin (icode, arglist, target) 11502 enum insn_code icode; 11503 tree arglist; 11504 rtx target; 11505{ 11506 rtx pat; 11507 tree arg0 = TREE_VALUE (arglist); 11508 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 11509 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 11510 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 11511 enum machine_mode tmode = insn_data[icode].operand[0].mode; 11512 enum machine_mode mode0 = insn_data[icode].operand[1].mode; 11513 enum machine_mode mode1 = insn_data[icode].operand[2].mode; 11514 11515 if (VECTOR_MODE_P (mode0)) 11516 op0 = safe_vector_operand (op0, mode0); 11517 if (VECTOR_MODE_P (mode1)) 11518 op1 = safe_vector_operand (op1, mode1); 11519 11520 if (! target 11521 || GET_MODE (target) != tmode 11522 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 11523 target = gen_reg_rtx (tmode); 11524 11525 /* In case the insn wants input operands in modes different from 11526 the result, abort. */ 11527 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1) 11528 abort (); 11529 11530 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 11531 op0 = copy_to_mode_reg (mode0, op0); 11532 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) 11533 op1 = copy_to_mode_reg (mode1, op1); 11534 11535 /* In the commutative cases, both op0 and op1 are nonimmediate_operand, 11536 yet one of the two must not be a memory. This is normally enforced 11537 by expanders, but we didn't bother to create one here. */ 11538 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM) 11539 op0 = copy_to_mode_reg (mode0, op0); 11540 11541 pat = GEN_FCN (icode) (target, op0, op1); 11542 if (! pat) 11543 return 0; 11544 emit_insn (pat); 11545 return target; 11546} 11547 11548/* In type_for_mode we restrict the ability to create TImode types 11549 to hosts with 64-bit H_W_I. So we've defined the SSE logicals 11550 to have a V4SFmode signature. Convert them in-place to TImode. 
   */

static rtx
ix86_expand_timode_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);

  /* The builtin has a V4SFmode signature but the insn pattern works on
     TImode; view both inputs as TImode and compute into a fresh TImode
     pseudo.  The incoming TARGET suggestion is deliberately ignored.  */
  op0 = gen_lowpart (TImode, op0);
  op1 = gen_lowpart (TImode, op1);
  target = gen_reg_rtx (TImode);

  if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
    op0 = copy_to_mode_reg (TImode, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
    op1 = copy_to_mode_reg (TImode, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (TImode, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);

  /* Hand the result back in the V4SFmode the caller expects.  */
  return gen_lowpart (V4SFmode, target);
}

/* Subroutine of ix86_expand_builtin to take care of stores.

   ICODE is the store pattern; ARGLIST carries (pointer, value).  The
   pointer argument is wrapped in a MEM of the pattern's destination
   mode.  Always returns 0 -- stores produce no value.  */

static rtx
ix86_expand_store_builtin (icode, arglist)
     enum insn_code icode;
     tree arglist;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Force the address into a register and dereference it.  */
  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));

  if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.

   ICODE/ARGLIST/TARGET as for ix86_expand_binop_builtin.  If DO_LOAD is
   nonzero the single argument is a pointer and is dereferenced (used
   for the load builtins); otherwise it is the operand itself.  */

static rtx
ix86_expand_unop_builtin (icode, arglist, target, do_load)
     enum insn_code icode;
     tree arglist;
     rtx target;
     int do_load;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.

   These patterns take two inputs (the scalar operation is merged with a
   pass-through of the upper elements), so the single builtin argument
   is supplied as both operands.  */

static rtx
ix86_expand_unop1_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  /* Duplicate the input; operand 2 uses the same mode as operand 1.  */
  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.

   D describes the builtin, including the insn code and the rtx
   comparison code to embed in the pattern.  D->flag nonzero means the
   comparison is not directly available in hardware and the operands
   must be swapped first.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      /* op1 is copied into a register here because it ends up in the
	 destination (operand 0) position after the swap.  */
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* Operand 3 of the pattern is the comparison rtx itself.  */
  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comi insns.

   Emits the comi/ucomi compare followed by a setcc of the resulting
   flags into a fresh SImode pseudo (via a QImode STRICT_LOW_PART).
   The incoming TARGET suggestion is ignored; the SImode pseudo holding
   the 0/1 result is returned.  */

static rtx
ix86_expand_sse_comi (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  /* Zero the full SImode result first so the QImode setcc below only
     has to write the low byte.  */
  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  gen_rtx_REG (CCmode, FLAGS_REG),
					  const0_rtx)));

  return SUBREG_REG (target);
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  /* Builtins needing special handling are dispatched here; everything
     else falls through to the bdesc_* table scans at the bottom.  */
  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_PEXTRW:
      icode = CODE_FOR_mmx_pextrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
      icode = CODE_FOR_mmx_pinsrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
      icode = TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq;
      /* Note the arg order is different from the operand order.
	 */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    /* The SSE logical builtins go through the TImode path; see the
       comment above ix86_expand_timode_binop_builtin.  */
    case IX86_BUILTIN_ANDPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
					       arglist, target);
    case IX86_BUILTIN_ANDNPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
					       arglist, target);
    case IX86_BUILTIN_ORPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
					       arglist, target);
    case IX86_BUILTIN_XORPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
					       arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
      icode = (fcode == IX86_BUILTIN_LOADHPS
	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      /* Second argument is a pointer; dereference it.  */
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS
	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      /* The movhps/movlps pattern is a two-input merge; the memory
	 destination doubles as the pass-through input here.  */
      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      /* ldmxcsr only takes a memory operand; go through a stack slot.  */
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
      icode = CODE_FOR_sse_shufps;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
      icode = CODE_FOR_mmx_pshufw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    /* 3DNow! builtins expand through the generic unop/binop helpers.  */
    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    /* 3DNow! Athlon extensions.  */
    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    default:
      break;
    }

  /* Table-driven builtins: scan the descriptor tables for FCODE.  */
  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.
	 */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}

/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.

   MODE is the mode of OPERAND.  On 64-bit targets with a red zone the
   value is stored below the stack pointer; otherwise it is pushed,
   splitting DImode into two SImode pushes on 32-bit.  Returns a MEM
   referring to the stored value; pair with ix86_free_from_memory.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      /* Red zone: scratch space below the stack pointer is safe to use
	 without adjusting %rsp.  */
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (DImode,
				      gen_rtx_PRE_DEC (DImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    /* Push high word first so the value sits little-endian in
	       memory at the final stack pointer.  */
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[1]));
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (GET_MODE (operand),
				      gen_rtx_PRE_DEC (SImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}

/* Free operand from the memory.

   Undoes the stack adjustment made by ix86_force_to_memory for MODE;
   a no-op when the red zone was used (nothing was pushed).  */
void
ix86_free_from_memory (mode)
     enum machine_mode mode;
{
  if (!TARGET_64BIT || !TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
	size = 2;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (x, class)
     rtx x;
     enum reg_class class;
{
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
	return NO_REGS;
      /* Floats can load 0 and 1.
	 */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
	{
	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
	  if (MAYBE_SSE_CLASS_P (class))
	    return (reg_class_subset_p (class, GENERAL_REGS)
		    ? GENERAL_REGS : FLOAT_REGS);
	  else
	    return class;
	}
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
	return NO_REGS;
    }
  /* MMX registers can't load constants at all.  */
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}

/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */
int
ix86_secondary_memory_needed (class1, class2, mode, strict)
     enum reg_class class1, class2;
     enum machine_mode mode;
     int strict;
{
  /* A "maybe" class mixing units indicates a machine-description bug;
     abort in strict mode, pessimistically answer "yes" otherwise.  */
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
	abort ();
      else
	return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
	  || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
	      && (mode) != SImode)
	  || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
	      && (mode) != SImode));
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */
int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
		   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
		   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return VALID_SSE_REG_MODE (mode);
  if (MMX_REGNO_P (regno))
    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
  /* We handle both integer and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care for QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */
int
ix86_memory_move_cost (mode, class, in)
     enum machine_mode mode;
     enum reg_class class;
     int in;
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	case TFmode:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  /* Integer classes.  */
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
	return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
		: ix86_cost->movzbl_load);
      else
	return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
		: ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
	      * (int) GET_MODE_SIZE (mode) / 4);
    }
}

#ifdef DO_GLOBAL_CTORS_BODY
/* Emit SVR3-style constructor registration: push the constructor's
   address in the init section.  */
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif

/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc ()
{
  int pos = 0;
  int i;

  /* First allocate the local general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* Global general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  /* SSE registers.
*/ 12586 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) 12587 reg_alloc_order [pos++] = i; 12588 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) 12589 reg_alloc_order [pos++] = i; 12590 12591 /* x87 registerts. */ 12592 if (TARGET_SSE_MATH) 12593 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) 12594 reg_alloc_order [pos++] = i; 12595 12596 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) 12597 reg_alloc_order [pos++] = i; 12598 12599 /* Initialize the rest of array as we do not allocate some registers 12600 at all. */ 12601 while (pos < FIRST_PSEUDO_REGISTER) 12602 reg_alloc_order [pos++] = 0; 12603} 12604 12605void 12606x86_output_mi_thunk (file, delta, function) 12607 FILE *file; 12608 int delta; 12609 tree function; 12610{ 12611 tree parm; 12612 rtx xops[3]; 12613 12614 if (ix86_regparm > 0) 12615 parm = TYPE_ARG_TYPES (TREE_TYPE (function)); 12616 else 12617 parm = NULL_TREE; 12618 for (; parm; parm = TREE_CHAIN (parm)) 12619 if (TREE_VALUE (parm) == void_type_node) 12620 break; 12621 12622 xops[0] = GEN_INT (delta); 12623 if (TARGET_64BIT) 12624 { 12625 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0; 12626 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]); 12627 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops); 12628 if (flag_pic) 12629 { 12630 fprintf (file, "\tjmp *"); 12631 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); 12632 fprintf (file, "@GOTPCREL(%%rip)\n"); 12633 } 12634 else 12635 { 12636 fprintf (file, "\tjmp "); 12637 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); 12638 fprintf (file, "\n"); 12639 } 12640 } 12641 else 12642 { 12643 if (parm) 12644 xops[1] = gen_rtx_REG (SImode, 0); 12645 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)))) 12646 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8)); 12647 else 12648 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4)); 12649 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops); 12650 
12651 if (flag_pic) 12652 { 12653 xops[0] = pic_offset_table_rtx; 12654 xops[1] = gen_label_rtx (); 12655 xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); 12656 12657 if (ix86_regparm > 2) 12658 abort (); 12659 output_asm_insn ("push{l}\t%0", xops); 12660 output_asm_insn ("call\t%P1", xops); 12661 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1])); 12662 output_asm_insn ("pop{l}\t%0", xops); 12663 output_asm_insn 12664 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops); 12665 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0)); 12666 output_asm_insn 12667 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops); 12668 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n"); 12669 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n"); 12670 } 12671 else 12672 { 12673 fprintf (file, "\tjmp "); 12674 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); 12675 fprintf (file, "\n"); 12676 } 12677 } 12678} 12679 12680int 12681x86_field_alignment (field, computed) 12682 tree field; 12683 int computed; 12684{ 12685 enum machine_mode mode; 12686 tree type = TREE_TYPE (field); 12687 12688 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) 12689 return computed; 12690 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE 12691 ? get_inner_array_type (type) : type); 12692 if (mode == DFmode || mode == DCmode 12693 || GET_MODE_CLASS (mode) == MODE_INT 12694 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) 12695 return MIN (32, computed); 12696 return computed; 12697} 12698