i386.c revision 122194
1/* Subroutines used for code generation on IA-32. 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 3 2002, 2003 Free Software Foundation, Inc. 4 5This file is part of GNU CC. 6 7GNU CC is free software; you can redistribute it and/or modify 8it under the terms of the GNU General Public License as published by 9the Free Software Foundation; either version 2, or (at your option) 10any later version. 11 12GNU CC is distributed in the hope that it will be useful, 13but WITHOUT ANY WARRANTY; without even the implied warranty of 14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15GNU General Public License for more details. 16 17You should have received a copy of the GNU General Public License 18along with GNU CC; see the file COPYING. If not, write to 19the Free Software Foundation, 59 Temple Place - Suite 330, 20Boston, MA 02111-1307, USA. */ 21 22 23/* $FreeBSD: head/contrib/gcc/config/i386/i386.c 122194 2003-11-07 03:17:46Z kan $ */ 24 25 26#include "config.h" 27#include "system.h" 28#include "rtl.h" 29#include "tree.h" 30#include "tm_p.h" 31#include "regs.h" 32#include "hard-reg-set.h" 33#include "real.h" 34#include "insn-config.h" 35#include "conditions.h" 36#include "output.h" 37#include "insn-attr.h" 38#include "flags.h" 39#include "except.h" 40#include "function.h" 41#include "recog.h" 42#include "expr.h" 43#include "optabs.h" 44#include "toplev.h" 45#include "basic-block.h" 46#include "ggc.h" 47#include "target.h" 48#include "target-def.h" 49#include "langhooks.h" 50 51#ifndef CHECK_STACK_LIMIT 52#define CHECK_STACK_LIMIT (-1) 53#endif 54 55/* Processor costs (relative to an add) */ 56static const 57struct processor_costs size_cost = { /* costs for tunning for size */ 58 2, /* cost of an add instruction */ 59 3, /* cost of a lea instruction */ 60 2, /* variable shift costs */ 61 3, /* constant shift costs */ 62 3, /* cost of starting a multiply */ 63 0, /* cost of multiply per each bit set */ 64 3, /* cost of a 
divide/mod */ 65 3, /* cost of movsx */ 66 3, /* cost of movzx */ 67 0, /* "large" insn */ 68 2, /* MOVE_RATIO */ 69 2, /* cost for loading QImode using movzbl */ 70 {2, 2, 2}, /* cost of loading integer registers 71 in QImode, HImode and SImode. 72 Relative to reg-reg move (2). */ 73 {2, 2, 2}, /* cost of storing integer registers */ 74 2, /* cost of reg,reg fld/fst */ 75 {2, 2, 2}, /* cost of loading fp registers 76 in SFmode, DFmode and XFmode */ 77 {2, 2, 2}, /* cost of loading integer registers */ 78 3, /* cost of moving MMX register */ 79 {3, 3}, /* cost of loading MMX registers 80 in SImode and DImode */ 81 {3, 3}, /* cost of storing MMX registers 82 in SImode and DImode */ 83 3, /* cost of moving SSE register */ 84 {3, 3, 3}, /* cost of loading SSE registers 85 in SImode, DImode and TImode */ 86 {3, 3, 3}, /* cost of storing SSE registers 87 in SImode, DImode and TImode */ 88 3, /* MMX or SSE register to integer */ 89 0, /* size of prefetch block */ 90 0, /* number of parallel prefetches */ 91 2, /* cost of FADD and FSUB insns. */ 92 2, /* cost of FMUL instruction. */ 93 2, /* cost of FDIV instruction. */ 94 2, /* cost of FABS instruction. */ 95 2, /* cost of FCHS instruction. */ 96 2, /* cost of FSQRT instruction. */ 97}; 98 99/* Processor costs (relative to an add) */ 100static const 101struct processor_costs i386_cost = { /* 386 specific costs */ 102 1, /* cost of an add instruction */ 103 1, /* cost of a lea instruction */ 104 3, /* variable shift costs */ 105 2, /* constant shift costs */ 106 6, /* cost of starting a multiply */ 107 1, /* cost of multiply per each bit set */ 108 23, /* cost of a divide/mod */ 109 3, /* cost of movsx */ 110 2, /* cost of movzx */ 111 15, /* "large" insn */ 112 3, /* MOVE_RATIO */ 113 4, /* cost for loading QImode using movzbl */ 114 {2, 4, 2}, /* cost of loading integer registers 115 in QImode, HImode and SImode. 116 Relative to reg-reg move (2). 
*/ 117 {2, 4, 2}, /* cost of storing integer registers */ 118 2, /* cost of reg,reg fld/fst */ 119 {8, 8, 8}, /* cost of loading fp registers 120 in SFmode, DFmode and XFmode */ 121 {8, 8, 8}, /* cost of loading integer registers */ 122 2, /* cost of moving MMX register */ 123 {4, 8}, /* cost of loading MMX registers 124 in SImode and DImode */ 125 {4, 8}, /* cost of storing MMX registers 126 in SImode and DImode */ 127 2, /* cost of moving SSE register */ 128 {4, 8, 16}, /* cost of loading SSE registers 129 in SImode, DImode and TImode */ 130 {4, 8, 16}, /* cost of storing SSE registers 131 in SImode, DImode and TImode */ 132 3, /* MMX or SSE register to integer */ 133 0, /* size of prefetch block */ 134 0, /* number of parallel prefetches */ 135 23, /* cost of FADD and FSUB insns. */ 136 27, /* cost of FMUL instruction. */ 137 88, /* cost of FDIV instruction. */ 138 22, /* cost of FABS instruction. */ 139 24, /* cost of FCHS instruction. */ 140 122, /* cost of FSQRT instruction. */ 141}; 142 143static const 144struct processor_costs i486_cost = { /* 486 specific costs */ 145 1, /* cost of an add instruction */ 146 1, /* cost of a lea instruction */ 147 3, /* variable shift costs */ 148 2, /* constant shift costs */ 149 12, /* cost of starting a multiply */ 150 1, /* cost of multiply per each bit set */ 151 40, /* cost of a divide/mod */ 152 3, /* cost of movsx */ 153 2, /* cost of movzx */ 154 15, /* "large" insn */ 155 3, /* MOVE_RATIO */ 156 4, /* cost for loading QImode using movzbl */ 157 {2, 4, 2}, /* cost of loading integer registers 158 in QImode, HImode and SImode. 159 Relative to reg-reg move (2). 
*/ 160 {2, 4, 2}, /* cost of storing integer registers */ 161 2, /* cost of reg,reg fld/fst */ 162 {8, 8, 8}, /* cost of loading fp registers 163 in SFmode, DFmode and XFmode */ 164 {8, 8, 8}, /* cost of loading integer registers */ 165 2, /* cost of moving MMX register */ 166 {4, 8}, /* cost of loading MMX registers 167 in SImode and DImode */ 168 {4, 8}, /* cost of storing MMX registers 169 in SImode and DImode */ 170 2, /* cost of moving SSE register */ 171 {4, 8, 16}, /* cost of loading SSE registers 172 in SImode, DImode and TImode */ 173 {4, 8, 16}, /* cost of storing SSE registers 174 in SImode, DImode and TImode */ 175 3, /* MMX or SSE register to integer */ 176 0, /* size of prefetch block */ 177 0, /* number of parallel prefetches */ 178 8, /* cost of FADD and FSUB insns. */ 179 16, /* cost of FMUL instruction. */ 180 73, /* cost of FDIV instruction. */ 181 3, /* cost of FABS instruction. */ 182 3, /* cost of FCHS instruction. */ 183 83, /* cost of FSQRT instruction. */ 184}; 185 186static const 187struct processor_costs pentium_cost = { 188 1, /* cost of an add instruction */ 189 1, /* cost of a lea instruction */ 190 4, /* variable shift costs */ 191 1, /* constant shift costs */ 192 11, /* cost of starting a multiply */ 193 0, /* cost of multiply per each bit set */ 194 25, /* cost of a divide/mod */ 195 3, /* cost of movsx */ 196 2, /* cost of movzx */ 197 8, /* "large" insn */ 198 6, /* MOVE_RATIO */ 199 6, /* cost for loading QImode using movzbl */ 200 {2, 4, 2}, /* cost of loading integer registers 201 in QImode, HImode and SImode. 202 Relative to reg-reg move (2). 
*/ 203 {2, 4, 2}, /* cost of storing integer registers */ 204 2, /* cost of reg,reg fld/fst */ 205 {2, 2, 6}, /* cost of loading fp registers 206 in SFmode, DFmode and XFmode */ 207 {4, 4, 6}, /* cost of loading integer registers */ 208 8, /* cost of moving MMX register */ 209 {8, 8}, /* cost of loading MMX registers 210 in SImode and DImode */ 211 {8, 8}, /* cost of storing MMX registers 212 in SImode and DImode */ 213 2, /* cost of moving SSE register */ 214 {4, 8, 16}, /* cost of loading SSE registers 215 in SImode, DImode and TImode */ 216 {4, 8, 16}, /* cost of storing SSE registers 217 in SImode, DImode and TImode */ 218 3, /* MMX or SSE register to integer */ 219 0, /* size of prefetch block */ 220 0, /* number of parallel prefetches */ 221 3, /* cost of FADD and FSUB insns. */ 222 3, /* cost of FMUL instruction. */ 223 39, /* cost of FDIV instruction. */ 224 1, /* cost of FABS instruction. */ 225 1, /* cost of FCHS instruction. */ 226 70, /* cost of FSQRT instruction. */ 227}; 228 229static const 230struct processor_costs pentiumpro_cost = { 231 1, /* cost of an add instruction */ 232 1, /* cost of a lea instruction */ 233 1, /* variable shift costs */ 234 1, /* constant shift costs */ 235 4, /* cost of starting a multiply */ 236 0, /* cost of multiply per each bit set */ 237 17, /* cost of a divide/mod */ 238 1, /* cost of movsx */ 239 1, /* cost of movzx */ 240 8, /* "large" insn */ 241 6, /* MOVE_RATIO */ 242 2, /* cost for loading QImode using movzbl */ 243 {4, 4, 4}, /* cost of loading integer registers 244 in QImode, HImode and SImode. 245 Relative to reg-reg move (2). 
*/ 246 {2, 2, 2}, /* cost of storing integer registers */ 247 2, /* cost of reg,reg fld/fst */ 248 {2, 2, 6}, /* cost of loading fp registers 249 in SFmode, DFmode and XFmode */ 250 {4, 4, 6}, /* cost of loading integer registers */ 251 2, /* cost of moving MMX register */ 252 {2, 2}, /* cost of loading MMX registers 253 in SImode and DImode */ 254 {2, 2}, /* cost of storing MMX registers 255 in SImode and DImode */ 256 2, /* cost of moving SSE register */ 257 {2, 2, 8}, /* cost of loading SSE registers 258 in SImode, DImode and TImode */ 259 {2, 2, 8}, /* cost of storing SSE registers 260 in SImode, DImode and TImode */ 261 3, /* MMX or SSE register to integer */ 262 32, /* size of prefetch block */ 263 6, /* number of parallel prefetches */ 264 3, /* cost of FADD and FSUB insns. */ 265 5, /* cost of FMUL instruction. */ 266 56, /* cost of FDIV instruction. */ 267 2, /* cost of FABS instruction. */ 268 2, /* cost of FCHS instruction. */ 269 56, /* cost of FSQRT instruction. */ 270}; 271 272static const 273struct processor_costs k6_cost = { 274 1, /* cost of an add instruction */ 275 2, /* cost of a lea instruction */ 276 1, /* variable shift costs */ 277 1, /* constant shift costs */ 278 3, /* cost of starting a multiply */ 279 0, /* cost of multiply per each bit set */ 280 18, /* cost of a divide/mod */ 281 2, /* cost of movsx */ 282 2, /* cost of movzx */ 283 8, /* "large" insn */ 284 4, /* MOVE_RATIO */ 285 3, /* cost for loading QImode using movzbl */ 286 {4, 5, 4}, /* cost of loading integer registers 287 in QImode, HImode and SImode. 288 Relative to reg-reg move (2). 
*/ 289 {2, 3, 2}, /* cost of storing integer registers */ 290 4, /* cost of reg,reg fld/fst */ 291 {6, 6, 6}, /* cost of loading fp registers 292 in SFmode, DFmode and XFmode */ 293 {4, 4, 4}, /* cost of loading integer registers */ 294 2, /* cost of moving MMX register */ 295 {2, 2}, /* cost of loading MMX registers 296 in SImode and DImode */ 297 {2, 2}, /* cost of storing MMX registers 298 in SImode and DImode */ 299 2, /* cost of moving SSE register */ 300 {2, 2, 8}, /* cost of loading SSE registers 301 in SImode, DImode and TImode */ 302 {2, 2, 8}, /* cost of storing SSE registers 303 in SImode, DImode and TImode */ 304 6, /* MMX or SSE register to integer */ 305 32, /* size of prefetch block */ 306 1, /* number of parallel prefetches */ 307 2, /* cost of FADD and FSUB insns. */ 308 2, /* cost of FMUL instruction. */ 309 56, /* cost of FDIV instruction. */ 310 2, /* cost of FABS instruction. */ 311 2, /* cost of FCHS instruction. */ 312 56, /* cost of FSQRT instruction. */ 313}; 314 315static const 316struct processor_costs athlon_cost = { 317 1, /* cost of an add instruction */ 318 2, /* cost of a lea instruction */ 319 1, /* variable shift costs */ 320 1, /* constant shift costs */ 321 5, /* cost of starting a multiply */ 322 0, /* cost of multiply per each bit set */ 323 42, /* cost of a divide/mod */ 324 1, /* cost of movsx */ 325 1, /* cost of movzx */ 326 8, /* "large" insn */ 327 9, /* MOVE_RATIO */ 328 4, /* cost for loading QImode using movzbl */ 329 {3, 4, 3}, /* cost of loading integer registers 330 in QImode, HImode and SImode. 331 Relative to reg-reg move (2). 
*/ 332 {3, 4, 3}, /* cost of storing integer registers */ 333 4, /* cost of reg,reg fld/fst */ 334 {4, 4, 12}, /* cost of loading fp registers 335 in SFmode, DFmode and XFmode */ 336 {6, 6, 8}, /* cost of loading integer registers */ 337 2, /* cost of moving MMX register */ 338 {4, 4}, /* cost of loading MMX registers 339 in SImode and DImode */ 340 {4, 4}, /* cost of storing MMX registers 341 in SImode and DImode */ 342 2, /* cost of moving SSE register */ 343 {4, 4, 6}, /* cost of loading SSE registers 344 in SImode, DImode and TImode */ 345 {4, 4, 5}, /* cost of storing SSE registers 346 in SImode, DImode and TImode */ 347 5, /* MMX or SSE register to integer */ 348 64, /* size of prefetch block */ 349 6, /* number of parallel prefetches */ 350 4, /* cost of FADD and FSUB insns. */ 351 4, /* cost of FMUL instruction. */ 352 24, /* cost of FDIV instruction. */ 353 2, /* cost of FABS instruction. */ 354 2, /* cost of FCHS instruction. */ 355 35, /* cost of FSQRT instruction. */ 356}; 357 358static const 359struct processor_costs pentium4_cost = { 360 1, /* cost of an add instruction */ 361 1, /* cost of a lea instruction */ 362 8, /* variable shift costs */ 363 8, /* constant shift costs */ 364 30, /* cost of starting a multiply */ 365 0, /* cost of multiply per each bit set */ 366 112, /* cost of a divide/mod */ 367 1, /* cost of movsx */ 368 1, /* cost of movzx */ 369 16, /* "large" insn */ 370 6, /* MOVE_RATIO */ 371 2, /* cost for loading QImode using movzbl */ 372 {4, 5, 4}, /* cost of loading integer registers 373 in QImode, HImode and SImode. 374 Relative to reg-reg move (2). 
*/ 375 {2, 3, 2}, /* cost of storing integer registers */ 376 2, /* cost of reg,reg fld/fst */ 377 {2, 2, 6}, /* cost of loading fp registers 378 in SFmode, DFmode and XFmode */ 379 {4, 4, 6}, /* cost of loading integer registers */ 380 2, /* cost of moving MMX register */ 381 {2, 2}, /* cost of loading MMX registers 382 in SImode and DImode */ 383 {2, 2}, /* cost of storing MMX registers 384 in SImode and DImode */ 385 12, /* cost of moving SSE register */ 386 {12, 12, 12}, /* cost of loading SSE registers 387 in SImode, DImode and TImode */ 388 {2, 2, 8}, /* cost of storing SSE registers 389 in SImode, DImode and TImode */ 390 10, /* MMX or SSE register to integer */ 391 64, /* size of prefetch block */ 392 6, /* number of parallel prefetches */ 393 5, /* cost of FADD and FSUB insns. */ 394 7, /* cost of FMUL instruction. */ 395 43, /* cost of FDIV instruction. */ 396 2, /* cost of FABS instruction. */ 397 2, /* cost of FCHS instruction. */ 398 43, /* cost of FSQRT instruction. */ 399}; 400 401const struct processor_costs *ix86_cost = &pentium_cost; 402 403/* Processor feature/optimization bitmasks. 
*/ 404#define m_386 (1<<PROCESSOR_I386) 405#define m_486 (1<<PROCESSOR_I486) 406#define m_PENT (1<<PROCESSOR_PENTIUM) 407#define m_PPRO (1<<PROCESSOR_PENTIUMPRO) 408#define m_K6 (1<<PROCESSOR_K6) 409#define m_ATHLON (1<<PROCESSOR_ATHLON) 410#define m_PENT4 (1<<PROCESSOR_PENTIUM4) 411 412const int x86_use_leave = m_386 | m_K6 | m_ATHLON; 413const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4; 414const int x86_zero_extend_with_and = m_486 | m_PENT; 415const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */; 416const int x86_double_with_add = ~m_386; 417const int x86_use_bit_test = m_386; 418const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6; 419const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4; 420const int x86_3dnow_a = m_ATHLON; 421const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4; 422const int x86_branch_hints = m_PENT4; 423const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4; 424const int x86_partial_reg_stall = m_PPRO; 425const int x86_use_loop = m_K6; 426const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT); 427const int x86_use_mov0 = m_K6; 428const int x86_use_cltd = ~(m_PENT | m_K6); 429const int x86_read_modify_write = ~m_PENT; 430const int x86_read_modify = ~(m_PENT | m_PPRO); 431const int x86_split_long_moves = m_PPRO; 432const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON; 433const int x86_fast_prefix = ~(m_PENT | m_486 | m_386); 434const int x86_single_stringop = m_386 | m_PENT4; 435const int x86_qimode_math = ~(0); 436const int x86_promote_qi_regs = 0; 437const int x86_himode_math = ~(m_PPRO); 438const int x86_promote_hi_regs = m_PPRO; 439const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4; 440const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4; 441const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4; 442const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4; 443const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO); 444const 
int x86_partial_reg_dependency = m_ATHLON | m_PENT4; 445const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4; 446const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO; 447const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO; 448const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO; 449const int x86_decompose_lea = m_PENT4; 450const int x86_shift1 = ~m_486; 451const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4; 452 453/* In case the avreage insn count for single function invocation is 454 lower than this constant, emit fast (but longer) prologue and 455 epilogue code. */ 456#define FAST_PROLOGUE_INSN_COUNT 30 457 458/* Set by prologue expander and used by epilogue expander to determine 459 the style used. */ 460static int use_fast_prologue_epilogue; 461 462/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */ 463static const char *const qi_reg_name[] = QI_REGISTER_NAMES; 464static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; 465static const char *const hi_reg_name[] = HI_REGISTER_NAMES; 466 467/* Array of the smallest class containing reg number REGNO, indexed by 468 REGNO. Used by REGNO_REG_CLASS in i386.h. 
*/ 469 470enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] = 471{ 472 /* ax, dx, cx, bx */ 473 AREG, DREG, CREG, BREG, 474 /* si, di, bp, sp */ 475 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS, 476 /* FP registers */ 477 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS, 478 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, 479 /* arg pointer */ 480 NON_Q_REGS, 481 /* flags, fpsr, dirflag, frame */ 482 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS, 483 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 484 SSE_REGS, SSE_REGS, 485 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, 486 MMX_REGS, MMX_REGS, 487 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 488 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 489 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 490 SSE_REGS, SSE_REGS, 491}; 492 493/* The "default" register map used in 32bit mode. */ 494 495int const dbx_register_map[FIRST_PSEUDO_REGISTER] = 496{ 497 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */ 498 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */ 499 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 500 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */ 501 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */ 502 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ 503 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ 504}; 505 506static int const x86_64_int_parameter_registers[6] = 507{ 508 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/, 509 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */ 510}; 511 512static int const x86_64_int_return_registers[4] = 513{ 514 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/ 515}; 516 517/* The "default" register map used in 64bit mode. 
*/ 518int const dbx64_register_map[FIRST_PSEUDO_REGISTER] = 519{ 520 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */ 521 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */ 522 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 523 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */ 524 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */ 525 8,9,10,11,12,13,14,15, /* extended integer registers */ 526 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */ 527}; 528 529/* Define the register numbers to be used in Dwarf debugging information. 530 The SVR4 reference port C compiler uses the following register numbers 531 in its Dwarf output code: 532 0 for %eax (gcc regno = 0) 533 1 for %ecx (gcc regno = 2) 534 2 for %edx (gcc regno = 1) 535 3 for %ebx (gcc regno = 3) 536 4 for %esp (gcc regno = 7) 537 5 for %ebp (gcc regno = 6) 538 6 for %esi (gcc regno = 4) 539 7 for %edi (gcc regno = 5) 540 The following three DWARF register numbers are never generated by 541 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4 542 believes these numbers have these meanings. 543 8 for %eip (no gcc equivalent) 544 9 for %eflags (gcc regno = 17) 545 10 for %trapno (no gcc equivalent) 546 It is not at all clear how we should number the FP stack registers 547 for the x86 architecture. If the version of SDB on x86/svr4 were 548 a bit less brain dead with respect to floating-point then we would 549 have a precedent to follow with respect to DWARF register numbers 550 for x86 FP registers, but the SDB on x86/svr4 is so completely 551 broken with respect to FP registers that it is hardly worth thinking 552 of it as something to strive for compatibility with. 553 The version of x86/svr4 SDB I have at the moment does (partially) 554 seem to believe that DWARF register number 11 is associated with 555 the x86 register %st(0), but that's about all. 
Higher DWARF 556 register numbers don't seem to be associated with anything in 557 particular, and even for DWARF regno 11, SDB only seems to under- 558 stand that it should say that a variable lives in %st(0) (when 559 asked via an `=' command) if we said it was in DWARF regno 11, 560 but SDB still prints garbage when asked for the value of the 561 variable in question (via a `/' command). 562 (Also note that the labels SDB prints for various FP stack regs 563 when doing an `x' command are all wrong.) 564 Note that these problems generally don't affect the native SVR4 565 C compiler because it doesn't allow the use of -O with -g and 566 because when it is *not* optimizing, it allocates a memory 567 location for each floating-point variable, and the memory 568 location is what gets described in the DWARF AT_location 569 attribute for the variable in question. 570 Regardless of the severe mental illness of the x86/svr4 SDB, we 571 do something sensible here and we use the following DWARF 572 register numbers. Note that these are all stack-top-relative 573 numbers. 574 11 for %st(0) (gcc regno = 8) 575 12 for %st(1) (gcc regno = 9) 576 13 for %st(2) (gcc regno = 10) 577 14 for %st(3) (gcc regno = 11) 578 15 for %st(4) (gcc regno = 12) 579 16 for %st(5) (gcc regno = 13) 580 17 for %st(6) (gcc regno = 14) 581 18 for %st(7) (gcc regno = 15) 582*/ 583int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = 584{ 585 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */ 586 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */ 587 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 588 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */ 589 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */ 590 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */ 591 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */ 592}; 593 594/* Test and compare insns in i386.md store the information needed to 595 generate branch and scc insns here. 
*/ 596 597rtx ix86_compare_op0 = NULL_RTX; 598rtx ix86_compare_op1 = NULL_RTX; 599 600/* The encoding characters for the four TLS models present in ELF. */ 601 602static char const tls_model_chars[] = " GLil"; 603 604#define MAX_386_STACK_LOCALS 3 605/* Size of the register save area. */ 606#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16) 607 608/* Define the structure for the machine field in struct function. */ 609struct machine_function GTY(()) 610{ 611 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS]; 612 const char *some_ld_name; 613 int save_varrargs_registers; 614 int accesses_prev_frame; 615}; 616 617#define ix86_stack_locals (cfun->machine->stack_locals) 618#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers) 619 620/* Structure describing stack frame layout. 621 Stack grows downward: 622 623 [arguments] 624 <- ARG_POINTER 625 saved pc 626 627 saved frame pointer if frame_pointer_needed 628 <- HARD_FRAME_POINTER 629 [saved regs] 630 631 [padding1] \ 632 ) 633 [va_arg registers] ( 634 > to_allocate <- FRAME_POINTER 635 [frame] ( 636 ) 637 [padding2] / 638 */ 639struct ix86_frame 640{ 641 int nregs; 642 int padding1; 643 int va_arg_size; 644 HOST_WIDE_INT frame; 645 int padding2; 646 int outgoing_arguments_size; 647 int red_zone_size; 648 649 HOST_WIDE_INT to_allocate; 650 /* The offsets relative to ARG_POINTER. */ 651 HOST_WIDE_INT frame_pointer_offset; 652 HOST_WIDE_INT hard_frame_pointer_offset; 653 HOST_WIDE_INT stack_pointer_offset; 654}; 655 656/* Used to enable/disable debugging features. */ 657const char *ix86_debug_arg_string, *ix86_debug_addr_string; 658/* Code model option as passed by user. */ 659const char *ix86_cmodel_string; 660/* Parsed value. */ 661enum cmodel ix86_cmodel; 662/* Asm dialect. */ 663const char *ix86_asm_string; 664enum asm_dialect ix86_asm_dialect = ASM_ATT; 665/* TLS dialext. 
*/ 666const char *ix86_tls_dialect_string; 667enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU; 668 669/* Which unit we are generating floating point math for. */ 670enum fpmath_unit ix86_fpmath; 671 672/* Which cpu are we scheduling for. */ 673enum processor_type ix86_cpu; 674/* Which instruction set architecture to use. */ 675enum processor_type ix86_arch; 676 677/* Strings to hold which cpu and instruction set architecture to use. */ 678const char *ix86_cpu_string; /* for -mcpu=<xxx> */ 679const char *ix86_arch_string; /* for -march=<xxx> */ 680const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */ 681 682/* # of registers to use to pass arguments. */ 683const char *ix86_regparm_string; 684 685/* true if sse prefetch instruction is not NOOP. */ 686int x86_prefetch_sse; 687 688/* ix86_regparm_string as a number */ 689int ix86_regparm; 690 691/* Alignment to use for loops and jumps: */ 692 693/* Power of two alignment for loops. */ 694const char *ix86_align_loops_string; 695 696/* Power of two alignment for non-loop jumps. */ 697const char *ix86_align_jumps_string; 698 699/* Power of two alignment for stack boundary in bytes. */ 700const char *ix86_preferred_stack_boundary_string; 701 702/* Preferred alignment for stack boundary in bits. */ 703int ix86_preferred_stack_boundary; 704 705/* Values 1-5: see jump.c */ 706int ix86_branch_cost; 707const char *ix86_branch_cost_string; 708 709/* Power of two alignment for functions. */ 710const char *ix86_align_funcs_string; 711 712/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. 
*/ 713static char internal_label_prefix[16]; 714static int internal_label_prefix_len; 715 716static int local_symbolic_operand PARAMS ((rtx, enum machine_mode)); 717static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model)); 718static void output_pic_addr_const PARAMS ((FILE *, rtx, int)); 719static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode, 720 int, int, FILE *)); 721static const char *get_some_local_dynamic_name PARAMS ((void)); 722static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *)); 723static rtx maybe_get_pool_constant PARAMS ((rtx)); 724static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx)); 725static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code, 726 rtx *, rtx *)); 727static rtx get_thread_pointer PARAMS ((void)); 728static void get_pc_thunk_name PARAMS ((char [32], unsigned int)); 729static rtx gen_push PARAMS ((rtx)); 730static int memory_address_length PARAMS ((rtx addr)); 731static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type)); 732static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type)); 733static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx)); 734static void ix86_dump_ppro_packet PARAMS ((FILE *)); 735static void ix86_reorder_insn PARAMS ((rtx *, rtx *)); 736static struct machine_function * ix86_init_machine_status PARAMS ((void)); 737static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode)); 738static int ix86_nsaved_regs PARAMS ((void)); 739static void ix86_emit_save_regs PARAMS ((void)); 740static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT)); 741static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int)); 742static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT)); 743static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx)); 744static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *)); 745static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void)); 746static void 
ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT)); 747static rtx ix86_expand_aligntest PARAMS ((rtx, int)); 748static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx)); 749static int ix86_issue_rate PARAMS ((void)); 750static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int)); 751static void ix86_sched_init PARAMS ((FILE *, int, int)); 752static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int)); 753static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int)); 754static int ia32_use_dfa_pipeline_interface PARAMS ((void)); 755static int ia32_multipass_dfa_lookahead PARAMS ((void)); 756static void ix86_init_mmx_sse_builtins PARAMS ((void)); 757static rtx x86_this_parameter PARAMS ((tree)); 758static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT, 759 HOST_WIDE_INT, tree)); 760static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT, 761 HOST_WIDE_INT, tree)); 762 763struct ix86_address 764{ 765 rtx base, index, disp; 766 HOST_WIDE_INT scale; 767}; 768 769static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *)); 770static bool ix86_cannot_force_const_mem PARAMS ((rtx)); 771 772static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED; 773static const char *ix86_strip_name_encoding PARAMS ((const char *)) 774 ATTRIBUTE_UNUSED; 775 776struct builtin_description; 777static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *, 778 tree, rtx)); 779static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *, 780 tree, rtx)); 781static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx)); 782static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int)); 783static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx)); 784static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree)); 785static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode)); 786static enum rtx_code ix86_fp_compare_code_to_integer PARAMS 
((enum rtx_code)); 787static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code, 788 enum rtx_code *, 789 enum rtx_code *, 790 enum rtx_code *)); 791static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx, 792 rtx *, rtx *)); 793static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code)); 794static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code)); 795static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code)); 796static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code)); 797static unsigned int ix86_select_alt_pic_regnum PARAMS ((void)); 798static int ix86_save_reg PARAMS ((unsigned int, int)); 799static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *)); 800static int ix86_comp_type_attributes PARAMS ((tree, tree)); 801static int ix86_fntype_regparm PARAMS ((tree)); 802const struct attribute_spec ix86_attribute_table[]; 803static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *)); 804static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *)); 805static int ix86_value_regno PARAMS ((enum machine_mode)); 806static bool contains_128bit_aligned_vector_p PARAMS ((tree)); 807 808#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION) 809static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int)); 810#endif 811 812/* Register class used for passing given 64bit part of the argument. 813 These represent classes as documented by the PS ABI, with the exception 814 of SSESF, SSEDF classes, that are basically SSE class, just gcc will 815 use SF or DFmode move instead of DImode to avoid reformating penalties. 816 817 Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves 818 whenever possible (upper half does contain padding). 
819 */ 820enum x86_64_reg_class 821 { 822 X86_64_NO_CLASS, 823 X86_64_INTEGER_CLASS, 824 X86_64_INTEGERSI_CLASS, 825 X86_64_SSE_CLASS, 826 X86_64_SSESF_CLASS, 827 X86_64_SSEDF_CLASS, 828 X86_64_SSEUP_CLASS, 829 X86_64_X87_CLASS, 830 X86_64_X87UP_CLASS, 831 X86_64_MEMORY_CLASS 832 }; 833static const char * const x86_64_reg_class_name[] = 834 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"}; 835 836#define MAX_CLASSES 4 837static int classify_argument PARAMS ((enum machine_mode, tree, 838 enum x86_64_reg_class [MAX_CLASSES], 839 int)); 840static int examine_argument PARAMS ((enum machine_mode, tree, int, int *, 841 int *)); 842static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int, 843 const int *, int)); 844static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class, 845 enum x86_64_reg_class)); 846 847/* Initialize the GCC target structure. */ 848#undef TARGET_ATTRIBUTE_TABLE 849#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table 850#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES 851# undef TARGET_MERGE_DECL_ATTRIBUTES 852# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes 853#endif 854 855#undef TARGET_COMP_TYPE_ATTRIBUTES 856#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes 857 858#undef TARGET_INIT_BUILTINS 859#define TARGET_INIT_BUILTINS ix86_init_builtins 860 861#undef TARGET_EXPAND_BUILTIN 862#define TARGET_EXPAND_BUILTIN ix86_expand_builtin 863 864#undef TARGET_ASM_FUNCTION_EPILOGUE 865#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue 866 867#undef TARGET_ASM_OPEN_PAREN 868#define TARGET_ASM_OPEN_PAREN "" 869#undef TARGET_ASM_CLOSE_PAREN 870#define TARGET_ASM_CLOSE_PAREN "" 871 872#undef TARGET_ASM_ALIGNED_HI_OP 873#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT 874#undef TARGET_ASM_ALIGNED_SI_OP 875#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG 876#ifdef ASM_QUAD 877#undef TARGET_ASM_ALIGNED_DI_OP 878#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD 879#endif 
880 881#undef TARGET_ASM_UNALIGNED_HI_OP 882#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP 883#undef TARGET_ASM_UNALIGNED_SI_OP 884#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP 885#undef TARGET_ASM_UNALIGNED_DI_OP 886#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP 887 888#undef TARGET_SCHED_ADJUST_COST 889#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost 890#undef TARGET_SCHED_ISSUE_RATE 891#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate 892#undef TARGET_SCHED_VARIABLE_ISSUE 893#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue 894#undef TARGET_SCHED_INIT 895#define TARGET_SCHED_INIT ix86_sched_init 896#undef TARGET_SCHED_REORDER 897#define TARGET_SCHED_REORDER ix86_sched_reorder 898#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE 899#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \ 900 ia32_use_dfa_pipeline_interface 901#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD 902#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ 903 ia32_multipass_dfa_lookahead 904 905#ifdef HAVE_AS_TLS 906#undef TARGET_HAVE_TLS 907#define TARGET_HAVE_TLS true 908#endif 909#undef TARGET_CANNOT_FORCE_CONST_MEM 910#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem 911 912#undef TARGET_ASM_OUTPUT_MI_THUNK 913#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk 914#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK 915#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk 916 917struct gcc_target targetm = TARGET_INITIALIZER; 918 919/* The svr4 ABI for the i386 says that records and unions are returned 920 in memory. */ 921#ifndef DEFAULT_PCC_STRUCT_RETURN 922#define DEFAULT_PCC_STRUCT_RETURN 1 923#endif 924 925/* Sometimes certain combinations of command options do not make 926 sense on a particular target machine. You can define a macro 927 `OVERRIDE_OPTIONS' to take account of this. This macro, if 928 defined, is executed once just after all the command options have 929 been parsed. 
930 931 Don't use this macro to turn on various extra optimizations for 932 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ 933 934void 935override_options () 936{ 937 int i; 938 /* Comes from final.c -- no real reason to change it. */ 939#define MAX_CODE_ALIGN 16 940 941 static struct ptt 942 { 943 const struct processor_costs *cost; /* Processor costs */ 944 const int target_enable; /* Target flags to enable. */ 945 const int target_disable; /* Target flags to disable. */ 946 const int align_loop; /* Default alignments. */ 947 const int align_loop_max_skip; 948 const int align_jump; 949 const int align_jump_max_skip; 950 const int align_func; 951 const int branch_cost; 952 } 953 const processor_target_table[PROCESSOR_max] = 954 { 955 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1}, 956 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1}, 957 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1}, 958 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1}, 959 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1}, 960 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1}, 961 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1} 962 }; 963 964 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES; 965 static struct pta 966 { 967 const char *const name; /* processor name or nickname. 
*/ 968 const enum processor_type processor; 969 const enum pta_flags 970 { 971 PTA_SSE = 1, 972 PTA_SSE2 = 2, 973 PTA_MMX = 4, 974 PTA_PREFETCH_SSE = 8, 975 PTA_3DNOW = 16, 976 PTA_3DNOW_A = 64 977 } flags; 978 } 979 const processor_alias_table[] = 980 { 981 {"i386", PROCESSOR_I386, 0}, 982 {"i486", PROCESSOR_I486, 0}, 983 {"i586", PROCESSOR_PENTIUM, 0}, 984 {"pentium", PROCESSOR_PENTIUM, 0}, 985 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX}, 986 {"winchip-c6", PROCESSOR_I486, PTA_MMX}, 987 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, 988 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, 989 {"i686", PROCESSOR_PENTIUMPRO, 0}, 990 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0}, 991 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX}, 992 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, 993 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | 994 PTA_MMX | PTA_PREFETCH_SSE}, 995 {"k6", PROCESSOR_K6, PTA_MMX}, 996 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 997 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 998 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 999 | PTA_3DNOW_A}, 1000 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE 1001 | PTA_3DNOW | PTA_3DNOW_A}, 1002 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1003 | PTA_3DNOW_A | PTA_SSE}, 1004 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1005 | PTA_3DNOW_A | PTA_SSE}, 1006 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1007 | PTA_3DNOW_A | PTA_SSE}, 1008 }; 1009 1010 int const pta_size = ARRAY_SIZE (processor_alias_table); 1011 1012 /* By default our XFmode is the 80-bit extended format. If we have 1013 use TFmode instead, it's also the 80-bit format, but with padding. 
*/ 1014 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format; 1015 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format; 1016 1017 /* Set the default values for switches whose default depends on TARGET_64BIT 1018 in case they weren't overwriten by command line options. */ 1019 if (TARGET_64BIT) 1020 { 1021 if (flag_omit_frame_pointer == 2) 1022 flag_omit_frame_pointer = 1; 1023 if (flag_asynchronous_unwind_tables == 2) 1024 flag_asynchronous_unwind_tables = 1; 1025 if (flag_pcc_struct_return == 2) 1026 flag_pcc_struct_return = 0; 1027 } 1028 else 1029 { 1030 if (flag_omit_frame_pointer == 2) 1031 flag_omit_frame_pointer = 0; 1032 if (flag_asynchronous_unwind_tables == 2) 1033 flag_asynchronous_unwind_tables = 0; 1034 if (flag_pcc_struct_return == 2) 1035 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; 1036 } 1037 1038#ifdef SUBTARGET_OVERRIDE_OPTIONS 1039 SUBTARGET_OVERRIDE_OPTIONS; 1040#endif 1041 1042 if (!ix86_cpu_string && ix86_arch_string) 1043 ix86_cpu_string = ix86_arch_string; 1044 if (!ix86_cpu_string) 1045 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT]; 1046 if (!ix86_arch_string) 1047 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386"; 1048 1049 if (ix86_cmodel_string != 0) 1050 { 1051 if (!strcmp (ix86_cmodel_string, "small")) 1052 ix86_cmodel = flag_pic ? 
CM_SMALL_PIC : CM_SMALL; 1053 else if (flag_pic) 1054 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string); 1055 else if (!strcmp (ix86_cmodel_string, "32")) 1056 ix86_cmodel = CM_32; 1057 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic) 1058 ix86_cmodel = CM_KERNEL; 1059 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic) 1060 ix86_cmodel = CM_MEDIUM; 1061 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic) 1062 ix86_cmodel = CM_LARGE; 1063 else 1064 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string); 1065 } 1066 else 1067 { 1068 ix86_cmodel = CM_32; 1069 if (TARGET_64BIT) 1070 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 1071 } 1072 if (ix86_asm_string != 0) 1073 { 1074 if (!strcmp (ix86_asm_string, "intel")) 1075 ix86_asm_dialect = ASM_INTEL; 1076 else if (!strcmp (ix86_asm_string, "att")) 1077 ix86_asm_dialect = ASM_ATT; 1078 else 1079 error ("bad value (%s) for -masm= switch", ix86_asm_string); 1080 } 1081 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32)) 1082 error ("code model `%s' not supported in the %s bit mode", 1083 ix86_cmodel_string, TARGET_64BIT ? "64" : "32"); 1084 if (ix86_cmodel == CM_LARGE) 1085 sorry ("code model `large' not supported yet"); 1086 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0)) 1087 sorry ("%i-bit mode not compiled in", 1088 (target_flags & MASK_64BIT) ? 64 : 32); 1089 1090 for (i = 0; i < pta_size; i++) 1091 if (! strcmp (ix86_arch_string, processor_alias_table[i].name)) 1092 { 1093 ix86_arch = processor_alias_table[i].processor; 1094 /* Default cpu tuning to the architecture. 
*/ 1095 ix86_cpu = ix86_arch; 1096 if (processor_alias_table[i].flags & PTA_MMX 1097 && !(target_flags_explicit & MASK_MMX)) 1098 target_flags |= MASK_MMX; 1099 if (processor_alias_table[i].flags & PTA_3DNOW 1100 && !(target_flags_explicit & MASK_3DNOW)) 1101 target_flags |= MASK_3DNOW; 1102 if (processor_alias_table[i].flags & PTA_3DNOW_A 1103 && !(target_flags_explicit & MASK_3DNOW_A)) 1104 target_flags |= MASK_3DNOW_A; 1105 if (processor_alias_table[i].flags & PTA_SSE 1106 && !(target_flags_explicit & MASK_SSE)) 1107 target_flags |= MASK_SSE; 1108 if (processor_alias_table[i].flags & PTA_SSE2 1109 && !(target_flags_explicit & MASK_SSE2)) 1110 target_flags |= MASK_SSE2; 1111 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) 1112 x86_prefetch_sse = true; 1113 break; 1114 } 1115 1116 if (i == pta_size) 1117 error ("bad value (%s) for -march= switch", ix86_arch_string); 1118 1119 for (i = 0; i < pta_size; i++) 1120 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name)) 1121 { 1122 ix86_cpu = processor_alias_table[i].processor; 1123 break; 1124 } 1125 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) 1126 x86_prefetch_sse = true; 1127 if (i == pta_size) 1128 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string); 1129 1130 if (optimize_size) 1131 ix86_cost = &size_cost; 1132 else 1133 ix86_cost = processor_target_table[ix86_cpu].cost; 1134 target_flags |= processor_target_table[ix86_cpu].target_enable; 1135 target_flags &= ~processor_target_table[ix86_cpu].target_disable; 1136 1137 /* Arrange to set up i386_stack_locals for all functions. */ 1138 init_machine_status = ix86_init_machine_status; 1139 1140 /* Validate -mregparm= value. 
*/ 1141 if (ix86_regparm_string) 1142 { 1143 i = atoi (ix86_regparm_string); 1144 if (i < 0 || i > REGPARM_MAX) 1145 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX); 1146 else 1147 ix86_regparm = i; 1148 } 1149 else 1150 if (TARGET_64BIT) 1151 ix86_regparm = REGPARM_MAX; 1152 1153 /* If the user has provided any of the -malign-* options, 1154 warn and use that value only if -falign-* is not set. 1155 Remove this code in GCC 3.2 or later. */ 1156 if (ix86_align_loops_string) 1157 { 1158 warning ("-malign-loops is obsolete, use -falign-loops"); 1159 if (align_loops == 0) 1160 { 1161 i = atoi (ix86_align_loops_string); 1162 if (i < 0 || i > MAX_CODE_ALIGN) 1163 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1164 else 1165 align_loops = 1 << i; 1166 } 1167 } 1168 1169 if (ix86_align_jumps_string) 1170 { 1171 warning ("-malign-jumps is obsolete, use -falign-jumps"); 1172 if (align_jumps == 0) 1173 { 1174 i = atoi (ix86_align_jumps_string); 1175 if (i < 0 || i > MAX_CODE_ALIGN) 1176 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1177 else 1178 align_jumps = 1 << i; 1179 } 1180 } 1181 1182 if (ix86_align_funcs_string) 1183 { 1184 warning ("-malign-functions is obsolete, use -falign-functions"); 1185 if (align_functions == 0) 1186 { 1187 i = atoi (ix86_align_funcs_string); 1188 if (i < 0 || i > MAX_CODE_ALIGN) 1189 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1190 else 1191 align_functions = 1 << i; 1192 } 1193 } 1194 1195 /* Default align_* from the processor table. 
*/ 1196 if (align_loops == 0) 1197 { 1198 align_loops = processor_target_table[ix86_cpu].align_loop; 1199 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip; 1200 } 1201 if (align_jumps == 0) 1202 { 1203 align_jumps = processor_target_table[ix86_cpu].align_jump; 1204 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip; 1205 } 1206 if (align_functions == 0) 1207 { 1208 align_functions = processor_target_table[ix86_cpu].align_func; 1209 } 1210 1211 /* Validate -mpreferred-stack-boundary= value, or provide default. 1212 The default of 128 bits is for Pentium III's SSE __m128, but we 1213 don't want additional code to keep the stack aligned when 1214 optimizing for code size. */ 1215 ix86_preferred_stack_boundary = (optimize_size 1216 ? TARGET_64BIT ? 128 : 32 1217 : 128); 1218 if (ix86_preferred_stack_boundary_string) 1219 { 1220 i = atoi (ix86_preferred_stack_boundary_string); 1221 if (i < (TARGET_64BIT ? 4 : 2) || i > 12) 1222 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i, 1223 TARGET_64BIT ? 4 : 2); 1224 else 1225 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT; 1226 } 1227 1228 /* Validate -mbranch-cost= value, or provide default. */ 1229 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost; 1230 if (ix86_branch_cost_string) 1231 { 1232 i = atoi (ix86_branch_cost_string); 1233 if (i < 0 || i > 5) 1234 error ("-mbranch-cost=%d is not between 0 and 5", i); 1235 else 1236 ix86_branch_cost = i; 1237 } 1238 1239 if (ix86_tls_dialect_string) 1240 { 1241 if (strcmp (ix86_tls_dialect_string, "gnu") == 0) 1242 ix86_tls_dialect = TLS_DIALECT_GNU; 1243 else if (strcmp (ix86_tls_dialect_string, "sun") == 0) 1244 ix86_tls_dialect = TLS_DIALECT_SUN; 1245 else 1246 error ("bad value (%s) for -mtls-dialect= switch", 1247 ix86_tls_dialect_string); 1248 } 1249 1250 /* Keep nonleaf frame pointers. 
*/ 1251 if (TARGET_OMIT_LEAF_FRAME_POINTER) 1252 flag_omit_frame_pointer = 1; 1253 1254 /* If we're doing fast math, we don't care about comparison order 1255 wrt NaNs. This lets us use a shorter comparison sequence. */ 1256 if (flag_unsafe_math_optimizations) 1257 target_flags &= ~MASK_IEEE_FP; 1258 1259 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387, 1260 since the insns won't need emulation. */ 1261 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch)) 1262 target_flags &= ~MASK_NO_FANCY_MATH_387; 1263 1264 /* Turn on SSE2 builtins for -mpni. */ 1265 if (TARGET_PNI) 1266 target_flags |= MASK_SSE2; 1267 1268 /* Turn on SSE builtins for -msse2. */ 1269 if (TARGET_SSE2) 1270 target_flags |= MASK_SSE; 1271 1272 if (TARGET_64BIT) 1273 { 1274 if (TARGET_ALIGN_DOUBLE) 1275 error ("-malign-double makes no sense in the 64bit mode"); 1276 if (TARGET_RTD) 1277 error ("-mrtd calling convention not supported in the 64bit mode"); 1278 /* Enable by default the SSE and MMX builtins. */ 1279 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE); 1280 ix86_fpmath = FPMATH_SSE; 1281 } 1282 else 1283 ix86_fpmath = FPMATH_387; 1284 1285 if (ix86_fpmath_string != 0) 1286 { 1287 if (! strcmp (ix86_fpmath_string, "387")) 1288 ix86_fpmath = FPMATH_387; 1289 else if (! strcmp (ix86_fpmath_string, "sse")) 1290 { 1291 if (!TARGET_SSE) 1292 { 1293 warning ("SSE instruction set disabled, using 387 arithmetics"); 1294 ix86_fpmath = FPMATH_387; 1295 } 1296 else 1297 ix86_fpmath = FPMATH_SSE; 1298 } 1299 else if (! strcmp (ix86_fpmath_string, "387,sse") 1300 || ! 
strcmp (ix86_fpmath_string, "sse,387")) 1301 { 1302 if (!TARGET_SSE) 1303 { 1304 warning ("SSE instruction set disabled, using 387 arithmetics"); 1305 ix86_fpmath = FPMATH_387; 1306 } 1307 else if (!TARGET_80387) 1308 { 1309 warning ("387 instruction set disabled, using SSE arithmetics"); 1310 ix86_fpmath = FPMATH_SSE; 1311 } 1312 else 1313 ix86_fpmath = FPMATH_SSE | FPMATH_387; 1314 } 1315 else 1316 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string); 1317 } 1318 1319 /* It makes no sense to ask for just SSE builtins, so MMX is also turned 1320 on by -msse. */ 1321 if (TARGET_SSE) 1322 { 1323 target_flags |= MASK_MMX; 1324 x86_prefetch_sse = true; 1325 } 1326 1327 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */ 1328 if (TARGET_3DNOW) 1329 { 1330 target_flags |= MASK_MMX; 1331 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX 1332 extensions it adds. */ 1333 if (x86_3dnow_a & (1 << ix86_arch)) 1334 target_flags |= MASK_3DNOW_A; 1335 } 1336 if ((x86_accumulate_outgoing_args & CPUMASK) 1337 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) 1338 && !optimize_size) 1339 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 1340 1341 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ 1342 { 1343 char *p; 1344 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); 1345 p = strchr (internal_label_prefix, 'X'); 1346 internal_label_prefix_len = p - internal_label_prefix; 1347 *p = '\0'; 1348 } 1349} 1350 1351void 1352optimization_options (level, size) 1353 int level; 1354 int size ATTRIBUTE_UNUSED; 1355{ 1356 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to 1357 make the problem with not enough registers even worse. */ 1358#ifdef INSN_SCHEDULING 1359 if (level > 1) 1360 flag_schedule_insns = 0; 1361#endif 1362 1363 /* The default values of these switches depend on the TARGET_64BIT 1364 that is not known at this moment. 
Mark these values with 2 and 1365 let user the to override these. In case there is no command line option 1366 specifying them, we will set the defaults in override_options. */ 1367 if (optimize >= 1) 1368 flag_omit_frame_pointer = 2; 1369 flag_pcc_struct_return = 2; 1370 flag_asynchronous_unwind_tables = 2; 1371} 1372 1373/* Table of valid machine attributes. */ 1374const struct attribute_spec ix86_attribute_table[] = 1375{ 1376 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ 1377 /* Stdcall attribute says callee is responsible for popping arguments 1378 if they are not variable. */ 1379 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute }, 1380 /* Cdecl attribute says the callee is a normal C declaration */ 1381 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute }, 1382 /* Regparm attribute specifies how many integer arguments are to be 1383 passed in registers. */ 1384 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute }, 1385#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES 1386 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute }, 1387 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute }, 1388 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute }, 1389#endif 1390 { NULL, 0, 0, false, false, false, NULL } 1391}; 1392 1393/* Handle a "cdecl" or "stdcall" attribute; 1394 arguments as in struct attribute_spec.handler. 
*/ 1395static tree 1396ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs) 1397 tree *node; 1398 tree name; 1399 tree args ATTRIBUTE_UNUSED; 1400 int flags ATTRIBUTE_UNUSED; 1401 bool *no_add_attrs; 1402{ 1403 if (TREE_CODE (*node) != FUNCTION_TYPE 1404 && TREE_CODE (*node) != METHOD_TYPE 1405 && TREE_CODE (*node) != FIELD_DECL 1406 && TREE_CODE (*node) != TYPE_DECL) 1407 { 1408 warning ("`%s' attribute only applies to functions", 1409 IDENTIFIER_POINTER (name)); 1410 *no_add_attrs = true; 1411 } 1412 1413 if (TARGET_64BIT) 1414 { 1415 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name)); 1416 *no_add_attrs = true; 1417 } 1418 1419 return NULL_TREE; 1420} 1421 1422/* Handle a "regparm" attribute; 1423 arguments as in struct attribute_spec.handler. */ 1424static tree 1425ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs) 1426 tree *node; 1427 tree name; 1428 tree args; 1429 int flags ATTRIBUTE_UNUSED; 1430 bool *no_add_attrs; 1431{ 1432 if (TREE_CODE (*node) != FUNCTION_TYPE 1433 && TREE_CODE (*node) != METHOD_TYPE 1434 && TREE_CODE (*node) != FIELD_DECL 1435 && TREE_CODE (*node) != TYPE_DECL) 1436 { 1437 warning ("`%s' attribute only applies to functions", 1438 IDENTIFIER_POINTER (name)); 1439 *no_add_attrs = true; 1440 } 1441 else 1442 { 1443 tree cst; 1444 1445 cst = TREE_VALUE (args); 1446 if (TREE_CODE (cst) != INTEGER_CST) 1447 { 1448 warning ("`%s' attribute requires an integer constant argument", 1449 IDENTIFIER_POINTER (name)); 1450 *no_add_attrs = true; 1451 } 1452 else if (compare_tree_int (cst, REGPARM_MAX) > 0) 1453 { 1454 warning ("argument to `%s' attribute larger than %d", 1455 IDENTIFIER_POINTER (name), REGPARM_MAX); 1456 *no_add_attrs = true; 1457 } 1458 } 1459 1460 return NULL_TREE; 1461} 1462 1463/* Return 0 if the attributes for two types are incompatible, 1 if they 1464 are compatible, and 2 if they are nearly compatible (which causes a 1465 warning to be generated). 
*/ 1466 1467static int 1468ix86_comp_type_attributes (type1, type2) 1469 tree type1; 1470 tree type2; 1471{ 1472 /* Check for mismatch of non-default calling convention. */ 1473 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall"; 1474 1475 if (TREE_CODE (type1) != FUNCTION_TYPE) 1476 return 1; 1477 1478 /* Check for mismatched return types (cdecl vs stdcall). */ 1479 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1)) 1480 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2))) 1481 return 0; 1482 return 1; 1483} 1484 1485/* Return the regparm value for a fuctio with the indicated TYPE. */ 1486 1487static int 1488ix86_fntype_regparm (type) 1489 tree type; 1490{ 1491 tree attr; 1492 1493 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type)); 1494 if (attr) 1495 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); 1496 else 1497 return ix86_regparm; 1498} 1499 1500/* Value is the number of bytes of arguments automatically 1501 popped when returning from a subroutine call. 1502 FUNDECL is the declaration node of the function (as a tree), 1503 FUNTYPE is the data type of the function (as a tree), 1504 or for a library call it is an identifier node for the subroutine name. 1505 SIZE is the number of bytes of arguments passed on the stack. 1506 1507 On the 80386, the RTD insn may be used to pop them if the number 1508 of args is fixed, but if the number is variable then the caller 1509 must pop them all. RTD can't be used for library calls now 1510 because the library is compiled with the Unix compiler. 1511 Use of RTD is a selectable option, since it is incompatible with 1512 standard Unix calling sequences. If the option is not selected, 1513 the caller must always pop the args. 1514 1515 The attribute stdcall is equivalent to RTD on a per module basis. 
 */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  /* RTD applies only to real functions, not to library calls (which are
     identified by FUNDECL being an IDENTIFIER_NODE).  */
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall functions will pop the stack if not variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    /* Callee pops SIZE bytes only when the argument list is fixed, i.e.
       empty or explicitly terminated by void.  */
    if (rtd
        && (TYPE_ARG_TYPES (funtype) == NULL_TREE
	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		== void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype))
      && !TARGET_64BIT)
    {
      int nregs = ix86_fntype_regparm (funtype);

      /* With no regparm registers, the hidden return-slot pointer was
	 pushed on the stack and the callee pops it.  */
      if (!nregs)
	return GET_MODE_SIZE (Pmode);
    }

  /* Otherwise the caller pops everything.  */
  return 0;
}

/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */
bool
ix86_function_arg_regno_p (regno)
     int regno;
{
  int i;
  /* 32-bit: the low regparm registers, plus SSE registers when SSE
     argument passing is enabled.  */
  if (!TARGET_64BIT)
    return (regno < REGPARM_MAX
	    || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
  /* 64-bit ABI below.  */
  if (SSE_REGNO_P (regno) && TARGET_SSE)
    return true;
  /* RAX is used as hidden argument to va_arg functions.  */
  if (!regno)
    return true;
  /* The six integer argument registers (RDI, RSI, RDX, RCX, R8, R9)
     are not consecutive hard regs, so scan the mapping table.  */
  for (i = 0; i < REGPARM_MAX; i++)
    if (regno == x86_64_int_parameter_registers[i])
      return true;
  return false;
}

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.
 */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  /* Start from a zeroed state.  */
  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  cum->sse_nregs = SSE_REGPARM_MAX;
  /* In 32-bit mode a "regparm" attribute on the function type overrides
     the global -mregparm default.  */
  if (fntype && !TARGET_64BIT)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }
  cum->maybe_vaarg = false;

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  /* Last list node is not void_type_node: the function is
	     variadic.  32-bit varargs functions take no register
	     arguments at all.  */
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    {
	      if (!TARGET_64BIT)
		cum->nregs = 0;
	      cum->maybe_vaarg = true;
	    }
	}
    }
  /* Unprototyped functions and library calls may also be variadic.  */
  if ((!fntype && !libname)
      || (fntype && !TYPE_ARG_TYPES (fntype)))
    cum->maybe_vaarg = 1;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}

/* x86-64 register passing implementation.
See x86-64 ABI for details. Goal 1645 of this code is to classify each 8bytes of incoming argument by the register 1646 class and assign registers accordingly. */ 1647 1648/* Return the union class of CLASS1 and CLASS2. 1649 See the x86-64 PS ABI for details. */ 1650 1651static enum x86_64_reg_class 1652merge_classes (class1, class2) 1653 enum x86_64_reg_class class1, class2; 1654{ 1655 /* Rule #1: If both classes are equal, this is the resulting class. */ 1656 if (class1 == class2) 1657 return class1; 1658 1659 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is 1660 the other class. */ 1661 if (class1 == X86_64_NO_CLASS) 1662 return class2; 1663 if (class2 == X86_64_NO_CLASS) 1664 return class1; 1665 1666 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ 1667 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) 1668 return X86_64_MEMORY_CLASS; 1669 1670 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ 1671 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) 1672 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) 1673 return X86_64_INTEGERSI_CLASS; 1674 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS 1675 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) 1676 return X86_64_INTEGER_CLASS; 1677 1678 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */ 1679 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS 1680 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS) 1681 return X86_64_MEMORY_CLASS; 1682 1683 /* Rule #6: Otherwise class SSE is used. */ 1684 return X86_64_SSE_CLASS; 1685} 1686 1687/* Classify the argument of type TYPE and mode MODE. 1688 CLASSES will be filled by the register class used to pass each word 1689 of the operand. The number of words is returned. In case the parameter 1690 should be passed in memory, 0 is returned. 
As a special case for zero 1691 sized containers, classes[0] will be NO_CLASS and 1 is returned. 1692 1693 BIT_OFFSET is used internally for handling records and specifies offset 1694 of the offset in bits modulo 256 to avoid overflow cases. 1695 1696 See the x86-64 PS ABI for details. 1697*/ 1698 1699static int 1700classify_argument (mode, type, classes, bit_offset) 1701 enum machine_mode mode; 1702 tree type; 1703 enum x86_64_reg_class classes[MAX_CLASSES]; 1704 int bit_offset; 1705{ 1706 int bytes = 1707 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 1708 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 1709 1710 /* Variable sized entities are always passed/returned in memory. */ 1711 if (bytes < 0) 1712 return 0; 1713 1714 if (type && AGGREGATE_TYPE_P (type)) 1715 { 1716 int i; 1717 tree field; 1718 enum x86_64_reg_class subclasses[MAX_CLASSES]; 1719 1720 /* On x86-64 we pass structures larger than 16 bytes on the stack. */ 1721 if (bytes > 16) 1722 return 0; 1723 1724 for (i = 0; i < words; i++) 1725 classes[i] = X86_64_NO_CLASS; 1726 1727 /* Zero sized arrays or structures are NO_CLASS. We return 0 to 1728 signalize memory class, so handle it as special case. */ 1729 if (!words) 1730 { 1731 classes[0] = X86_64_NO_CLASS; 1732 return 1; 1733 } 1734 1735 /* Classify each field of record and merge classes. */ 1736 if (TREE_CODE (type) == RECORD_TYPE) 1737 { 1738 /* For classes first merge in the field of the subclasses. 
*/ 1739 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL) 1740 { 1741 tree bases = TYPE_BINFO_BASETYPES (type); 1742 int n_bases = TREE_VEC_LENGTH (bases); 1743 int i; 1744 1745 for (i = 0; i < n_bases; ++i) 1746 { 1747 tree binfo = TREE_VEC_ELT (bases, i); 1748 int num; 1749 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8; 1750 tree type = BINFO_TYPE (binfo); 1751 1752 num = classify_argument (TYPE_MODE (type), 1753 type, subclasses, 1754 (offset + bit_offset) % 256); 1755 if (!num) 1756 return 0; 1757 for (i = 0; i < num; i++) 1758 { 1759 int pos = (offset + (bit_offset % 64)) / 8 / 8; 1760 classes[i + pos] = 1761 merge_classes (subclasses[i], classes[i + pos]); 1762 } 1763 } 1764 } 1765 /* And now merge the fields of structure. */ 1766 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 1767 { 1768 if (TREE_CODE (field) == FIELD_DECL) 1769 { 1770 int num; 1771 1772 /* Bitfields are always classified as integer. Handle them 1773 early, since later code would consider them to be 1774 misaligned integers. */ 1775 if (DECL_BIT_FIELD (field)) 1776 { 1777 for (i = int_bit_position (field) / 8 / 8; 1778 i < (int_bit_position (field) 1779 + tree_low_cst (DECL_SIZE (field), 0) 1780 + 63) / 8 / 8; i++) 1781 classes[i] = 1782 merge_classes (X86_64_INTEGER_CLASS, 1783 classes[i]); 1784 } 1785 else 1786 { 1787 num = classify_argument (TYPE_MODE (TREE_TYPE (field)), 1788 TREE_TYPE (field), subclasses, 1789 (int_bit_position (field) 1790 + bit_offset) % 256); 1791 if (!num) 1792 return 0; 1793 for (i = 0; i < num; i++) 1794 { 1795 int pos = 1796 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; 1797 classes[i + pos] = 1798 merge_classes (subclasses[i], classes[i + pos]); 1799 } 1800 } 1801 } 1802 } 1803 } 1804 /* Arrays are handled as small records. 
*/ 1805 else if (TREE_CODE (type) == ARRAY_TYPE) 1806 { 1807 int num; 1808 num = classify_argument (TYPE_MODE (TREE_TYPE (type)), 1809 TREE_TYPE (type), subclasses, bit_offset); 1810 if (!num) 1811 return 0; 1812 1813 /* The partial classes are now full classes. */ 1814 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) 1815 subclasses[0] = X86_64_SSE_CLASS; 1816 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4) 1817 subclasses[0] = X86_64_INTEGER_CLASS; 1818 1819 for (i = 0; i < words; i++) 1820 classes[i] = subclasses[i % num]; 1821 } 1822 /* Unions are similar to RECORD_TYPE but offset is always 0. */ 1823 else if (TREE_CODE (type) == UNION_TYPE 1824 || TREE_CODE (type) == QUAL_UNION_TYPE) 1825 { 1826 /* For classes first merge in the field of the subclasses. */ 1827 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL) 1828 { 1829 tree bases = TYPE_BINFO_BASETYPES (type); 1830 int n_bases = TREE_VEC_LENGTH (bases); 1831 int i; 1832 1833 for (i = 0; i < n_bases; ++i) 1834 { 1835 tree binfo = TREE_VEC_ELT (bases, i); 1836 int num; 1837 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8; 1838 tree type = BINFO_TYPE (binfo); 1839 1840 num = classify_argument (TYPE_MODE (type), 1841 type, subclasses, 1842 (offset + (bit_offset % 64)) % 256); 1843 if (!num) 1844 return 0; 1845 for (i = 0; i < num; i++) 1846 { 1847 int pos = (offset + (bit_offset % 64)) / 8 / 8; 1848 classes[i + pos] = 1849 merge_classes (subclasses[i], classes[i + pos]); 1850 } 1851 } 1852 } 1853 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 1854 { 1855 if (TREE_CODE (field) == FIELD_DECL) 1856 { 1857 int num; 1858 num = classify_argument (TYPE_MODE (TREE_TYPE (field)), 1859 TREE_TYPE (field), subclasses, 1860 bit_offset); 1861 if (!num) 1862 return 0; 1863 for (i = 0; i < num; i++) 1864 classes[i] = merge_classes (subclasses[i], classes[i]); 1865 } 1866 } 1867 } 1868 else 1869 abort (); 1870 1871 /* Final merger cleanup. 
*/ 1872 for (i = 0; i < words; i++) 1873 { 1874 /* If one class is MEMORY, everything should be passed in 1875 memory. */ 1876 if (classes[i] == X86_64_MEMORY_CLASS) 1877 return 0; 1878 1879 /* The X86_64_SSEUP_CLASS should be always preceded by 1880 X86_64_SSE_CLASS. */ 1881 if (classes[i] == X86_64_SSEUP_CLASS 1882 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS)) 1883 classes[i] = X86_64_SSE_CLASS; 1884 1885 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */ 1886 if (classes[i] == X86_64_X87UP_CLASS 1887 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS)) 1888 classes[i] = X86_64_SSE_CLASS; 1889 } 1890 return words; 1891 } 1892 1893 /* Compute alignment needed. We align all types to natural boundaries with 1894 exception of XFmode that is aligned to 64bits. */ 1895 if (mode != VOIDmode && mode != BLKmode) 1896 { 1897 int mode_alignment = GET_MODE_BITSIZE (mode); 1898 1899 if (mode == XFmode) 1900 mode_alignment = 128; 1901 else if (mode == XCmode) 1902 mode_alignment = 256; 1903 /* Misaligned fields are always returned in memory. */ 1904 if (bit_offset % mode_alignment) 1905 return 0; 1906 } 1907 1908 /* Classification of atomic types. 
*/ 1909 switch (mode) 1910 { 1911 case DImode: 1912 case SImode: 1913 case HImode: 1914 case QImode: 1915 case CSImode: 1916 case CHImode: 1917 case CQImode: 1918 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) 1919 classes[0] = X86_64_INTEGERSI_CLASS; 1920 else 1921 classes[0] = X86_64_INTEGER_CLASS; 1922 return 1; 1923 case CDImode: 1924 case TImode: 1925 classes[0] = classes[1] = X86_64_INTEGER_CLASS; 1926 return 2; 1927 case CTImode: 1928 classes[0] = classes[1] = X86_64_INTEGER_CLASS; 1929 classes[2] = classes[3] = X86_64_INTEGER_CLASS; 1930 return 4; 1931 case SFmode: 1932 if (!(bit_offset % 64)) 1933 classes[0] = X86_64_SSESF_CLASS; 1934 else 1935 classes[0] = X86_64_SSE_CLASS; 1936 return 1; 1937 case DFmode: 1938 classes[0] = X86_64_SSEDF_CLASS; 1939 return 1; 1940 case TFmode: 1941 classes[0] = X86_64_X87_CLASS; 1942 classes[1] = X86_64_X87UP_CLASS; 1943 return 2; 1944 case TCmode: 1945 classes[0] = X86_64_X87_CLASS; 1946 classes[1] = X86_64_X87UP_CLASS; 1947 classes[2] = X86_64_X87_CLASS; 1948 classes[3] = X86_64_X87UP_CLASS; 1949 return 4; 1950 case DCmode: 1951 classes[0] = X86_64_SSEDF_CLASS; 1952 classes[1] = X86_64_SSEDF_CLASS; 1953 return 2; 1954 case SCmode: 1955 classes[0] = X86_64_SSE_CLASS; 1956 return 1; 1957 case V4SFmode: 1958 case V4SImode: 1959 case V16QImode: 1960 case V8HImode: 1961 case V2DFmode: 1962 case V2DImode: 1963 classes[0] = X86_64_SSE_CLASS; 1964 classes[1] = X86_64_SSEUP_CLASS; 1965 return 2; 1966 case V2SFmode: 1967 case V2SImode: 1968 case V4HImode: 1969 case V8QImode: 1970 return 0; 1971 case BLKmode: 1972 case VOIDmode: 1973 return 0; 1974 default: 1975 abort (); 1976 } 1977} 1978 1979/* Examine the argument and return set number of register required in each 1980 class. Return 0 iff parameter should be passed in memory. 
*/
static int
examine_argument (mode, type, in_return, int_nregs, sse_nregs)
     enum machine_mode mode;
     tree type;
     int *int_nregs, *sse_nregs;
     int in_return;
{
  enum x86_64_reg_class class[MAX_CLASSES];
  int n = classify_argument (mode, type, class, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  /* Memory class: pass in memory.  */
  if (!n)
    return 0;
  /* Count integer and SSE registers needed by the classified words.
     X87 classes are only valid in return values (IN_RETURN nonzero);
     as an argument they force memory passing.  */
  for (n--; n >= 0; n--)
    switch (class[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	if (!in_return)
	  return 0;
	break;
      case X86_64_MEMORY_CLASS:
	/* classify_argument signals memory by returning 0, never by
	   emitting this class; reaching here is an internal error.  */
	abort ();
      }
  return 1;
}
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */
static rtx
construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
     enum machine_mode mode;
     tree type;
     int in_return;
     int nintregs, nsseregs;
     const int * intreg;
     int sse_regno;
{
  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class class[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, class, 0);
  if (TARGET_DEBUG_ARG)
    {
      if (!n)
	fprintf (stderr, "Memory class\n");
      else
	{
	  fprintf (stderr, "Classes:");
	  for (i = 0; i < n; i++)
	    {
	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
	    }
	  fprintf (stderr, "\n");
	}
    }
  /* NULL means "pass in memory".  */
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	abort ();
      }
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (TFmode, FIRST_STACK_REG);
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);
  if (n == 4
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
      && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (TCmode, FIRST_STACK_REG);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (class[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode, *intreg),
					     GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (SFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (DFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  /* An SSEUP word following us extends the same SSE register
	     to a full 16 bytes, so use TImode and skip that word.  */
	  if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
	    tmpmode = TImode;
	  else
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  if (tmpmode == TImode)
	    i++;
	  sse_regno++;
	  break;
	default:
	  abort ();
	}
    }
  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* whether or not the argument was named */
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
  if (TARGET_64BIT)
    {
      int int_nregs, sse_nregs;
      /* Memory-classed arguments consume stack words; register-classed
	 ones consume registers only when enough of both kinds remain,
	 otherwise the whole argument goes on the stack.  */
      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
	cum->words += words;
      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
	{
	  cum->nregs -= int_nregs;
	  cum->sse_nregs -= sse_nregs;
	  cum->regno += int_nregs;
	  cum->sse_regno += sse_nregs;
	}
      else
	cum->words += words;
    }
  else
    {
      if (TARGET_SSE && mode == TImode)
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      else
	{
	  cum->words += words;
	  cum->nregs -= words;
	  cum->regno += words;

	  if (cum->nregs <= 0)
	    {
	      cum->nregs = 0;
	      cum->regno = 0;
	    }
	}
    }
  return;
}

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).
*/

rtx
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Handle a hidden AL argument containing the number of SSE registers
     used by varargs x86-64 functions.  For the i386 ABI just return
     constm1_rtx to avoid any AL settings.  */
  if (mode == VOIDmode)
    {
      if (TARGET_64BIT)
	return GEN_INT (cum->maybe_vaarg
			? (cum->sse_nregs < 0
			   ? SSE_REGPARM_MAX
			   : cum->sse_regno)
			: -1);
      else
	return constm1_rtx;
    }
  if (TARGET_64BIT)
    ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
			       &x86_64_int_parameter_registers [cum->regno],
			       cum->sse_regno);
  else
    switch (mode)
      {
	/* For now, pass fp/complex values on the stack.  */
      default:
	break;

      case BLKmode:
	if (bytes < 0)
	  break;
	/* FALLTHRU */
      case DImode:
      case SImode:
      case HImode:
      case QImode:
	if (words <= cum->nregs)
	  ret = gen_rtx_REG (mode, cum->regno);
	break;
      case TImode:
	if (cum->sse_nregs)
	  ret = gen_rtx_REG (mode, cum->sse_regno);
	break;
      }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	print_simple_rtl (stderr, ret);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}

/* Return true when TYPE should be 128bit aligned for 32bit argument passing
   ABI  */
static bool
contains_128bit_aligned_vector_p (type)
     tree type;
{
  enum machine_mode mode = TYPE_MODE (type);
  if (SSE_REG_MODE_P (mode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      if (TREE_CODE (type) == RECORD_TYPE
	  || TREE_CODE (type) == UNION_TYPE
	  || TREE_CODE (type) == QUAL_UNION_TYPE)
	{
	  tree field;

	  /* First check the C++ base classes, if any.  */
	  if (TYPE_BINFO (type) != NULL
	      && TYPE_BINFO_BASETYPES (type) != NULL)
	    {
	      tree bases = TYPE_BINFO_BASETYPES (type);
	      int n_bases = TREE_VEC_LENGTH (bases);
	      int i;

	      for (i = 0; i < n_bases; ++i)
		{
		  tree binfo = TREE_VEC_ELT (bases, i);
		  tree type = BINFO_TYPE (binfo);

		  if (contains_128bit_aligned_vector_p (type))
		    return true;
		}
	    }
	  /* And now check the fields of the structure.  */
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL
		  && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
		return true;
	    }
	}
      /* Just in case some language passes arrays by value.  */
      else if (TREE_CODE (type) == ARRAY_TYPE)
	{
	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
	    return true;
	}
      else
	abort ();
    }
  return false;
}

/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

int
function_arg_pass_by_reference (cum, mode, type, named)
     CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     tree type;
     int named ATTRIBUTE_UNUSED;
{
  if (!TARGET_64BIT)
    return 0;

  /* Variable sized types are passed by reference on x86-64.  */
  if (type && int_size_in_bytes (type) == -1)
    {
      if (TARGET_DEBUG_ARG)
	fprintf (stderr, "function_arg_pass_by_reference\n");
      return 1;
    }

  return 0;
}

/* Gives the alignment boundary, in bits, of an argument with the specified mode
   and type.  */

int
ix86_function_arg_boundary (mode, type)
     enum machine_mode mode;
     tree type;
{
  int align;
  if (type)
    align = TYPE_ALIGN (type);
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  if (!TARGET_64BIT)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!type)
	{
	  if (!SSE_REG_MODE_P (mode))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!contains_128bit_aligned_vector_p (type))
	    align = PARM_BOUNDARY;
	}
      /* Over-aligning without SSE would be an ABI surprise; it should
	 only happen for 128bit vectors, which require SSE.  */
      if (align != PARM_BOUNDARY && !TARGET_SSE)
	abort();
    }
  if (align > 128)
    align = 128;
  return align;
}

/* Return true if N is a possible register number of function value.  */
bool
ix86_function_value_regno_p (regno)
     int regno;
{
  if (!TARGET_64BIT)
    {
      return ((regno) == 0
	      || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
	      || ((regno) == FIRST_SSE_REG && TARGET_SSE));
    }
  return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
	  || ((regno) == FIRST_SSE_REG && TARGET_SSE)
	  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
}

/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */
rtx
ix86_function_value (valtype)
     tree valtype;
{
  if (TARGET_64BIT)
    {
      rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
				     REGPARM_MAX, SSE_REGPARM_MAX,
				     x86_64_int_return_registers, 0);
      /* For zero sized structures, construct_container returns NULL, but we
	 need to keep the rest of the compiler happy by returning a
	 meaningful value.  */
      if (!ret)
	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
      return ret;
    }
  else
    return gen_rtx_REG (TYPE_MODE (valtype),
			ix86_value_regno (TYPE_MODE (valtype)));
}

/* Return false iff type is returned in memory.
*/
int
ix86_return_in_memory (type)
     tree type;
{
  int needed_intregs, needed_sseregs, size;
  enum machine_mode mode = TYPE_MODE (type);

  /* On x86-64 the classification algorithm decides: anything that does
     not fit the register classes goes to memory.  */
  if (TARGET_64BIT)
    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);

  if (mode == BLKmode)
    return 1;

  size = int_size_in_bytes (type);

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return 0;

      /* MMX/3dNow values are returned on the stack, since we've
	 got to EMMS/FEMMS before returning.  */
      if (size == 8)
	return 1;

      /* SSE values are returned in XMM0.  */
      /* ??? Except when it doesn't exist?  We have a choice of
	 either (1) being abi incompatible with a -march switch,
	 or (2) generating an error here.  Given no good solution,
	 I think the safest thing is one warning.  The user won't
	 be able to use -Werror, but...  */
      if (size == 16)
	{
	  static bool warned;

	  if (TARGET_SSE)
	    return 0;

	  if (!warned)
	    {
	      warned = true;
	      warning ("SSE vector return without SSE enabled changes the ABI");
	    }
	  return 1;
	}
    }

  if (mode == TFmode)
    return 0;
  if (size > 12)
    return 1;
  return 0;
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */
rtx
ix86_libcall_value (mode)
     enum machine_mode mode;
{
  if (TARGET_64BIT)
    {
      switch (mode)
	{
	  /* Scalar and complex floats come back in SSE registers,
	     long doubles on the x87 stack, everything else in RAX.  */
	case SFmode:
	case SCmode:
	case DFmode:
	case DCmode:
	  return gen_rtx_REG (mode, FIRST_SSE_REG);
	case TFmode:
	case TCmode:
	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
	default:
	  return gen_rtx_REG (mode, 0);
	}
    }
  else
    return gen_rtx_REG (mode, ix86_value_regno (mode));
}

/* Given a mode, return the register to use for a return value.  */

static int
ix86_value_regno (mode)
     enum machine_mode mode;
{
  /* Floating point return values in %st(0).  */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
    return FIRST_FLOAT_REG;
  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  */
  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    return FIRST_SSE_REG;
  /* Everything else in %eax.  */
  return 0;
}

/* Create the va_list data type.  */

tree
ix86_build_va_list ()
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT)
    return build_pointer_type (char_type_node);

  /* Build the x86-64 __va_list_tag record with the four ABI-mandated
     fields: gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}

/* Perform any needed actions needed for a function that is receiving a
   variable number of arguments.

   CUM is as above.

   MODE and TYPE are the mode and type of the current parameter.

   PRETEND_SIZE is a variable that should be set to the amount of stack
   that must be pushed by the prolog to pretend that our caller pushed
   it.

   Normally, this macro will push all remaining incoming registers on the
   stack and set PRETEND_SIZE to the length of the registers pushed.
*/

void
ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int *pretend_size ATTRIBUTE_UNUSED;
     int no_rtl;

{
  CUMULATIVE_ARGS next_cum;
  rtx save_area = NULL_RTX, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  int set;
  tree fntype;
  int stdarg_p;
  int i;

  if (!TARGET_64BIT)
    return;

  /* Indicate to allocate space on the stack for varargs save area.  */
  ix86_save_varrargs_registers = 1;

  cfun->stack_alignment_needed = 128;

  fntype = TREE_TYPE (current_function_decl);
  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		  != void_type_node));

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p)
    function_arg_advance (&next_cum, mode, type, 1);

  if (!no_rtl)
    save_area = frame_pointer_rtx;

  set = get_varargs_alias_set ();

  /* Spill the remaining unnamed integer argument registers into the
     register save area.  */
  for (i = next_cum.regno; i < ix86_regparm; i++)
    {
      mem = gen_rtx_MEM (Pmode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
					x86_64_int_parameter_registers[i]));
    }

  if (next_cum.sse_nregs)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function.  We use
	 sse_prologue_save insn template that produces computed jump across
	 SSE saves.  We need some preparation work to get this working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute address to jump to :
         label - 5*eax + nnamed_sse_arguments*5  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      gen_rtx_MULT (Pmode, nsse_reg,
					    GEN_INT (4))));
      if (next_cum.sse_regno)
	emit_move_insn
	  (nsse_reg,
	   gen_rtx_CONST (DImode,
			  gen_rtx_PLUS (DImode,
					label_ref,
					GEN_INT (next_cum.sse_regno * 4))));
      else
	emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute address of memory block we save into.  We always use pointer
	 pointing 127 bytes after first byte to store - this is needed to keep
	 instruction size limited by 4 bytes.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      plus_constant (save_area,
					     8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
					GEN_INT (next_cum.sse_regno), label));
    }

}

/* Implement va_start.  */

void
ix86_va_start (valist, nextarg)
     tree valist;
     rtx nextarg;
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
	     (int) words, (int) n_gpr, (int) n_fpr);

  /* gp_offset: each GP register slot is 8 bytes.  */
  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
	     build_int_2 (n_gpr * 8, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* fp_offset: SSE slots are 16 bytes and follow the GP slots.  */
  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
	     build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the overflow area.  */
  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
  if (words != 0)
    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
	       build_int_2 (words * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the register save area.
     Prologue of the function save it right above stack frame.  */
  t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
  t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}

/* Implement va_arg.  */
rtx
ix86_va_arg (valist, type)
     tree valist, type;
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  rtx lab_false, lab_over = NULL_RTX;
  rtx addr_rtx, r;
  rtx container;
  int indirect_p = 0;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      return std_expand_builtin_va_arg (valist, type);
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  size = int_size_in_bytes (type);
  if (size == -1)
    {
      /* Passed by reference.  */
      indirect_p = 1;
      type = build_pointer_type (type);
      size = int_size_in_bytes (type);
    }
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  container = construct_container (TYPE_MODE (type), type, 0,
				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
  /*
   * Pull the value out of the saved registers ...
2837 */ 2838 2839 addr_rtx = gen_reg_rtx (Pmode); 2840 2841 if (container) 2842 { 2843 rtx int_addr_rtx, sse_addr_rtx; 2844 int needed_intregs, needed_sseregs; 2845 int need_temp; 2846 2847 lab_over = gen_label_rtx (); 2848 lab_false = gen_label_rtx (); 2849 2850 examine_argument (TYPE_MODE (type), type, 0, 2851 &needed_intregs, &needed_sseregs); 2852 2853 2854 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64) 2855 || TYPE_ALIGN (type) > 128); 2856 2857 /* In case we are passing structure, verify that it is consetuctive block 2858 on the register save area. If not we need to do moves. */ 2859 if (!need_temp && !REG_P (container)) 2860 { 2861 /* Verify that all registers are strictly consetuctive */ 2862 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0)))) 2863 { 2864 int i; 2865 2866 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) 2867 { 2868 rtx slot = XVECEXP (container, 0, i); 2869 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i 2870 || INTVAL (XEXP (slot, 1)) != i * 16) 2871 need_temp = 1; 2872 } 2873 } 2874 else 2875 { 2876 int i; 2877 2878 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) 2879 { 2880 rtx slot = XVECEXP (container, 0, i); 2881 if (REGNO (XEXP (slot, 0)) != (unsigned int) i 2882 || INTVAL (XEXP (slot, 1)) != i * 8) 2883 need_temp = 1; 2884 } 2885 } 2886 } 2887 if (!need_temp) 2888 { 2889 int_addr_rtx = addr_rtx; 2890 sse_addr_rtx = addr_rtx; 2891 } 2892 else 2893 { 2894 int_addr_rtx = gen_reg_rtx (Pmode); 2895 sse_addr_rtx = gen_reg_rtx (Pmode); 2896 } 2897 /* First ensure that we fit completely in registers. 
*/ 2898 if (needed_intregs) 2899 { 2900 emit_cmp_and_jump_insns (expand_expr 2901 (gpr, NULL_RTX, SImode, EXPAND_NORMAL), 2902 GEN_INT ((REGPARM_MAX - needed_intregs + 2903 1) * 8), GE, const1_rtx, SImode, 2904 1, lab_false); 2905 } 2906 if (needed_sseregs) 2907 { 2908 emit_cmp_and_jump_insns (expand_expr 2909 (fpr, NULL_RTX, SImode, EXPAND_NORMAL), 2910 GEN_INT ((SSE_REGPARM_MAX - 2911 needed_sseregs + 1) * 16 + 2912 REGPARM_MAX * 8), GE, const1_rtx, 2913 SImode, 1, lab_false); 2914 } 2915 2916 /* Compute index to start of area used for integer regs. */ 2917 if (needed_intregs) 2918 { 2919 t = build (PLUS_EXPR, ptr_type_node, sav, gpr); 2920 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL); 2921 if (r != int_addr_rtx) 2922 emit_move_insn (int_addr_rtx, r); 2923 } 2924 if (needed_sseregs) 2925 { 2926 t = build (PLUS_EXPR, ptr_type_node, sav, fpr); 2927 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL); 2928 if (r != sse_addr_rtx) 2929 emit_move_insn (sse_addr_rtx, r); 2930 } 2931 if (need_temp) 2932 { 2933 int i; 2934 rtx mem; 2935 rtx x; 2936 2937 /* Never use the memory itself, as it has the alias set. 
*/ 2938 x = XEXP (assign_temp (type, 0, 1, 0), 0); 2939 mem = gen_rtx_MEM (BLKmode, x); 2940 force_operand (x, addr_rtx); 2941 set_mem_alias_set (mem, get_varargs_alias_set ()); 2942 set_mem_align (mem, BITS_PER_UNIT); 2943 2944 for (i = 0; i < XVECLEN (container, 0); i++) 2945 { 2946 rtx slot = XVECEXP (container, 0, i); 2947 rtx reg = XEXP (slot, 0); 2948 enum machine_mode mode = GET_MODE (reg); 2949 rtx src_addr; 2950 rtx src_mem; 2951 int src_offset; 2952 rtx dest_mem; 2953 2954 if (SSE_REGNO_P (REGNO (reg))) 2955 { 2956 src_addr = sse_addr_rtx; 2957 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16; 2958 } 2959 else 2960 { 2961 src_addr = int_addr_rtx; 2962 src_offset = REGNO (reg) * 8; 2963 } 2964 src_mem = gen_rtx_MEM (mode, src_addr); 2965 set_mem_alias_set (src_mem, get_varargs_alias_set ()); 2966 src_mem = adjust_address (src_mem, mode, src_offset); 2967 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1))); 2968 emit_move_insn (dest_mem, src_mem); 2969 } 2970 } 2971 2972 if (needed_intregs) 2973 { 2974 t = 2975 build (PLUS_EXPR, TREE_TYPE (gpr), gpr, 2976 build_int_2 (needed_intregs * 8, 0)); 2977 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t); 2978 TREE_SIDE_EFFECTS (t) = 1; 2979 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 2980 } 2981 if (needed_sseregs) 2982 { 2983 t = 2984 build (PLUS_EXPR, TREE_TYPE (fpr), fpr, 2985 build_int_2 (needed_sseregs * 16, 0)); 2986 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t); 2987 TREE_SIDE_EFFECTS (t) = 1; 2988 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 2989 } 2990 2991 emit_jump_insn (gen_jump (lab_over)); 2992 emit_barrier (); 2993 emit_label (lab_false); 2994 } 2995 2996 /* ... otherwise out of the overflow area. */ 2997 2998 /* Care for on-stack alignment if needed. 
*/ 2999 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64) 3000 t = ovf; 3001 else 3002 { 3003 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8; 3004 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0)); 3005 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1)); 3006 } 3007 t = save_expr (t); 3008 3009 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL); 3010 if (r != addr_rtx) 3011 emit_move_insn (addr_rtx, r); 3012 3013 t = 3014 build (PLUS_EXPR, TREE_TYPE (t), t, 3015 build_int_2 (rsize * UNITS_PER_WORD, 0)); 3016 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t); 3017 TREE_SIDE_EFFECTS (t) = 1; 3018 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 3019 3020 if (container) 3021 emit_label (lab_over); 3022 3023 if (indirect_p) 3024 { 3025 r = gen_rtx_MEM (Pmode, addr_rtx); 3026 set_mem_alias_set (r, get_varargs_alias_set ()); 3027 emit_move_insn (addr_rtx, r); 3028 } 3029 3030 return addr_rtx; 3031} 3032 3033/* Return nonzero if OP is either a i387 or SSE fp register. */ 3034int 3035any_fp_register_operand (op, mode) 3036 rtx op; 3037 enum machine_mode mode ATTRIBUTE_UNUSED; 3038{ 3039 return ANY_FP_REG_P (op); 3040} 3041 3042/* Return nonzero if OP is an i387 fp register. */ 3043int 3044fp_register_operand (op, mode) 3045 rtx op; 3046 enum machine_mode mode ATTRIBUTE_UNUSED; 3047{ 3048 return FP_REG_P (op); 3049} 3050 3051/* Return nonzero if OP is a non-fp register_operand. */ 3052int 3053register_and_not_any_fp_reg_operand (op, mode) 3054 rtx op; 3055 enum machine_mode mode; 3056{ 3057 return register_operand (op, mode) && !ANY_FP_REG_P (op); 3058} 3059 3060/* Return nonzero of OP is a register operand other than an 3061 i387 fp register. */ 3062int 3063register_and_not_fp_reg_operand (op, mode) 3064 rtx op; 3065 enum machine_mode mode; 3066{ 3067 return register_operand (op, mode) && !FP_REG_P (op); 3068} 3069 3070/* Return nonzero if OP is general operand representable on x86_64. 
   */

int
x86_64_general_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!TARGET_64BIT)
    return general_operand (op, mode);
  /* Registers and memory references are always acceptable.  */
  if (nonimmediate_operand (op, mode))
    return 1;
  /* Otherwise only immediates that fit in 32 sign-extended bits do.  */
  return x86_64_sign_extended_value (op);
}

/* Return nonzero if OP is general operand representable on x86_64
   as either sign extended or zero extended constant.  */

int
x86_64_szext_general_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!TARGET_64BIT)
    return general_operand (op, mode);
  if (nonimmediate_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
}

/* Return nonzero if OP is nonmemory operand representable on x86_64.  */

int
x86_64_nonmemory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!TARGET_64BIT)
    return nonmemory_operand (op, mode);
  if (register_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op);
}

/* Return nonzero if OP is nonmemory operand acceptable by movabs
   patterns.  When generating PIC code, symbolic constants are not
   allowed here.  */

int
x86_64_movabs_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!TARGET_64BIT || !flag_pic)
    return nonmemory_operand (op, mode);
  if (register_operand (op, mode) || x86_64_sign_extended_value (op))
    return 1;
  if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
    return 1;
  return 0;
}

/* Return nonzero if OPNUM's MEM should be matched
   in movabs* patterns.  Aborts unless INSN's pattern is a SET (possibly
   wrapped in a PARALLEL) whose operand OPNUM is a MEM, perhaps under
   SUBREGs.  */

int
ix86_check_movabs (insn, opnum)
     rtx insn;
     int opnum;
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  mem = XEXP (set, opnum);
  /* Look through any SUBREGs wrapping the memory reference.  */
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  if (GET_CODE (mem) != MEM)
    abort ();
  /* Volatile memory is only acceptable when volatile_ok is set.  */
  return (volatile_ok || !MEM_VOLATILE_P (mem));
}

/* Return nonzero if OP is nonmemory operand representable on x86_64.  */

int
x86_64_szext_nonmemory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!TARGET_64BIT)
    return nonmemory_operand (op, mode);
  if (register_operand (op, mode))
    return 1;
  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
}

/* Return nonzero if OP is immediate operand representable on x86_64.  */

int
x86_64_immediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (!TARGET_64BIT)
    return immediate_operand (op, mode);
  return x86_64_sign_extended_value (op);
}

/* Return nonzero if OP is immediate operand representable on x86_64
   as a zero-extended 32-bit value.  */

int
x86_64_zext_immediate_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return x86_64_zero_extended_value (op);
}

/* Return nonzero if OP is (const_int 1), else return zero.  */

int
const_int_1_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
}

/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
   for shift & compare patterns, as shifting by 0 does not change flags),
   else return zero.
   */

int
const_int_1_31_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
}

/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      /* Bare symbol, label, or GOT-related UNSPEC inside CONST.  */
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && (XINT (op, 1) == UNSPEC_GOT
		  || XINT (op, 1) == UNSPEC_GOTOFF
		  || XINT (op, 1) == UNSPEC_GOTPCREL)))
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      /* Symbol or label plus a constant offset.  */
      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != UNSPEC_GOTOFF)
	return 0;

      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}

/* Return true if the operand contains a @GOT or @GOTOFF reference.  */

int
pic_symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) != CONST)
    return 0;
  op = XEXP (op, 0);
  if (TARGET_64BIT)
    {
      /* On 64-bit the UNSPEC is always the first operand.  */
      if (GET_CODE (XEXP (op, 0)) == UNSPEC)
	return 1;
    }
  else
    {
      /* On 32-bit accept a bare UNSPEC or UNSPEC plus constant.  */
      if (GET_CODE (op) == UNSPEC)
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
    }
  return 0;
}

/* Return true if OP is a symbolic operand that resolves locally.  */

static int
local_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* Strip off a constant offset, if any.  */
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);

  if (GET_CODE (op) == LABEL_REF)
    return 1;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  /* These we've been told are local by varasm and encode_section_info
     respectively.  */
  if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
    return 1;

  /* There is, however, a not insubstantial body of code in the rest of
     the compiler that assumes it can just stick the results of
     ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
  /* ??? This is a hack.  Should update the body of the compiler to
     always create a DECL and invoke targetm.encode_section_info.  */
  if (strncmp (XSTR (op, 0), internal_label_prefix,
	       internal_label_prefix_len) == 0)
    return 1;

  return 0;
}

/* Test for various thread-local symbols.  See ix86_encode_section_info.
   */

int
tls_symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *symbol_str;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  symbol_str = XSTR (op, 0);

  /* TLS symbols are encoded with a '%' prefix followed by a model
     character (see tls_model_chars).  */
  if (symbol_str[0] != '%')
    return 0;
  /* NOTE(review): if symbol_str[1] is not in tls_model_chars, strchr
     returns NULL and this subtraction is undefined — presumably the
     encoding guarantees membership; verify in ix86_encode_section_info.  */
  return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
}

/* Return true if OP is a TLS symbol encoded with model KIND.  */
static int
tls_symbolic_operand_1 (op, kind)
     rtx op;
     enum tls_model kind;
{
  const char *symbol_str;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  symbol_str = XSTR (op, 0);

  return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
}

int
global_dynamic_symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
}

int
local_dynamic_symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
}

int
initial_exec_symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
}

int
local_exec_symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
}

/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}

/* Return true if OP is a SYMBOL_REF, possibly with a constant offset.  */
int
constant_call_address_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);
  return GET_CODE (op) == SYMBOL_REF;
}

/* Match exactly zero and one.  */

int
const0_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return op == CONST0_RTX (mode);
}

int
const1_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return op == const1_rtx;
}

/* Match 2, 4, or 8.  Used for leal multiplicands.  */

int
const248_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
}

/* True if this is a constant appropriate for an increment or decrement.  */

int
incdec_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* On Pentium4, the inc and dec operations cause an extra dependency on
     flag registers, since carry flag is not set.  */
  if (TARGET_PENTIUM4 && !optimize_size)
    return 0;
  return op == const1_rtx || op == constm1_rtx;
}

/* Return nonzero if OP is acceptable as operand of DImode shift
   expander.  */

int
shiftdi_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (TARGET_64BIT)
    return nonimmediate_operand (op, mode);
  else
    return register_operand (op, mode);
}

/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is
   a register operand.

   This is used to prevent esp from being used as an index reg.
   Which would only happen in pathological cases.  */

int
reg_no_sp_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
    return 0;

  return register_operand (op, mode);
}

/* Return true if OP is an MMX register.  */
int
mmx_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return MMX_REG_P (op);
}

/* Return false if this is any eliminable register.  Otherwise
   general_operand.  */

int
general_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;
  if (REG_P (t)
      && REGNO (t) >= FIRST_VIRTUAL_REGISTER
      && REGNO (t) <= LAST_VIRTUAL_REGISTER)
    return 0;

  return general_operand (op, mode);
}

/* Return false if this is any eliminable register.  Otherwise
   register_operand or const_int.  */

int
nonmemory_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
}

/* Return false if this is any eliminable register or stack register,
   otherwise work like register_operand.
   NOTE(review): the body actually falls through to general_operand,
   not register_operand, though it rejects non-REG operands first —
   confirm the comment/code mismatch against upstream.  */

int
index_register_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (!REG_P (t))
    return 0;
  if (t == arg_pointer_rtx
      || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx
      || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx
      || REGNO (t) == STACK_POINTER_REGNUM)
    return 0;

  return general_operand (op, mode);
}

/* Return true if op is a Q_REGS class register.  */

int
q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return ANY_QI_REG_P (op);
}

/* Return true if op is a NON_Q_REGS class register.  */

int
non_q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return NON_QI_REG_P (op);
}

/* Return 1 when OP is operand acceptable for standard SSE move.
   */
int
vector_move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (nonimmediate_operand (op, mode))
    return 1;
  if (GET_MODE (op) != mode && mode != VOIDmode)
    return 0;
  /* The only immediate accepted is the all-zeros constant.  */
  return (op == CONST0_RTX (GET_MODE (op)));
}

/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
   insns.  */
int
sse_comparison_operator (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  enum rtx_code code = GET_CODE (op);
  switch (code)
    {
      /* Operations supported directly.  */
    case EQ:
    case LT:
    case LE:
    case UNORDERED:
    case NE:
    case UNGE:
    case UNGT:
    case ORDERED:
      return 1;
      /* These are equivalent to ones above in non-IEEE comparisons.  */
    case UNEQ:
    case UNLT:
    case UNLE:
    case LTGT:
    case GE:
    case GT:
      return !TARGET_IEEE_FP;
    default:
      return 0;
    }
}
/* Return 1 if OP is a valid comparison operator in valid mode.  */
int
ix86_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));

  /* FP comparisons are valid only when they need no splitting.  */
  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      return (bypass_code == NIL && second_code == NIL);
    }
  /* For integer comparisons the acceptable codes depend on which
     flags the CC mode guarantees to be valid.  */
  switch (code)
    {
    case EQ: case NE:
      return 1;
    case LT: case GE:
      if (inmode == CCmode || inmode == CCGCmode
	  || inmode == CCGOCmode || inmode == CCNOmode)
	return 1;
      return 0;
    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
      if (inmode == CCmode)
	return 1;
      return 0;
    case GT: case LE:
      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
	return 1;
      return 0;
    default:
      return 0;
    }
}

/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum machine_mode inmode;
  enum rtx_code code = GET_CODE (op);
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_RTX_CLASS (code) != '<')
    return 0;
  inmode = GET_MODE (XEXP (op, 0));
  if (inmode == CCFPmode || inmode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	return 0;
      code = ix86_fp_compare_code_to_integer (code);
    }
  /* i387 supports just limited amount of conditional codes.  */
  switch (code)
    {
    case LTU: case GTU: case LEU: case GEU:
      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
	return 1;
      return 0;
    case ORDERED: case UNORDERED:
    case EQ: case NE:
      return 1;
    default:
      return 0;
    }
}

/* Return 1 if OP is a binary operator that can be promoted to wider mode.  */

int
promotable_binary_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case MULT:
      /* Modern CPUs have same latency for HImode and SImode multiply,
         but 386 and 486 do HImode multiply faster.  */
      return ix86_cpu > PROCESSOR_I486;
    case PLUS:
    case AND:
    case IOR:
    case XOR:
    case ASHIFT:
      return 1;
    default:
      return 0;
    }
}

/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */

int
cmp_fp_expander_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (GET_CODE (op) == CONST_DOUBLE)
    return 1;
  return general_operand (op, mode);
}

/* Match an SI or HImode register for a zero_extract.  */

int
ext_register_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  int regno;
  if ((!TARGET_64BIT || GET_MODE (op) != DImode)
      && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;

  if (!register_operand (op, VOIDmode))
    return 0;

  /* Be careful to accept only registers having upper parts.  */
  regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
  return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
}

/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}

/* Return true if OP is a MULT rtx.  */
int
mult_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == MULT;
}

/* Return true if OP is a DIV rtx.  */
int
div_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == DIV;
}

/* Return true if OP is a commutative ('c') or two-operand arithmetic
   ('2') rtx of mode MODE.  */
int
arith_or_logical_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
	      || GET_RTX_CLASS (GET_CODE (op)) == '2'));
}

/* Returns 1 if OP is memory operand with a displacement.  */

int
memory_displacement_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}

/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (nonimmediate_operand (op, mode))
    return 1;

  /* Also accept (and (zero_extract ... 8 8) const_int) — the shape
     produced by testqi_ext_ccno_0.  */
  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}

/* Returns 1 if OP is memory operand that can not be represented by the
   modRM array.  */

int
long_memory_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}

/* Return nonzero if the rtx is known aligned.  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}

/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.  Returns -1 for a non-FP constant, 1 for 0.0 (fldz),
   2 for 1.0 (fld1), 0 otherwise.  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;
  /* Note that the 80387 supports other constants, such as pi, that we
     should support too.  On some machines, these are much slower to load
     as standard constant, than to load from doubles in memory.  */
  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;
  return 0;
}

/* Return 1 if X is FP constant we can load to SSE register w/o using
   memory.  */
int
standard_sse_constant_p (x)
     rtx x;
{
  if (x == const0_rtx)
    return 1;
  return (x == CONST0_RTX (GET_MODE (x)));
}

/* Returns 1 if OP contains a symbol reference.  Walks the whole rtx
   recursively.  */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}

/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  struct ix86_frame frame;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32 pop, since that's all we can do
     with one instruction.
   */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}

/* Return 1 if VALUE can be stored in the sign extended immediate field.  */
int
x86_64_sign_extended_value (value)
     rtx value;
{
  switch (GET_CODE (value))
    {
      /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
         to be at least 32 and thus all acceptable constants are
	 represented as CONST_INT.  */
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
	return 1;
      else
	{
	  /* Accept only values that survive a DImode->SImode->DImode
	     sign-extension round trip.  */
	  HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
	  return trunc_int_for_mode (val, SImode) == val;
	}
      break;

      /* For certain code models, the symbolic references are known to fit.
	 in CM_SMALL_PIC model we know it fits if it is local to the shared
	 library.  Don't count TLS SYMBOL_REFs here, since they should fit
	 only if inside of UNSPEC handled below.  */
    case SYMBOL_REF:
      return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);

      /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
	      || ix86_cmodel == CM_KERNEL);

      /* We also may accept the offsetted memory references in certain
	 special cases.  */
    case CONST:
      if (GET_CODE (XEXP (value, 0)) == UNSPEC)
	switch (XINT (XEXP (value, 0), 1))
	  {
	  case UNSPEC_GOTPCREL:
	  case UNSPEC_DTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_NTPOFF:
	    return 1;
	  default:
	    break;
	  }
      if (GET_CODE (XEXP (value, 0)) == PLUS)
	{
	  rtx op1 = XEXP (XEXP (value, 0), 0);
	  rtx op2 = XEXP (XEXP (value, 0), 1);
	  HOST_WIDE_INT offset;

	  if (ix86_cmodel == CM_LARGE)
	    return 0;
	  if (GET_CODE (op2) != CONST_INT)
	    return 0;
	  offset = trunc_int_for_mode (INTVAL (op2), DImode);
	  switch (GET_CODE (op1))
	    {
	    case SYMBOL_REF:
	      /* For CM_SMALL assume that latest object is 16MB before
	         end of 31bits boundary.  We may also accept pretty
	         large negative constants knowing that all objects are
	         in the positive half of address space.  */
	      if (ix86_cmodel == CM_SMALL
		  && offset < 16*1024*1024
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      /* For CM_KERNEL we know that all objects reside in the
	         negative half of 32bits address space.  We may not
	         accept negative offsets, since they may be just off
	         and we may accept pretty large positive ones.  */
	      if (ix86_cmodel == CM_KERNEL
		  && offset > 0
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      break;
	    case LABEL_REF:
	      /* These conditions are similar to SYMBOL_REF ones, just the
	         constraints for code models differ.  */
	      if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
		  && offset < 16*1024*1024
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      if (ix86_cmodel == CM_KERNEL
		  && offset > 0
		  && trunc_int_for_mode (offset, SImode) == offset)
		return 1;
	      break;
	    case UNSPEC:
	      switch (XINT (op1, 1))
		{
		case UNSPEC_DTPOFF:
		case UNSPEC_NTPOFF:
		  if (offset > 0
		      && trunc_int_for_mode (offset, SImode) == offset)
		    return 1;
		}
	      break;
	    default:
	      return 0;
	    }
	}
      return 0;
    default:
      return 0;
    }
}

/* Return 1 if VALUE can be stored in the zero extended immediate field.  */
int
x86_64_zero_extended_value (value)
     rtx value;
{
  switch (GET_CODE (value))
    {
    case CONST_DOUBLE:
      if (HOST_BITS_PER_WIDE_INT == 32)
	return (GET_MODE (value) == VOIDmode
		&& !CONST_DOUBLE_HIGH (value));
      else
	return 0;
    case CONST_INT:
      if (HOST_BITS_PER_WIDE_INT == 32)
	return INTVAL (value) >= 0;
      else
	return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
      break;

      /* For certain code models, the symbolic references are known to fit.  */
    case SYMBOL_REF:
      return ix86_cmodel == CM_SMALL;

      /* For certain code models, the code is near as well.  */
    case LABEL_REF:
      return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;

      /* We also may accept the offsetted memory references in certain
	 special cases.  */
    case CONST:
      if (GET_CODE (XEXP (value, 0)) == PLUS)
	{
	  rtx op1 = XEXP (XEXP (value, 0), 0);
	  rtx op2 = XEXP (XEXP (value, 0), 1);

	  if (ix86_cmodel == CM_LARGE)
	    return 0;
	  switch (GET_CODE (op1))
	    {
	    case SYMBOL_REF:
	      return 0;
	      /* For small code model we may accept pretty large positive
	         offsets, since one bit is available for free.
Negative 4210 offsets are limited by the size of NULL pointer area 4211 specified by the ABI. */ 4212 if (ix86_cmodel == CM_SMALL 4213 && GET_CODE (op2) == CONST_INT 4214 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000 4215 && (trunc_int_for_mode (INTVAL (op2), SImode) 4216 == INTVAL (op2))) 4217 return 1; 4218 /* ??? For the kernel, we may accept adjustment of 4219 -0x10000000, since we know that it will just convert 4220 negative address space to positive, but perhaps this 4221 is not worthwhile. */ 4222 break; 4223 case LABEL_REF: 4224 /* These conditions are similar to SYMBOL_REF ones, just the 4225 constraints for code models differ. */ 4226 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM) 4227 && GET_CODE (op2) == CONST_INT 4228 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000 4229 && (trunc_int_for_mode (INTVAL (op2), SImode) 4230 == INTVAL (op2))) 4231 return 1; 4232 break; 4233 default: 4234 return 0; 4235 } 4236 } 4237 return 0; 4238 default: 4239 return 0; 4240 } 4241} 4242 4243/* Value should be nonzero if functions must have frame pointers. 4244 Zero means the frame pointer need not be set up (and parms may 4245 be accessed via the stack pointer) in functions that seem suitable. */ 4246 4247int 4248ix86_frame_pointer_required () 4249{ 4250 /* If we accessed previous frames, then the generated code expects 4251 to be able to access the saved ebp value in our frame. */ 4252 if (cfun->machine->accesses_prev_frame) 4253 return 1; 4254 4255 /* Several x86 os'es need a frame pointer for other reasons, 4256 usually pertaining to setjmp. */ 4257 if (SUBTARGET_FRAME_POINTER_REQUIRED) 4258 return 1; 4259 4260 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off 4261 the frame pointer by default. Turn it back on now if we've not 4262 got a leaf function. 
*/ 4263 if (TARGET_OMIT_LEAF_FRAME_POINTER 4264 && (!current_function_is_leaf)) 4265 return 1; 4266 4267 if (current_function_profile) 4268 return 1; 4269 4270 return 0; 4271} 4272 4273/* Record that the current function accesses previous call frames. */ 4274 4275void 4276ix86_setup_frame_addresses () 4277{ 4278 cfun->machine->accesses_prev_frame = 1; 4279} 4280 4281#if defined(HAVE_GAS_HIDDEN) && (defined(SUPPORTS_ONE_ONLY) && SUPPORTS_ONE_ONLY) 4282# define USE_HIDDEN_LINKONCE 1 4283#else 4284# define USE_HIDDEN_LINKONCE 0 4285#endif 4286 4287static int pic_labels_used; 4288 4289/* Fills in the label name that should be used for a pc thunk for 4290 the given register. */ 4291 4292static void 4293get_pc_thunk_name (name, regno) 4294 char name[32]; 4295 unsigned int regno; 4296{ 4297 if (USE_HIDDEN_LINKONCE) 4298 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]); 4299 else 4300 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno); 4301} 4302 4303 4304/* This function generates code for -fpic that loads %ebx with 4305 the return address of the caller and then returns. */ 4306 4307void 4308ix86_asm_file_end (file) 4309 FILE *file; 4310{ 4311 rtx xops[2]; 4312 int regno; 4313 4314 for (regno = 0; regno < 8; ++regno) 4315 { 4316 char name[32]; 4317 4318 if (! 
((pic_labels_used >> regno) & 1)) 4319 continue; 4320 4321 get_pc_thunk_name (name, regno); 4322 4323 if (USE_HIDDEN_LINKONCE) 4324 { 4325 tree decl; 4326 4327 decl = build_decl (FUNCTION_DECL, get_identifier (name), 4328 error_mark_node); 4329 TREE_PUBLIC (decl) = 1; 4330 TREE_STATIC (decl) = 1; 4331 DECL_ONE_ONLY (decl) = 1; 4332 4333 (*targetm.asm_out.unique_section) (decl, 0); 4334 named_section (decl, NULL, 0); 4335 4336 (*targetm.asm_out.globalize_label) (file, name); 4337 fputs ("\t.hidden\t", file); 4338 assemble_name (file, name); 4339 fputc ('\n', file); 4340 ASM_DECLARE_FUNCTION_NAME (file, name, decl); 4341 } 4342 else 4343 { 4344 text_section (); 4345 ASM_OUTPUT_LABEL (file, name); 4346 } 4347 4348 xops[0] = gen_rtx_REG (SImode, regno); 4349 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx); 4350 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops); 4351 output_asm_insn ("ret", xops); 4352 } 4353} 4354 4355/* Emit code for the SET_GOT patterns. */ 4356 4357const char * 4358output_set_got (dest) 4359 rtx dest; 4360{ 4361 rtx xops[3]; 4362 4363 xops[0] = dest; 4364 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); 4365 4366 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic) 4367 { 4368 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ()); 4369 4370 if (!flag_pic) 4371 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops); 4372 else 4373 output_asm_insn ("call\t%a2", xops); 4374 4375#if TARGET_MACHO 4376 /* Output the "canonical" label name ("Lxx$pb") here too. This 4377 is what will be referred to by the Mach-O PIC subsystem. 
*/ 4378 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ()); 4379#endif 4380 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", 4381 CODE_LABEL_NUMBER (XEXP (xops[2], 0))); 4382 4383 if (flag_pic) 4384 output_asm_insn ("pop{l}\t%0", xops); 4385 } 4386 else 4387 { 4388 char name[32]; 4389 get_pc_thunk_name (name, REGNO (dest)); 4390 pic_labels_used |= 1 << REGNO (dest); 4391 4392 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); 4393 xops[2] = gen_rtx_MEM (QImode, xops[2]); 4394 output_asm_insn ("call\t%X2", xops); 4395 } 4396 4397 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION) 4398 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops); 4399 else if (!TARGET_MACHO) 4400 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops); 4401 4402 return ""; 4403} 4404 4405/* Generate an "push" pattern for input ARG. */ 4406 4407static rtx 4408gen_push (arg) 4409 rtx arg; 4410{ 4411 return gen_rtx_SET (VOIDmode, 4412 gen_rtx_MEM (Pmode, 4413 gen_rtx_PRE_DEC (Pmode, 4414 stack_pointer_rtx)), 4415 arg); 4416} 4417 4418/* Return >= 0 if there is an unused call-clobbered register available 4419 for the entire function. */ 4420 4421static unsigned int 4422ix86_select_alt_pic_regnum () 4423{ 4424 if (current_function_is_leaf && !current_function_profile) 4425 { 4426 int i; 4427 for (i = 2; i >= 0; --i) 4428 if (!regs_ever_live[i]) 4429 return i; 4430 } 4431 4432 return INVALID_REGNUM; 4433} 4434 4435/* Return 1 if we need to save REGNO. 
 */
static int
ix86_save_reg (regno, maybe_eh_return)
     unsigned int regno;
     int maybe_eh_return;
{
  /* The PIC register must be saved whenever it is live, unless an
     unused call-clobbered register can stand in for it.  */
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile
	  || current_function_calls_eh_return
	  || current_function_uses_const_pool))
    {
      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
	return 0;
      return 1;
    }

  /* The EH data registers must be preserved when MAYBE_EH_RETURN is
     requested and the function calls eh_return.  */
  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return 1;
	}
    }

  /* Otherwise: save call-saved, non-fixed registers that are live,
     except the hard frame pointer when it is in use as such.  */
  return (regs_ever_live[regno]
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}

/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs ()
{
  int nregs = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}

/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.
 */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;
     int to;
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      /* Only eliminations to the stack pointer remain valid here.  */
      if (to != STACK_POINTER_REGNUM)
	abort ();
      else if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;
      else if (from != FRAME_POINTER_REGNUM)
	abort ();
      else
	return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}

/* Fill structure ix86_frame about frame of currently computed function.
   Layout (growing downward from the return address): saved base pointer,
   register save area, va-arg save area, padding1, local frame, outgoing
   argument area, padding2.  */

static void
ix86_compute_frame_layout (frame)
     struct ix86_frame *frame;
{
  HOST_WIDE_INT total_size;  /* NOTE: set below but currently unused.  */
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  int offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
  HOST_WIDE_INT size = get_frame_size ();

  frame->nregs = ix86_nsaved_regs ();
  total_size = size;

  /* Skip return address and saved base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;

  frame->hard_frame_pointer_offset = offset;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since i386 port is the only using those features
     that may break easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;

  /* Va-arg area */
  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;

  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
		     & -stack_alignment_needed) - offset;

  offset += frame->padding1;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.  */
  if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || current_function_calls_alloca)
    frame->padding2 = ((offset + preferred_alignment - 1)
		       & -preferred_alignment) - offset;
  else
    frame->padding2 = 0;

  offset += frame->padding2;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);

  /* On x86-64 a leaf function may use the red zone below the stack
     pointer instead of explicitly allocating, up to the ABI limit.  */
  if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
	   frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
#endif
}

/* Emit code to save registers in the prologue, using push insns.  */

static void
ix86_emit_save_regs ()
{
  register int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
	/* Mark for DWARF2 call-frame information.  */
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit code to save registers using MOV insns.  First register
   is stored at POINTER + OFFSET.
 */
static void
ix86_emit_save_regs_using_mov (pointer, offset)
     rtx pointer;
     HOST_WIDE_INT offset;
{
  int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
					       Pmode, offset),
			       gen_rtx_REG (Pmode, regno));
	RTX_FRAME_RELATED_P (insn) = 1;
	offset += UNITS_PER_WORD;
      }
}

/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue ()
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  int use_mov = 0;
  HOST_WIDE_INT allocate;

  if (!optimize_size)
    {
      use_fast_prologue_epilogue
	= !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
      if (TARGET_PROLOGUE_USING_MOVE)
	use_mov = use_fast_prologue_epilogue;
    }
  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;
  /* In case we are dealing only with single register and empty frame,
     push is equivalent of the mov+add sequence.  */
  if (allocate == 0 && frame.nregs <= 1)
    use_mov = 0;

  if (!use_mov)
    ix86_emit_save_regs ();
  else
    /* Registers will be stored with MOVs after the stack adjustment,
       so include their space in the allocation.  */
    allocate += frame.nregs * UNITS_PER_WORD;

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    {
      insn = emit_insn (gen_pro_epilogue_adjust_stack
			(stack_pointer_rtx, stack_pointer_rtx,
			 GEN_INT (-allocate)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    {
      /* Large allocation with stack probing enabled: call _alloca so
	 each page is touched in order.
	 ??? Is this only valid for Win32?  */

      rtx arg0, sym;

      if (TARGET_64BIT)
	abort ();

      arg0 = gen_rtx_REG (SImode, 0);
      emit_move_insn (arg0, GEN_INT (allocate));

      sym = gen_rtx_MEM (FUNCTION_MODE,
			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
      insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));

      CALL_INSN_FUNCTION_USAGE (insn)
	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
			     CALL_INSN_FUNCTION_USAGE (insn));

      /* Don't allow scheduling pass to move insns across __alloca
	 call.  */
      emit_insn (gen_blockage (const0_rtx));
    }
  if (use_mov)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

#ifdef SUBTARGET_PROLOGUE
  SUBTARGET_PROLOGUE;
#endif

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
	 deleting all references to the pic register after reload.
	 Consider if cross-jumping unifies two sides of a branch
	 controlled by a comparison vs the only read from a global.
	 In which case, allow the set_got to be deleted, though we're
	 too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from be scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}

/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
     rtx pointer;
     int offset;
     int maybe_eh_return;
{
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
	emit_move_insn (gen_rtx_REG (Pmode, regno),
			adjust_address (gen_rtx_MEM (Pmode, pointer),
					Pmode, offset));
	offset += UNITS_PER_WORD;
      }
}

/* Restore function stack, frame, and registers.  STYLE is 0 for a
   sibcall epilogue (no return insn), 2 for an eh_return epilogue,
   and nonzero otherwise for a normal return.  */

void
ix86_expand_epilogue (style)
     int style;
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.

     The default code result in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && use_fast_prologue_epilogue && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      emit_insn (gen_pro_epilogue_adjust_stack
			 (stack_pointer_rtx, sa, const0_rtx));
	    }
	  else
	    {
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate
			     + frame.nregs * UNITS_PER_WORD)));
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx));
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset)));
	}
      else if (frame.to_allocate)
	emit_insn (gen_pro_epilogue_adjust_stack
		   (stack_pointer_rtx, stack_pointer_rtx,
		    GEN_INT (frame.to_allocate)));

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in 64bit ABI.  */
	  if (TARGET_64BIT)
	    abort ();

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}

/* Reset from the function's potential modifications.
*/ 4975 4976static void 4977ix86_output_function_epilogue (file, size) 4978 FILE *file ATTRIBUTE_UNUSED; 4979 HOST_WIDE_INT size ATTRIBUTE_UNUSED; 4980{ 4981 if (pic_offset_table_rtx) 4982 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM; 4983} 4984 4985/* Extract the parts of an RTL expression that is a valid memory address 4986 for an instruction. Return 0 if the structure of the address is 4987 grossly off. Return -1 if the address contains ASHIFT, so it is not 4988 strictly valid, but still used for computing length of lea instruction. 4989 */ 4990 4991static int 4992ix86_decompose_address (addr, out) 4993 register rtx addr; 4994 struct ix86_address *out; 4995{ 4996 rtx base = NULL_RTX; 4997 rtx index = NULL_RTX; 4998 rtx disp = NULL_RTX; 4999 HOST_WIDE_INT scale = 1; 5000 rtx scale_rtx = NULL_RTX; 5001 int retval = 1; 5002 5003 if (REG_P (addr) || GET_CODE (addr) == SUBREG) 5004 base = addr; 5005 else if (GET_CODE (addr) == PLUS) 5006 { 5007 rtx op0 = XEXP (addr, 0); 5008 rtx op1 = XEXP (addr, 1); 5009 enum rtx_code code0 = GET_CODE (op0); 5010 enum rtx_code code1 = GET_CODE (op1); 5011 5012 if (code0 == REG || code0 == SUBREG) 5013 { 5014 if (code1 == REG || code1 == SUBREG) 5015 index = op0, base = op1; /* index + base */ 5016 else 5017 base = op0, disp = op1; /* base + displacement */ 5018 } 5019 else if (code0 == MULT) 5020 { 5021 index = XEXP (op0, 0); 5022 scale_rtx = XEXP (op0, 1); 5023 if (code1 == REG || code1 == SUBREG) 5024 base = op1; /* index*scale + base */ 5025 else 5026 disp = op1; /* index*scale + disp */ 5027 } 5028 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT) 5029 { 5030 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */ 5031 scale_rtx = XEXP (XEXP (op0, 0), 1); 5032 base = XEXP (op0, 1); 5033 disp = op1; 5034 } 5035 else if (code0 == PLUS) 5036 { 5037 index = XEXP (op0, 0); /* index + base + disp */ 5038 base = XEXP (op0, 1); 5039 disp = op1; 5040 } 5041 else 5042 return 0; 5043 } 5044 else if 
(GET_CODE (addr) == MULT) 5045 { 5046 index = XEXP (addr, 0); /* index*scale */ 5047 scale_rtx = XEXP (addr, 1); 5048 } 5049 else if (GET_CODE (addr) == ASHIFT) 5050 { 5051 rtx tmp; 5052 5053 /* We're called for lea too, which implements ashift on occasion. */ 5054 index = XEXP (addr, 0); 5055 tmp = XEXP (addr, 1); 5056 if (GET_CODE (tmp) != CONST_INT) 5057 return 0; 5058 scale = INTVAL (tmp); 5059 if ((unsigned HOST_WIDE_INT) scale > 3) 5060 return 0; 5061 scale = 1 << scale; 5062 retval = -1; 5063 } 5064 else 5065 disp = addr; /* displacement */ 5066 5067 /* Extract the integral value of scale. */ 5068 if (scale_rtx) 5069 { 5070 if (GET_CODE (scale_rtx) != CONST_INT) 5071 return 0; 5072 scale = INTVAL (scale_rtx); 5073 } 5074 5075 /* Allow arg pointer and stack pointer as index if there is not scaling */ 5076 if (base && index && scale == 1 5077 && (index == arg_pointer_rtx || index == frame_pointer_rtx 5078 || index == stack_pointer_rtx)) 5079 { 5080 rtx tmp = base; 5081 base = index; 5082 index = tmp; 5083 } 5084 5085 /* Special case: %ebp cannot be encoded as a base without a displacement. */ 5086 if ((base == hard_frame_pointer_rtx 5087 || base == frame_pointer_rtx 5088 || base == arg_pointer_rtx) && !disp) 5089 disp = const0_rtx; 5090 5091 /* Special case: on K6, [%esi] makes the instruction vector decoded. 5092 Avoid this by transforming to [%esi+0]. */ 5093 if (ix86_cpu == PROCESSOR_K6 && !optimize_size 5094 && base && !index && !disp 5095 && REG_P (base) 5096 && REGNO_REG_CLASS (REGNO (base)) == SIREG) 5097 disp = const0_rtx; 5098 5099 /* Special case: encode reg+reg instead of reg*2. */ 5100 if (!base && index && scale && scale == 2) 5101 base = index, scale = 1; 5102 5103 /* Special case: scaling cannot be encoded without base or displacement. 
*/ 5104 if (!base && !disp && index && scale != 1) 5105 disp = const0_rtx; 5106 5107 out->base = base; 5108 out->index = index; 5109 out->disp = disp; 5110 out->scale = scale; 5111 5112 return retval; 5113} 5114 5115/* Return cost of the memory address x. 5116 For i386, it is better to use a complex address than let gcc copy 5117 the address into a reg and make a new pseudo. But not if the address 5118 requires to two regs - that would mean more pseudos with longer 5119 lifetimes. */ 5120int 5121ix86_address_cost (x) 5122 rtx x; 5123{ 5124 struct ix86_address parts; 5125 int cost = 1; 5126 5127 if (!ix86_decompose_address (x, &parts)) 5128 abort (); 5129 5130 if (parts.base && GET_CODE (parts.base) == SUBREG) 5131 parts.base = SUBREG_REG (parts.base); 5132 if (parts.index && GET_CODE (parts.index) == SUBREG) 5133 parts.index = SUBREG_REG (parts.index); 5134 5135 /* More complex memory references are better. */ 5136 if (parts.disp && parts.disp != const0_rtx) 5137 cost--; 5138 5139 /* Attempt to minimize number of registers in the address. */ 5140 if ((parts.base 5141 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)) 5142 || (parts.index 5143 && (!REG_P (parts.index) 5144 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER))) 5145 cost++; 5146 5147 if (parts.base 5148 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER) 5149 && parts.index 5150 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER) 5151 && parts.base != parts.index) 5152 cost++; 5153 5154 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b, 5155 since it's predecode logic can't detect the length of instructions 5156 and it degenerates to vector decoded. Increase cost of such 5157 addresses here. The penalty is minimally 2 cycles. It may be worthwhile 5158 to split such addresses or even refuse such addresses at all. 
5159 5160 Following addressing modes are affected: 5161 [base+scale*index] 5162 [scale*index+disp] 5163 [base+index] 5164 5165 The first and last case may be avoidable by explicitly coding the zero in 5166 memory address, but I don't have AMD-K6 machine handy to check this 5167 theory. */ 5168 5169 if (TARGET_K6 5170 && ((!parts.disp && parts.base && parts.index && parts.scale != 1) 5171 || (parts.disp && !parts.base && parts.index && parts.scale != 1) 5172 || (!parts.disp && parts.base && parts.index && parts.scale == 1))) 5173 cost += 10; 5174 5175 return cost; 5176} 5177 5178/* If X is a machine specific address (i.e. a symbol or label being 5179 referenced as a displacement from the GOT implemented using an 5180 UNSPEC), then return the base term. Otherwise return X. */ 5181 5182rtx 5183ix86_find_base_term (x) 5184 rtx x; 5185{ 5186 rtx term; 5187 5188 if (TARGET_64BIT) 5189 { 5190 if (GET_CODE (x) != CONST) 5191 return x; 5192 term = XEXP (x, 0); 5193 if (GET_CODE (term) == PLUS 5194 && (GET_CODE (XEXP (term, 1)) == CONST_INT 5195 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE)) 5196 term = XEXP (term, 0); 5197 if (GET_CODE (term) != UNSPEC 5198 || XINT (term, 1) != UNSPEC_GOTPCREL) 5199 return x; 5200 5201 term = XVECEXP (term, 0, 0); 5202 5203 if (GET_CODE (term) != SYMBOL_REF 5204 && GET_CODE (term) != LABEL_REF) 5205 return x; 5206 5207 return term; 5208 } 5209 5210 if (GET_CODE (x) != PLUS 5211 || XEXP (x, 0) != pic_offset_table_rtx 5212 || GET_CODE (XEXP (x, 1)) != CONST) 5213 return x; 5214 5215 term = XEXP (XEXP (x, 1), 0); 5216 5217 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT) 5218 term = XEXP (term, 0); 5219 5220 if (GET_CODE (term) != UNSPEC 5221 || XINT (term, 1) != UNSPEC_GOTOFF) 5222 return x; 5223 5224 term = XVECEXP (term, 0, 0); 5225 5226 if (GET_CODE (term) != SYMBOL_REF 5227 && GET_CODE (term) != LABEL_REF) 5228 return x; 5229 5230 return term; 5231} 5232 5233/* Determine if a given RTX is a valid constant. 
We already know this 5234 satisfies CONSTANT_P. */ 5235 5236bool 5237legitimate_constant_p (x) 5238 rtx x; 5239{ 5240 rtx inner; 5241 5242 switch (GET_CODE (x)) 5243 { 5244 case SYMBOL_REF: 5245 /* TLS symbols are not constant. */ 5246 if (tls_symbolic_operand (x, Pmode)) 5247 return false; 5248 break; 5249 5250 case CONST: 5251 inner = XEXP (x, 0); 5252 5253 /* Offsets of TLS symbols are never valid. 5254 Discourage CSE from creating them. */ 5255 if (GET_CODE (inner) == PLUS 5256 && tls_symbolic_operand (XEXP (inner, 0), Pmode)) 5257 return false; 5258 5259 /* Only some unspecs are valid as "constants". */ 5260 if (GET_CODE (inner) == UNSPEC) 5261 switch (XINT (inner, 1)) 5262 { 5263 case UNSPEC_TPOFF: 5264 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode); 5265 default: 5266 return false; 5267 } 5268 break; 5269 5270 default: 5271 break; 5272 } 5273 5274 /* Otherwise we handle everything else in the move patterns. */ 5275 return true; 5276} 5277 5278/* Determine if it's legal to put X into the constant pool. This 5279 is not possible for the address of thread-local symbols, which 5280 is checked above. */ 5281 5282static bool 5283ix86_cannot_force_const_mem (x) 5284 rtx x; 5285{ 5286 return !legitimate_constant_p (x); 5287} 5288 5289/* Determine if a given RTX is a valid constant address. */ 5290 5291bool 5292constant_address_p (x) 5293 rtx x; 5294{ 5295 switch (GET_CODE (x)) 5296 { 5297 case LABEL_REF: 5298 case CONST_INT: 5299 return true; 5300 5301 case CONST_DOUBLE: 5302 return TARGET_64BIT; 5303 5304 case CONST: 5305 /* For Mach-O, really believe the CONST. */ 5306 if (TARGET_MACHO) 5307 return true; 5308 /* Otherwise fall through. */ 5309 case SYMBOL_REF: 5310 return !flag_pic && legitimate_constant_p (x); 5311 5312 default: 5313 return false; 5314 } 5315} 5316 5317/* Nonzero if the constant value X is a legitimate general operand 5318 when generating PIC code. 
It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (x)
     rtx x;
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}

/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (disp)
     register rtx disp;
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      /* TLS references should always be enclosed in UNSPEC.  */
      if (tls_symbolic_operand (disp, GET_MODE (disp)))
	return 0;
      /* SYMBOL_REF_FLAG marks a locally-binding symbol; those and constant
	 pool entries may be addressed RIP-relative in the small PIC model.  */
      if (GET_CODE (disp) == SYMBOL_REF
	  && ix86_cmodel == CM_SMALL_PIC
	  && (CONSTANT_POOL_ADDRESS_P (disp)
	      || SYMBOL_REF_FLAG (disp)))
	return 1;
      if (GET_CODE (disp) == LABEL_REF)
	return 1;
      /* Symbol/label plus a constant offset, as long as the offset stays
	 within the +-16MB range of the small code model.  */
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS
	  && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
	       && ix86_cmodel == CM_SMALL_PIC
	       && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
		   || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
	      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
	  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
	  && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
	  && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
	return 1;
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions.  This limits allowed distance
         of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  /* 32-bit mode: strip one optional constant offset, then require an
     UNSPEC wrapper naming the relocation kind.  */
  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
  if (TARGET_MACHO && GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
          || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
        if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
          {
            const char *sym_name = XSTR (XEXP (disp, 1), 0);
            if (strstr (sym_name, "$pb") != 0)
              return 1;
          }
    }

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_NTPOFF:
      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_DTPOFF:
      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}

/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.
*/

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  /* The thread pointer unspec is always a legitimate address by itself.  */
  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
    {
      if (TARGET_DEBUG_ADDR)
	fprintf (stderr, "Success.\n");
      return TRUE;
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      rtx reg;
      reason_rtx = base;

      if (GET_CODE (base) == SUBREG)
	reg = SUBREG_REG (base);
      else
	reg = base;

      if (GET_CODE (reg) != REG)
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      /* Strict checking requires a hard register (post-reload); non-strict
	 also accepts pseudos.  */
      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      rtx reg;
      reason_rtx = index;

      if (GET_CODE (index) == SUBREG)
	reg = SUBREG_REG (index);
      else
	reg = index;

      if (GET_CODE (reg) != REG)
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_GOTPCREL:
	    if (!flag_pic)
	      abort ();
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  default:
	    reason = "invalid address unspec";
	    goto report_error;
	  }

      else if (flag_pic && (SYMBOLIC_CONST (disp)
#if TARGET_MACHO
			    && !machopic_operand_p (disp)
#endif
			    ))
	{
	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		{
		  reason = "non-constant pic memory reference";
		  goto report_error;
		}
	    }
	  else if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (!CONSTANT_ADDRESS_P (disp))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}
      else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
	{
	  reason = "displacement is out of range";
	  goto report_error;
	}
      else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
	{
	  reason = "displacement is a const_double";
	  goto report_error;
	}
    }

  /* Everything looks valid.
*/
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}

/* Return an unique alias set for the GOT.  Lazily created on first use
   so new_alias_set is only called once.  */

static HOST_WIDE_INT
ix86_GOT_alias_set ()
{
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (reg == 0)
    reg = gen_reg_rtx (Pmode);
  /* Use the generic Mach-O PIC machinery.  */
  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new = addr;
  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
	{
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use directly gen_movsi, otherwise the address is loaded
	     into register for CSE.  We don't want to CSE this addresses,
	     instead we CSE addresses from the GOT table, so skip this.  */
	  emit_insn (gen_movsi (reg, new));
	  new = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway...  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (local_symbolic_operand (op0, Pmode)
	      && GET_CODE (op1) == CONST_INT)
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					UNSPEC_GOTOFF);
		  new = gen_rtx_PLUS (Pmode, new, op1);
		  new = gen_rtx_CONST (Pmode, new);
		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new);
		      new = reg;
		    }
		}
	      else
		{
		  /* Offsets outside the small model's +-16MB range must be
		     forced into a register.  */
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
		}
	    }
	  else
	    {
	      /* Legitimize both operands recursively and recombine.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new = legitimize_pic_address (XEXP (addr, 1),
					    base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}

/* Record target-specific information on DECL's SYMBOL_REF: under PIC,
   set SYMBOL_REF_FLAG for locally-binding symbols; for thread-local
   variables, prefix the symbol name with '%' plus a TLS-model letter
   (undone by ix86_strip_name_encoding below).  */

static void
ix86_encode_section_info (decl, first)
     tree decl;
     int first ATTRIBUTE_UNUSED;
{
  bool local_p = (*targetm.binds_local_p) (decl);
  rtx rtl, symbol;

  rtl = DECL_P (decl) ?
DECL_RTL (decl) : TREE_CST_RTL (decl); 5878 if (GET_CODE (rtl) != MEM) 5879 return; 5880 symbol = XEXP (rtl, 0); 5881 if (GET_CODE (symbol) != SYMBOL_REF) 5882 return; 5883 5884 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global 5885 symbol so that we may access it directly in the GOT. */ 5886 5887 if (flag_pic) 5888 SYMBOL_REF_FLAG (symbol) = local_p; 5889 5890 /* For ELF, encode thread-local data with %[GLil] for "global dynamic", 5891 "local dynamic", "initial exec" or "local exec" TLS models 5892 respectively. */ 5893 5894 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl)) 5895 { 5896 const char *symbol_str; 5897 char *newstr; 5898 size_t len; 5899 enum tls_model kind = decl_tls_model (decl); 5900 5901 if (TARGET_64BIT && ! flag_pic) 5902 { 5903 /* x86-64 doesn't allow non-pic code for shared libraries, 5904 so don't generate GD/LD TLS models for non-pic code. */ 5905 switch (kind) 5906 { 5907 case TLS_MODEL_GLOBAL_DYNAMIC: 5908 kind = TLS_MODEL_INITIAL_EXEC; break; 5909 case TLS_MODEL_LOCAL_DYNAMIC: 5910 kind = TLS_MODEL_LOCAL_EXEC; break; 5911 default: 5912 break; 5913 } 5914 } 5915 5916 symbol_str = XSTR (symbol, 0); 5917 5918 if (symbol_str[0] == '%') 5919 { 5920 if (symbol_str[1] == tls_model_chars[kind]) 5921 return; 5922 symbol_str += 2; 5923 } 5924 len = strlen (symbol_str) + 1; 5925 newstr = alloca (len + 2); 5926 5927 newstr[0] = '%'; 5928 newstr[1] = tls_model_chars[kind]; 5929 memcpy (newstr + 2, symbol_str, len); 5930 5931 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1); 5932 } 5933} 5934 5935/* Undo the above when printing symbol names. */ 5936 5937static const char * 5938ix86_strip_name_encoding (str) 5939 const char *str; 5940{ 5941 if (str[0] == '%') 5942 str += 2; 5943 if (str [0] == '*') 5944 str += 1; 5945 return str; 5946} 5947 5948/* Load the thread pointer into a register. 
*/

static rtx
get_thread_pointer ()
{
  rtx tp;

  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  tp = gen_rtx_MEM (Pmode, tp);
  RTX_UNCHANGING_P (tp) = 1;
  set_mem_alias_set (tp, ix86_GOT_alias_set ());
  tp = force_reg (Pmode, tp);

  return tp;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (x, oldx, mode)
     register rtx x;
     register rtx oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  /* TLS symbols get model-specific access sequences emitted here;
     `log' doubles as the TLS model code.  */
  log = tls_symbolic_operand (x, mode);
  if (log)
    {
      rtx dest, base, off, pic;
      int type;

      switch (log)
	{
	case TLS_MODEL_GLOBAL_DYNAMIC:
	  dest = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, 0), insns;

	      start_sequence ();
	      emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
	      insns = get_insns ();
	      end_sequence ();

	      emit_libcall_block (insns, dest, rax, x);
	    }
	  else
	    emit_insn (gen_tls_global_dynamic_32 (dest, x));
	  break;

	case TLS_MODEL_LOCAL_DYNAMIC:
	  base = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, 0), insns, note;

	      start_sequence ();
	      emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
	      insns = get_insns ();
	      end_sequence ();

	      note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
	      note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
	      emit_libcall_block (insns, base, rax, note);
	    }
	  else
	    emit_insn (gen_tls_local_dynamic_base_32 (base));

	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
	  off = gen_rtx_CONST (Pmode, off);

	  return gen_rtx_PLUS (Pmode, base, off);

	case TLS_MODEL_INITIAL_EXEC:
	  if (TARGET_64BIT)
	    {
	      pic = NULL;
	      type = UNSPEC_GOTNTPOFF;
	    }
	  else if (flag_pic)
	    {
	      if (reload_in_progress)
		regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	      pic = pic_offset_table_rtx;
	      type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	    }
	  else if (!TARGET_GNU_TLS)
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	      type = UNSPEC_GOTTPOFF;
	    }
	  else
	    {
	      pic = NULL;
	      type = UNSPEC_INDNTPOFF;
	    }

	  base = get_thread_pointer ();

	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
	  off = gen_rtx_CONST (Pmode, off);
	  if (pic)
	    off = gen_rtx_PLUS (Pmode, pic, off);
	  off = gen_rtx_MEM (Pmode, off);
	  RTX_UNCHANGING_P (off) = 1;
	  set_mem_alias_set (off, ix86_GOT_alias_set ());
	  dest = gen_reg_rtx (Pmode);

	  if (TARGET_64BIT || TARGET_GNU_TLS)
	    {
	      emit_move_insn (dest, off);
	      return gen_rtx_PLUS (Pmode, base, dest);
	    }
	  else
	    emit_insn (gen_subsi3 (dest, base, off));
	  break;

	case TLS_MODEL_LOCAL_EXEC:
	  base = get_thread_pointer ();

	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
				(TARGET_64BIT || TARGET_GNU_TLS)
				? UNSPEC_NTPOFF : UNSPEC_TPOFF);
	  off = gen_rtx_CONST (Pmode, off);

	  if (TARGET_64BIT || TARGET_GNU_TLS)
	    return gen_rtx_PLUS (Pmode, base, off);
	  else
	    {
	      dest = gen_reg_rtx (Pmode);
	      emit_insn (gen_subsi3 (dest, base, off));
	    }
	  break;

	default:
	  abort ();
	}

      return dest;
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Last resort: force the non-register operand into a register.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  register rtx temp = gen_reg_rtx (Pmode);
	  register rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}

/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs ("@GOTPCREL(%rip)", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@GOTTPOFF", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@TPOFF", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@TPOFF", file);
	  else
	    fputs ("@NTPOFF", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@DTPOFF", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@GOTTPOFF(%rip)", file);
	  else
	    fputs ("@GOTNTPOFF", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@INDNTPOFF", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}

/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
   We need to handle our special PIC relocations.  */

void
i386_dwarf_output_addr_const (file, x)
     FILE *file;
     rtx x;
{
#ifdef ASM_QUAD
  fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
#else
  if (TARGET_64BIT)
    abort ();
  fprintf (file, "%s", ASM_LONG);
#endif
  if (flag_pic)
    output_pic_addr_const (file, x, '\0');
  else
    output_addr_const (file, x);
  fputc ('\n', file);
}

/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

void
i386_output_dwarf_dtprel (file, size, x)
     FILE *file;
     int size;
     rtx x;
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@DTPOFF", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      /* Emit the upper zero word of the 8-byte value by hand.  */
      fputs (", 0", file);
      break;
    default:
      abort ();
    }
}

/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.
*/ 6457 6458rtx 6459i386_simplify_dwarf_addr (orig_x) 6460 rtx orig_x; 6461{ 6462 rtx x = orig_x, y; 6463 6464 if (GET_CODE (x) == MEM) 6465 x = XEXP (x, 0); 6466 6467 if (TARGET_64BIT) 6468 { 6469 if (GET_CODE (x) != CONST 6470 || GET_CODE (XEXP (x, 0)) != UNSPEC 6471 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL 6472 || GET_CODE (orig_x) != MEM) 6473 return orig_x; 6474 return XVECEXP (XEXP (x, 0), 0, 0); 6475 } 6476 6477 if (GET_CODE (x) != PLUS 6478 || GET_CODE (XEXP (x, 1)) != CONST) 6479 return orig_x; 6480 6481 if (GET_CODE (XEXP (x, 0)) == REG 6482 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM) 6483 /* %ebx + GOT/GOTOFF */ 6484 y = NULL; 6485 else if (GET_CODE (XEXP (x, 0)) == PLUS) 6486 { 6487 /* %ebx + %reg * scale + GOT/GOTOFF */ 6488 y = XEXP (x, 0); 6489 if (GET_CODE (XEXP (y, 0)) == REG 6490 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM) 6491 y = XEXP (y, 1); 6492 else if (GET_CODE (XEXP (y, 1)) == REG 6493 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM) 6494 y = XEXP (y, 0); 6495 else 6496 return orig_x; 6497 if (GET_CODE (y) != REG 6498 && GET_CODE (y) != MULT 6499 && GET_CODE (y) != ASHIFT) 6500 return orig_x; 6501 } 6502 else 6503 return orig_x; 6504 6505 x = XEXP (XEXP (x, 1), 0); 6506 if (GET_CODE (x) == UNSPEC 6507 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM) 6508 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM))) 6509 { 6510 if (y) 6511 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0)); 6512 return XVECEXP (x, 0, 0); 6513 } 6514 6515 if (GET_CODE (x) == PLUS 6516 && GET_CODE (XEXP (x, 0)) == UNSPEC 6517 && GET_CODE (XEXP (x, 1)) == CONST_INT 6518 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM) 6519 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF 6520 && GET_CODE (orig_x) != MEM))) 6521 { 6522 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1)); 6523 if (y) 6524 return gen_rtx_PLUS (Pmode, y, x); 6525 return x; 6526 } 6527 6528 return orig_x; 6529} 6530 6531static void 
6532put_condition_code (code, mode, reverse, fp, file) 6533 enum rtx_code code; 6534 enum machine_mode mode; 6535 int reverse, fp; 6536 FILE *file; 6537{ 6538 const char *suffix; 6539 6540 if (mode == CCFPmode || mode == CCFPUmode) 6541 { 6542 enum rtx_code second_code, bypass_code; 6543 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 6544 if (bypass_code != NIL || second_code != NIL) 6545 abort (); 6546 code = ix86_fp_compare_code_to_integer (code); 6547 mode = CCmode; 6548 } 6549 if (reverse) 6550 code = reverse_condition (code); 6551 6552 switch (code) 6553 { 6554 case EQ: 6555 suffix = "e"; 6556 break; 6557 case NE: 6558 suffix = "ne"; 6559 break; 6560 case GT: 6561 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode) 6562 abort (); 6563 suffix = "g"; 6564 break; 6565 case GTU: 6566 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers. 6567 Those same assemblers have the same but opposite losage on cmov. */ 6568 if (mode != CCmode) 6569 abort (); 6570 suffix = fp ? "nbe" : "a"; 6571 break; 6572 case LT: 6573 if (mode == CCNOmode || mode == CCGOCmode) 6574 suffix = "s"; 6575 else if (mode == CCmode || mode == CCGCmode) 6576 suffix = "l"; 6577 else 6578 abort (); 6579 break; 6580 case LTU: 6581 if (mode != CCmode) 6582 abort (); 6583 suffix = "b"; 6584 break; 6585 case GE: 6586 if (mode == CCNOmode || mode == CCGOCmode) 6587 suffix = "ns"; 6588 else if (mode == CCmode || mode == CCGCmode) 6589 suffix = "ge"; 6590 else 6591 abort (); 6592 break; 6593 case GEU: 6594 /* ??? As above. */ 6595 if (mode != CCmode) 6596 abort (); 6597 suffix = fp ? "nb" : "ae"; 6598 break; 6599 case LE: 6600 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode) 6601 abort (); 6602 suffix = "le"; 6603 break; 6604 case LEU: 6605 if (mode != CCmode) 6606 abort (); 6607 suffix = "be"; 6608 break; 6609 case UNORDERED: 6610 suffix = fp ? "u" : "p"; 6611 break; 6612 case ORDERED: 6613 suffix = fp ? 
"nu" : "np"; 6614 break; 6615 default: 6616 abort (); 6617 } 6618 fputs (suffix, file); 6619} 6620 6621void 6622print_reg (x, code, file) 6623 rtx x; 6624 int code; 6625 FILE *file; 6626{ 6627 if (REGNO (x) == ARG_POINTER_REGNUM 6628 || REGNO (x) == FRAME_POINTER_REGNUM 6629 || REGNO (x) == FLAGS_REG 6630 || REGNO (x) == FPSR_REG) 6631 abort (); 6632 6633 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0) 6634 putc ('%', file); 6635 6636 if (code == 'w' || MMX_REG_P (x)) 6637 code = 2; 6638 else if (code == 'b') 6639 code = 1; 6640 else if (code == 'k') 6641 code = 4; 6642 else if (code == 'q') 6643 code = 8; 6644 else if (code == 'y') 6645 code = 3; 6646 else if (code == 'h') 6647 code = 0; 6648 else 6649 code = GET_MODE_SIZE (GET_MODE (x)); 6650 6651 /* Irritatingly, AMD extended registers use different naming convention 6652 from the normal registers. */ 6653 if (REX_INT_REG_P (x)) 6654 { 6655 if (!TARGET_64BIT) 6656 abort (); 6657 switch (code) 6658 { 6659 case 0: 6660 error ("extended registers have no high halves"); 6661 break; 6662 case 1: 6663 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8); 6664 break; 6665 case 2: 6666 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8); 6667 break; 6668 case 4: 6669 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8); 6670 break; 6671 case 8: 6672 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8); 6673 break; 6674 default: 6675 error ("unsupported operand size for extended register"); 6676 break; 6677 } 6678 return; 6679 } 6680 switch (code) 6681 { 6682 case 3: 6683 if (STACK_TOP_P (x)) 6684 { 6685 fputs ("st(0)", file); 6686 break; 6687 } 6688 /* FALLTHRU */ 6689 case 8: 6690 case 4: 6691 case 12: 6692 if (! ANY_FP_REG_P (x)) 6693 putc (code == 8 && TARGET_64BIT ? 
'r' : 'e', file); 6694 /* FALLTHRU */ 6695 case 16: 6696 case 2: 6697 fputs (hi_reg_name[REGNO (x)], file); 6698 break; 6699 case 1: 6700 fputs (qi_reg_name[REGNO (x)], file); 6701 break; 6702 case 0: 6703 fputs (qi_high_reg_name[REGNO (x)], file); 6704 break; 6705 default: 6706 abort (); 6707 } 6708} 6709 6710/* Locate some local-dynamic symbol still in use by this function 6711 so that we can print its name in some tls_local_dynamic_base 6712 pattern. */ 6713 6714static const char * 6715get_some_local_dynamic_name () 6716{ 6717 rtx insn; 6718 6719 if (cfun->machine->some_ld_name) 6720 return cfun->machine->some_ld_name; 6721 6722 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) 6723 if (INSN_P (insn) 6724 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) 6725 return cfun->machine->some_ld_name; 6726 6727 abort (); 6728} 6729 6730static int 6731get_some_local_dynamic_name_1 (px, data) 6732 rtx *px; 6733 void *data ATTRIBUTE_UNUSED; 6734{ 6735 rtx x = *px; 6736 6737 if (GET_CODE (x) == SYMBOL_REF 6738 && local_dynamic_symbolic_operand (x, Pmode)) 6739 { 6740 cfun->machine->some_ld_name = XSTR (x, 0); 6741 return 1; 6742 } 6743 6744 return 0; 6745} 6746 6747/* Meaning of CODE: 6748 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. 6749 C -- print opcode suffix for set/cmov insn. 6750 c -- like C, but print reversed condition 6751 F,f -- likewise, but for floating-point. 6752 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", 6753 otherwise nothing 6754 R -- print the prefix for register names. 6755 z -- print the opcode suffix for the size of the current operand. 6756 * -- print a star (in certain assembler syntax) 6757 A -- print an absolute memory reference. 6758 w -- print the operand as if it's a "word" (HImode) even if it isn't. 6759 s -- print a shift double count, followed by the assemblers argument 6760 delimiter. 6761 b -- print the QImode name of the register for the indicated operand. 
6762 %b0 would print %al if operands[0] is reg 0. 6763 w -- likewise, print the HImode name of the register. 6764 k -- likewise, print the SImode name of the register. 6765 q -- likewise, print the DImode name of the register. 6766 h -- print the QImode name for a "high" register, either ah, bh, ch or dh. 6767 y -- print "st(0)" instead of "st" as a register. 6768 D -- print condition for SSE cmp instruction. 6769 P -- if PIC, print an @PLT suffix. 6770 X -- don't print any sort of PIC '@' suffix for a symbol. 6771 & -- print some in-use local-dynamic symbol name. 6772 */ 6773 6774void 6775print_operand (file, x, code) 6776 FILE *file; 6777 rtx x; 6778 int code; 6779{ 6780 if (code) 6781 { 6782 switch (code) 6783 { 6784 case '*': 6785 if (ASSEMBLER_DIALECT == ASM_ATT) 6786 putc ('*', file); 6787 return; 6788 6789 case '&': 6790 assemble_name (file, get_some_local_dynamic_name ()); 6791 return; 6792 6793 case 'A': 6794 if (ASSEMBLER_DIALECT == ASM_ATT) 6795 putc ('*', file); 6796 else if (ASSEMBLER_DIALECT == ASM_INTEL) 6797 { 6798 /* Intel syntax. For absolute addresses, registers should not 6799 be surrounded by braces. 
*/ 6800 if (GET_CODE (x) != REG) 6801 { 6802 putc ('[', file); 6803 PRINT_OPERAND (file, x, 0); 6804 putc (']', file); 6805 return; 6806 } 6807 } 6808 else 6809 abort (); 6810 6811 PRINT_OPERAND (file, x, 0); 6812 return; 6813 6814 6815 case 'L': 6816 if (ASSEMBLER_DIALECT == ASM_ATT) 6817 putc ('l', file); 6818 return; 6819 6820 case 'W': 6821 if (ASSEMBLER_DIALECT == ASM_ATT) 6822 putc ('w', file); 6823 return; 6824 6825 case 'B': 6826 if (ASSEMBLER_DIALECT == ASM_ATT) 6827 putc ('b', file); 6828 return; 6829 6830 case 'Q': 6831 if (ASSEMBLER_DIALECT == ASM_ATT) 6832 putc ('l', file); 6833 return; 6834 6835 case 'S': 6836 if (ASSEMBLER_DIALECT == ASM_ATT) 6837 putc ('s', file); 6838 return; 6839 6840 case 'T': 6841 if (ASSEMBLER_DIALECT == ASM_ATT) 6842 putc ('t', file); 6843 return; 6844 6845 case 'z': 6846 /* 387 opcodes don't get size suffixes if the operands are 6847 registers. */ 6848 if (STACK_REG_P (x)) 6849 return; 6850 6851 /* Likewise if using Intel opcodes. */ 6852 if (ASSEMBLER_DIALECT == ASM_INTEL) 6853 return; 6854 6855 /* This is the size of op from size of operand. */ 6856 switch (GET_MODE_SIZE (GET_MODE (x))) 6857 { 6858 case 2: 6859#ifdef HAVE_GAS_FILDS_FISTS 6860 putc ('s', file); 6861#endif 6862 return; 6863 6864 case 4: 6865 if (GET_MODE (x) == SFmode) 6866 { 6867 putc ('s', file); 6868 return; 6869 } 6870 else 6871 putc ('l', file); 6872 return; 6873 6874 case 12: 6875 case 16: 6876 putc ('t', file); 6877 return; 6878 6879 case 8: 6880 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) 6881 { 6882#ifdef GAS_MNEMONICS 6883 putc ('q', file); 6884#else 6885 putc ('l', file); 6886 putc ('l', file); 6887#endif 6888 } 6889 else 6890 putc ('l', file); 6891 return; 6892 6893 default: 6894 abort (); 6895 } 6896 6897 case 'b': 6898 case 'w': 6899 case 'k': 6900 case 'q': 6901 case 'h': 6902 case 'y': 6903 case 'X': 6904 case 'P': 6905 break; 6906 6907 case 's': 6908 if (GET_CODE (x) == CONST_INT || ! 
SHIFT_DOUBLE_OMITS_COUNT) 6909 { 6910 PRINT_OPERAND (file, x, 0); 6911 putc (',', file); 6912 } 6913 return; 6914 6915 case 'D': 6916 /* Little bit of braindamage here. The SSE compare instructions 6917 does use completely different names for the comparisons that the 6918 fp conditional moves. */ 6919 switch (GET_CODE (x)) 6920 { 6921 case EQ: 6922 case UNEQ: 6923 fputs ("eq", file); 6924 break; 6925 case LT: 6926 case UNLT: 6927 fputs ("lt", file); 6928 break; 6929 case LE: 6930 case UNLE: 6931 fputs ("le", file); 6932 break; 6933 case UNORDERED: 6934 fputs ("unord", file); 6935 break; 6936 case NE: 6937 case LTGT: 6938 fputs ("neq", file); 6939 break; 6940 case UNGE: 6941 case GE: 6942 fputs ("nlt", file); 6943 break; 6944 case UNGT: 6945 case GT: 6946 fputs ("nle", file); 6947 break; 6948 case ORDERED: 6949 fputs ("ord", file); 6950 break; 6951 default: 6952 abort (); 6953 break; 6954 } 6955 return; 6956 case 'O': 6957#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 6958 if (ASSEMBLER_DIALECT == ASM_ATT) 6959 { 6960 switch (GET_MODE (x)) 6961 { 6962 case HImode: putc ('w', file); break; 6963 case SImode: 6964 case SFmode: putc ('l', file); break; 6965 case DImode: 6966 case DFmode: putc ('q', file); break; 6967 default: abort (); 6968 } 6969 putc ('.', file); 6970 } 6971#endif 6972 return; 6973 case 'C': 6974 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file); 6975 return; 6976 case 'F': 6977#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 6978 if (ASSEMBLER_DIALECT == ASM_ATT) 6979 putc ('.', file); 6980#endif 6981 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file); 6982 return; 6983 6984 /* Like above, but reverse condition */ 6985 case 'c': 6986 /* Check to see if argument to %c is really a constant 6987 and not a condition code which needs to be reversed. 
*/ 6988 if (GET_RTX_CLASS (GET_CODE (x)) != '<') 6989 { 6990 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'"); 6991 return; 6992 } 6993 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file); 6994 return; 6995 case 'f': 6996#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 6997 if (ASSEMBLER_DIALECT == ASM_ATT) 6998 putc ('.', file); 6999#endif 7000 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file); 7001 return; 7002 case '+': 7003 { 7004 rtx x; 7005 7006 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS) 7007 return; 7008 7009 x = find_reg_note (current_output_insn, REG_BR_PROB, 0); 7010 if (x) 7011 { 7012 int pred_val = INTVAL (XEXP (x, 0)); 7013 7014 if (pred_val < REG_BR_PROB_BASE * 45 / 100 7015 || pred_val > REG_BR_PROB_BASE * 55 / 100) 7016 { 7017 int taken = pred_val > REG_BR_PROB_BASE / 2; 7018 int cputaken = final_forward_branch_p (current_output_insn) == 0; 7019 7020 /* Emit hints only in the case default branch prediction 7021 heruistics would fail. */ 7022 if (taken != cputaken) 7023 { 7024 /* We use 3e (DS) prefix for taken branches and 7025 2e (CS) prefix for not taken branches. */ 7026 if (taken) 7027 fputs ("ds ; ", file); 7028 else 7029 fputs ("cs ; ", file); 7030 } 7031 } 7032 } 7033 return; 7034 } 7035 default: 7036 output_operand_lossage ("invalid operand code `%c'", code); 7037 } 7038 } 7039 7040 if (GET_CODE (x) == REG) 7041 { 7042 PRINT_REG (x, code, file); 7043 } 7044 7045 else if (GET_CODE (x) == MEM) 7046 { 7047 /* No `byte ptr' prefix for call instructions. 
*/ 7048 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P') 7049 { 7050 const char * size; 7051 switch (GET_MODE_SIZE (GET_MODE (x))) 7052 { 7053 case 1: size = "BYTE"; break; 7054 case 2: size = "WORD"; break; 7055 case 4: size = "DWORD"; break; 7056 case 8: size = "QWORD"; break; 7057 case 12: size = "XWORD"; break; 7058 case 16: size = "XMMWORD"; break; 7059 default: 7060 abort (); 7061 } 7062 7063 /* Check for explicit size override (codes 'b', 'w' and 'k') */ 7064 if (code == 'b') 7065 size = "BYTE"; 7066 else if (code == 'w') 7067 size = "WORD"; 7068 else if (code == 'k') 7069 size = "DWORD"; 7070 7071 fputs (size, file); 7072 fputs (" PTR ", file); 7073 } 7074 7075 x = XEXP (x, 0); 7076 if (flag_pic && CONSTANT_ADDRESS_P (x)) 7077 output_pic_addr_const (file, x, code); 7078 /* Avoid (%rip) for call operands. */ 7079 else if (CONSTANT_ADDRESS_P (x) && code == 'P' 7080 && GET_CODE (x) != CONST_INT) 7081 output_addr_const (file, x); 7082 else if (this_is_asm_operands && ! address_operand (x, VOIDmode)) 7083 output_operand_lossage ("invalid constraints for operand"); 7084 else 7085 output_address (x); 7086 } 7087 7088 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode) 7089 { 7090 REAL_VALUE_TYPE r; 7091 long l; 7092 7093 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 7094 REAL_VALUE_TO_TARGET_SINGLE (r, l); 7095 7096 if (ASSEMBLER_DIALECT == ASM_ATT) 7097 putc ('$', file); 7098 fprintf (file, "0x%lx", l); 7099 } 7100 7101 /* These float cases don't actually occur as immediate operands. 
*/ 7102 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode) 7103 { 7104 char dstr[30]; 7105 7106 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); 7107 fprintf (file, "%s", dstr); 7108 } 7109 7110 else if (GET_CODE (x) == CONST_DOUBLE 7111 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode)) 7112 { 7113 char dstr[30]; 7114 7115 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); 7116 fprintf (file, "%s", dstr); 7117 } 7118 7119 else 7120 { 7121 if (code != 'P') 7122 { 7123 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE) 7124 { 7125 if (ASSEMBLER_DIALECT == ASM_ATT) 7126 putc ('$', file); 7127 } 7128 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF 7129 || GET_CODE (x) == LABEL_REF) 7130 { 7131 if (ASSEMBLER_DIALECT == ASM_ATT) 7132 putc ('$', file); 7133 else 7134 fputs ("OFFSET FLAT:", file); 7135 } 7136 } 7137 if (GET_CODE (x) == CONST_INT) 7138 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); 7139 else if (flag_pic) 7140 output_pic_addr_const (file, x, code); 7141 else 7142 output_addr_const (file, x); 7143 } 7144} 7145 7146/* Print a memory operand whose address is ADDR. */ 7147 7148void 7149print_operand_address (file, addr) 7150 FILE *file; 7151 register rtx addr; 7152{ 7153 struct ix86_address parts; 7154 rtx base, index, disp; 7155 int scale; 7156 7157 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP) 7158 { 7159 if (ASSEMBLER_DIALECT == ASM_INTEL) 7160 fputs ("DWORD PTR ", file); 7161 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0) 7162 putc ('%', file); 7163 if (TARGET_64BIT) 7164 fputs ("fs:0", file); 7165 else 7166 fputs ("gs:0", file); 7167 return; 7168 } 7169 7170 if (! ix86_decompose_address (addr, &parts)) 7171 abort (); 7172 7173 base = parts.base; 7174 index = parts.index; 7175 disp = parts.disp; 7176 scale = parts.scale; 7177 7178 if (!base && !index) 7179 { 7180 /* Displacement only requires special attention. 
*/ 7181 7182 if (GET_CODE (disp) == CONST_INT) 7183 { 7184 if (ASSEMBLER_DIALECT == ASM_INTEL) 7185 { 7186 if (USER_LABEL_PREFIX[0] == 0) 7187 putc ('%', file); 7188 fputs ("ds:", file); 7189 } 7190 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr)); 7191 } 7192 else if (flag_pic) 7193 output_pic_addr_const (file, addr, 0); 7194 else 7195 output_addr_const (file, addr); 7196 7197 /* Use one byte shorter RIP relative addressing for 64bit mode. */ 7198 if (TARGET_64BIT 7199 && ((GET_CODE (addr) == SYMBOL_REF 7200 && ! tls_symbolic_operand (addr, GET_MODE (addr))) 7201 || GET_CODE (addr) == LABEL_REF 7202 || (GET_CODE (addr) == CONST 7203 && GET_CODE (XEXP (addr, 0)) == PLUS 7204 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF 7205 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF) 7206 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT))) 7207 fputs ("(%rip)", file); 7208 } 7209 else 7210 { 7211 if (ASSEMBLER_DIALECT == ASM_ATT) 7212 { 7213 if (disp) 7214 { 7215 if (flag_pic) 7216 output_pic_addr_const (file, disp, 0); 7217 else if (GET_CODE (disp) == LABEL_REF) 7218 output_asm_label (disp); 7219 else 7220 output_addr_const (file, disp); 7221 } 7222 7223 putc ('(', file); 7224 if (base) 7225 PRINT_REG (base, 0, file); 7226 if (index) 7227 { 7228 putc (',', file); 7229 PRINT_REG (index, 0, file); 7230 if (scale != 1) 7231 fprintf (file, ",%d", scale); 7232 } 7233 putc (')', file); 7234 } 7235 else 7236 { 7237 rtx offset = NULL_RTX; 7238 7239 if (disp) 7240 { 7241 /* Pull out the offset of a symbol; print any symbol itself. 
*/ 7242 if (GET_CODE (disp) == CONST 7243 && GET_CODE (XEXP (disp, 0)) == PLUS 7244 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT) 7245 { 7246 offset = XEXP (XEXP (disp, 0), 1); 7247 disp = gen_rtx_CONST (VOIDmode, 7248 XEXP (XEXP (disp, 0), 0)); 7249 } 7250 7251 if (flag_pic) 7252 output_pic_addr_const (file, disp, 0); 7253 else if (GET_CODE (disp) == LABEL_REF) 7254 output_asm_label (disp); 7255 else if (GET_CODE (disp) == CONST_INT) 7256 offset = disp; 7257 else 7258 output_addr_const (file, disp); 7259 } 7260 7261 putc ('[', file); 7262 if (base) 7263 { 7264 PRINT_REG (base, 0, file); 7265 if (offset) 7266 { 7267 if (INTVAL (offset) >= 0) 7268 putc ('+', file); 7269 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); 7270 } 7271 } 7272 else if (offset) 7273 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); 7274 else 7275 putc ('0', file); 7276 7277 if (index) 7278 { 7279 putc ('+', file); 7280 PRINT_REG (index, 0, file); 7281 if (scale != 1) 7282 fprintf (file, "*%d", scale); 7283 } 7284 putc (']', file); 7285 } 7286 } 7287} 7288 7289bool 7290output_addr_const_extra (file, x) 7291 FILE *file; 7292 rtx x; 7293{ 7294 rtx op; 7295 7296 if (GET_CODE (x) != UNSPEC) 7297 return false; 7298 7299 op = XVECEXP (x, 0, 0); 7300 switch (XINT (x, 1)) 7301 { 7302 case UNSPEC_GOTTPOFF: 7303 output_addr_const (file, op); 7304 /* FIXME: This might be @TPOFF in Sun ld. 
*/ 7305 fputs ("@GOTTPOFF", file); 7306 break; 7307 case UNSPEC_TPOFF: 7308 output_addr_const (file, op); 7309 fputs ("@TPOFF", file); 7310 break; 7311 case UNSPEC_NTPOFF: 7312 output_addr_const (file, op); 7313 if (TARGET_64BIT) 7314 fputs ("@TPOFF", file); 7315 else 7316 fputs ("@NTPOFF", file); 7317 break; 7318 case UNSPEC_DTPOFF: 7319 output_addr_const (file, op); 7320 fputs ("@DTPOFF", file); 7321 break; 7322 case UNSPEC_GOTNTPOFF: 7323 output_addr_const (file, op); 7324 if (TARGET_64BIT) 7325 fputs ("@GOTTPOFF(%rip)", file); 7326 else 7327 fputs ("@GOTNTPOFF", file); 7328 break; 7329 case UNSPEC_INDNTPOFF: 7330 output_addr_const (file, op); 7331 fputs ("@INDNTPOFF", file); 7332 break; 7333 7334 default: 7335 return false; 7336 } 7337 7338 return true; 7339} 7340 7341/* Split one or more DImode RTL references into pairs of SImode 7342 references. The RTL can be REG, offsettable MEM, integer constant, or 7343 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to 7344 split and "num" is its length. lo_half and hi_half are output arrays 7345 that parallel "operands". */ 7346 7347void 7348split_di (operands, num, lo_half, hi_half) 7349 rtx operands[]; 7350 int num; 7351 rtx lo_half[], hi_half[]; 7352{ 7353 while (num--) 7354 { 7355 rtx op = operands[num]; 7356 7357 /* simplify_subreg refuse to split volatile memory addresses, 7358 but we still have to handle it. */ 7359 if (GET_CODE (op) == MEM) 7360 { 7361 lo_half[num] = adjust_address (op, SImode, 0); 7362 hi_half[num] = adjust_address (op, SImode, 4); 7363 } 7364 else 7365 { 7366 lo_half[num] = simplify_gen_subreg (SImode, op, 7367 GET_MODE (op) == VOIDmode 7368 ? DImode : GET_MODE (op), 0); 7369 hi_half[num] = simplify_gen_subreg (SImode, op, 7370 GET_MODE (op) == VOIDmode 7371 ? DImode : GET_MODE (op), 4); 7372 } 7373 } 7374} 7375/* Split one or more TImode RTL references into pairs of SImode 7376 references. The RTL can be REG, offsettable MEM, integer constant, or 7377 CONST_DOUBLE. 
"operands" is a pointer to an array of DImode RTL to 7378 split and "num" is its length. lo_half and hi_half are output arrays 7379 that parallel "operands". */ 7380 7381void 7382split_ti (operands, num, lo_half, hi_half) 7383 rtx operands[]; 7384 int num; 7385 rtx lo_half[], hi_half[]; 7386{ 7387 while (num--) 7388 { 7389 rtx op = operands[num]; 7390 7391 /* simplify_subreg refuse to split volatile memory addresses, but we 7392 still have to handle it. */ 7393 if (GET_CODE (op) == MEM) 7394 { 7395 lo_half[num] = adjust_address (op, DImode, 0); 7396 hi_half[num] = adjust_address (op, DImode, 8); 7397 } 7398 else 7399 { 7400 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0); 7401 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8); 7402 } 7403 } 7404} 7405 7406/* Output code to perform a 387 binary operation in INSN, one of PLUS, 7407 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] 7408 is the expression of the binary operation. The output may either be 7409 emitted here, or returned to the caller, like all output_* functions. 7410 7411 There is no guarantee that the operands are the same mode, as they 7412 might be within FLOAT or FLOAT_EXTEND expressions. */ 7413 7414#ifndef SYSV386_COMPAT 7415/* Set to 1 for compatibility with brain-damaged assemblers. No-one 7416 wants to fix the assemblers because that causes incompatibility 7417 with gcc. No-one wants to fix gcc because that causes 7418 incompatibility with assemblers... You can use the option of 7419 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. 
*/ 7420#define SYSV386_COMPAT 1 7421#endif 7422 7423const char * 7424output_387_binary_op (insn, operands) 7425 rtx insn; 7426 rtx *operands; 7427{ 7428 static char buf[30]; 7429 const char *p; 7430 const char *ssep; 7431 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]); 7432 7433#ifdef ENABLE_CHECKING 7434 /* Even if we do not want to check the inputs, this documents input 7435 constraints. Which helps in understanding the following code. */ 7436 if (STACK_REG_P (operands[0]) 7437 && ((REG_P (operands[1]) 7438 && REGNO (operands[0]) == REGNO (operands[1]) 7439 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM)) 7440 || (REG_P (operands[2]) 7441 && REGNO (operands[0]) == REGNO (operands[2]) 7442 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM))) 7443 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2]))) 7444 ; /* ok */ 7445 else if (!is_sse) 7446 abort (); 7447#endif 7448 7449 switch (GET_CODE (operands[3])) 7450 { 7451 case PLUS: 7452 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 7453 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 7454 p = "fiadd"; 7455 else 7456 p = "fadd"; 7457 ssep = "add"; 7458 break; 7459 7460 case MINUS: 7461 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 7462 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 7463 p = "fisub"; 7464 else 7465 p = "fsub"; 7466 ssep = "sub"; 7467 break; 7468 7469 case MULT: 7470 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 7471 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 7472 p = "fimul"; 7473 else 7474 p = "fmul"; 7475 ssep = "mul"; 7476 break; 7477 7478 case DIV: 7479 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 7480 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 7481 p = "fidiv"; 7482 else 7483 p = "fdiv"; 7484 ssep = "div"; 7485 break; 7486 7487 default: 7488 abort (); 7489 } 7490 7491 if (is_sse) 7492 { 7493 strcpy (buf, ssep); 7494 if (GET_MODE 
(operands[0]) == SFmode) 7495 strcat (buf, "ss\t{%2, %0|%0, %2}"); 7496 else 7497 strcat (buf, "sd\t{%2, %0|%0, %2}"); 7498 return buf; 7499 } 7500 strcpy (buf, p); 7501 7502 switch (GET_CODE (operands[3])) 7503 { 7504 case MULT: 7505 case PLUS: 7506 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2])) 7507 { 7508 rtx temp = operands[2]; 7509 operands[2] = operands[1]; 7510 operands[1] = temp; 7511 } 7512 7513 /* know operands[0] == operands[1]. */ 7514 7515 if (GET_CODE (operands[2]) == MEM) 7516 { 7517 p = "%z2\t%2"; 7518 break; 7519 } 7520 7521 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) 7522 { 7523 if (STACK_TOP_P (operands[0])) 7524 /* How is it that we are storing to a dead operand[2]? 7525 Well, presumably operands[1] is dead too. We can't 7526 store the result to st(0) as st(0) gets popped on this 7527 instruction. Instead store to operands[2] (which I 7528 think has to be st(1)). st(1) will be popped later. 7529 gcc <= 2.8.1 didn't have this check and generated 7530 assembly code that the Unixware assembler rejected. */ 7531 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ 7532 else 7533 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ 7534 break; 7535 } 7536 7537 if (STACK_TOP_P (operands[0])) 7538 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ 7539 else 7540 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ 7541 break; 7542 7543 case MINUS: 7544 case DIV: 7545 if (GET_CODE (operands[1]) == MEM) 7546 { 7547 p = "r%z1\t%1"; 7548 break; 7549 } 7550 7551 if (GET_CODE (operands[2]) == MEM) 7552 { 7553 p = "%z2\t%2"; 7554 break; 7555 } 7556 7557 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) 7558 { 7559#if SYSV386_COMPAT 7560 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T 7561 derived assemblers, confusingly reverse the direction of 7562 the operation for fsub{r} and fdiv{r} when the 7563 destination register is not st(0). 
The Intel assembler 7564 doesn't have this brain damage. Read !SYSV386_COMPAT to 7565 figure out what the hardware really does. */ 7566 if (STACK_TOP_P (operands[0])) 7567 p = "{p\t%0, %2|rp\t%2, %0}"; 7568 else 7569 p = "{rp\t%2, %0|p\t%0, %2}"; 7570#else 7571 if (STACK_TOP_P (operands[0])) 7572 /* As above for fmul/fadd, we can't store to st(0). */ 7573 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ 7574 else 7575 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ 7576#endif 7577 break; 7578 } 7579 7580 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) 7581 { 7582#if SYSV386_COMPAT 7583 if (STACK_TOP_P (operands[0])) 7584 p = "{rp\t%0, %1|p\t%1, %0}"; 7585 else 7586 p = "{p\t%1, %0|rp\t%0, %1}"; 7587#else 7588 if (STACK_TOP_P (operands[0])) 7589 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */ 7590 else 7591 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */ 7592#endif 7593 break; 7594 } 7595 7596 if (STACK_TOP_P (operands[0])) 7597 { 7598 if (STACK_TOP_P (operands[1])) 7599 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ 7600 else 7601 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */ 7602 break; 7603 } 7604 else if (STACK_TOP_P (operands[1])) 7605 { 7606#if SYSV386_COMPAT 7607 p = "{\t%1, %0|r\t%0, %1}"; 7608#else 7609 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */ 7610#endif 7611 } 7612 else 7613 { 7614#if SYSV386_COMPAT 7615 p = "{r\t%2, %0|\t%0, %2}"; 7616#else 7617 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ 7618#endif 7619 } 7620 break; 7621 7622 default: 7623 abort (); 7624 } 7625 7626 strcat (buf, p); 7627 return buf; 7628} 7629 7630/* Output code to initialize control word copies used by 7631 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN 7632 is set to control word rounding downwards. 
*/ 7633void 7634emit_i387_cw_initialization (normal, round_down) 7635 rtx normal, round_down; 7636{ 7637 rtx reg = gen_reg_rtx (HImode); 7638 7639 emit_insn (gen_x86_fnstcw_1 (normal)); 7640 emit_move_insn (reg, normal); 7641 if (!TARGET_PARTIAL_REG_STALL && !optimize_size 7642 && !TARGET_64BIT) 7643 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc))); 7644 else 7645 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00))); 7646 emit_move_insn (round_down, reg); 7647} 7648 7649/* Output code for INSN to convert a float to a signed int. OPERANDS 7650 are the insn operands. The output may be [HSD]Imode and the input 7651 operand may be [SDX]Fmode. */ 7652 7653const char * 7654output_fix_trunc (insn, operands) 7655 rtx insn; 7656 rtx *operands; 7657{ 7658 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; 7659 int dimode_p = GET_MODE (operands[0]) == DImode; 7660 7661 /* Jump through a hoop or two for DImode, since the hardware has no 7662 non-popping instruction. We used to do this a different way, but 7663 that was somewhat fragile and broke with post-reload splitters. */ 7664 if (dimode_p && !stack_top_dies) 7665 output_asm_insn ("fld\t%y1", operands); 7666 7667 if (!STACK_TOP_P (operands[1])) 7668 abort (); 7669 7670 if (GET_CODE (operands[0]) != MEM) 7671 abort (); 7672 7673 output_asm_insn ("fldcw\t%3", operands); 7674 if (stack_top_dies || dimode_p) 7675 output_asm_insn ("fistp%z0\t%0", operands); 7676 else 7677 output_asm_insn ("fist%z0\t%0", operands); 7678 output_asm_insn ("fldcw\t%2", operands); 7679 7680 return ""; 7681} 7682 7683/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi 7684 should be used and 2 when fnstsw should be used. UNORDERED_P is true 7685 when fucom should be used. 
 */

const char *
output_fp_compare (insn, operands, eflags_p, unordered_p)
     rtx insn;
     rtx *operands;
     int eflags_p, unordered_p;
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];
  /* Bitwise | (not ||) is deliberate: both predicates are cheap and
     side-effect free, so short-circuiting buys nothing.  */
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);

  /* In the fnstsw case operand 0 is the status-word destination, so the
     values being compared shift up by one operand slot.  */
  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }
  if (is_sse)
    {
      /* SSE compares: [u]comiss for SFmode, [u]comisd for DFmode.  Each
         arm returns, so the braceless nested if/else pairing is safe.  */
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "ucomiss\t{%1, %0|%0, %1}";
	else
	  return "comiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "ucomisd\t{%1, %0|%0, %1}";
	else
	  return "comisd\t{%1, %0|%0, %1}";
    }

  /* For 387 compares the first operand must be the stack top.  */
  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p == 1)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return "fstp\t%y0";
	}
      else
	{
	  if (eflags_p == 2)
	    {
	      /* Double-pop compare, then copy the status word into %0
	         for the flags consumer.  */
	      if (unordered_p)
		return "fucompp\n\tfnstsw\t%0";
	      else
		return "fcompp\n\tfnstsw\t%0";
	    }
	  else
	    {
	      if (unordered_p)
		return "fucompp";
	      else
		return "fcompp";
	    }
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.
	 Index layout: bit 3+ = eflags_p (0 = plain fcom, 1 = fcomi,
	 2 = fcom + fnstsw), bit 2 = integer operand (ficom), bit 1 =
	 unordered (fucom), bit 0 = pop variant (stack top dies).  NULL
	 entries are combinations that can never be requested.  */

      static const char * const alt[24] =
      {
	"fcom%z1\t%y1",
	"fcomp%z1\t%y1",
	"fucom%z1\t%y1",
	"fucomp%z1\t%y1",

	"ficom%z1\t%y1",
	"ficomp%z1\t%y1",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL,

	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 24)
	abort ();
      ret = alt[mask];
      if (ret == NULL)
	abort ();

      return ret;
    }
}

/* Output one element of an ASM_OUTPUT_ADDR_VEC_ELT jump table: a
   LPREFIX-prefixed local label reference, quad-sized on 64-bit.  */

void
ix86_output_addr_vec_elt (file, value)
     FILE *file;
     int value;
{
  const char *directive = ASM_LONG;

  if (TARGET_64BIT)
    {
#ifdef ASM_QUAD
      directive = ASM_QUAD;
#else
      /* A 64-bit target without a quad directive cannot emit these.  */
      abort ();
#endif
    }

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

/* Output one element of a relative-address jump table: label VALUE
   expressed relative to label REL (64-bit), to the GOT via @GOTOFF,
   to the Mach-O function base, or to the GOT symbol as a fallback.  */

void
ix86_output_addr_diff_elt (file, value, rel)
     FILE *file;
     int value, rel;
{
  if (TARGET_64BIT)
    fprintf (file, "%s%s%d-%s%d\n",
	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    /* + 1 skips the leading character of the base name — presumably an
       assembler prefix; verify against machopic_function_base_name.  */
    fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
	     machopic_function_base_name () + 1);
#endif
  else
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}

/* Generate either "mov $0, reg" or "xor reg, reg", as
   appropriate
   for the target.  */

void
ix86_expand_clear (dest)
     rtx dest;
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  if (!reload_completed)
    abort ();

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
    {
      /* xor clobbers the flags; hard register 17 is the flags register
         (FLAGS_REG elsewhere in this file).  */
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}

/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

static rtx
maybe_get_pool_constant (x)
     rtx x;
{
  x = XEXP (x, 0);

  /* Under 32-bit PIC a pool reference looks like
     (plus pic_offset_table_rtx (const (unspec [sym] UNSPEC_GOTOFF)));
     peel that wrapping off to reach the SYMBOL_REF.  */
  if (flag_pic && ! TARGET_64BIT)
    {
      if (GET_CODE (x) != PLUS)
	return NULL_RTX;
      if (XEXP (x, 0) != pic_offset_table_rtx)
	return NULL_RTX;
      x = XEXP (x, 1);
      if (GET_CODE (x) != CONST)
	return NULL_RTX;
      x = XEXP (x, 0);
      if (GET_CODE (x) != UNSPEC)
	return NULL_RTX;
      if (XINT (x, 1) != UNSPEC_GOTOFF)
	return NULL_RTX;
      x = XVECEXP (x, 0, 0);
    }

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}

/* Expand a move of MODE from operands[1] to operands[0], legitimizing
   TLS and PIC symbolic addresses and coercing operands the patterns
   cannot take (mem-to-mem, large 64-bit immediates, FP constants).  */

void
ix86_expand_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  /* During/after reload we may not create new pseudos or force
     constants to memory.  */
  int strict = (reload_in_progress || reload_completed);
  rtx insn, op0, op1, tmp;

  op0 = operands[0];
  op1 = operands[1];

  if (tls_symbolic_operand (op1, Pmode))
    {
      /* TLS references must be legitimized; a store still needs the
         legitimized address moved through a register first.  */
      op1 = legitimize_address (op1, op1, VOIDmode);
      if (GET_CODE (op0) == MEM)
	{
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
	  op1 = tmp;
	}
    }
  else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
#if TARGET_MACHO
      if (MACHOPIC_PURE)
	{
	  /* Reuse op0 as the scratch when it is already a register (or
	     when reload forbids new pseudos).  */
	  rtx temp = ((reload_in_progress
		       || ((op0 && GET_CODE (op0) == REG)
			   && mode == Pmode))
		      ? op0 : gen_reg_rtx (Pmode));
	  op1 = machopic_indirect_data_reference (op1, temp);
	  op1 = machopic_legitimize_pic_address (op1, mode,
						 temp == op1 ? 0 : temp);
	}
      else
	{
	  if (MACHOPIC_INDIRECT)
	    op1 = machopic_indirect_data_reference (op1, 0);
	}
      if (op0 != op1)
	{
	  insn = gen_rtx_SET (VOIDmode, op0, op1);
	  emit_insn (insn);
	}
      return;
#endif /* TARGET_MACHO */
      if (GET_CODE (op0) == MEM)
	op1 = force_reg (Pmode, op1);
      else
	{
	  rtx temp = op0;
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (op1, temp);
	  /* legitimize_pic_address may have emitted the whole move.  */
	  if (temp == op0)
	    return;
	  op1 = temp;
	}
    }
  else
    {
      /* mem-to-mem moves (other than pushes) are not encodable; go
         through a register.  */
      if (GET_CODE (op0) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && GET_CODE (op1) == MEM)
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (op1, mode)
	  && !x86_64_zero_extended_value (op1)
	  && !register_operand (op0, mode)
	  && optimize && !reload_completed && !reload_in_progress)
	op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (op1) == CONST_DOUBLE)
	    {
	      op1 = validize_mem (force_const_mem (mode, op1));
	      if (!register_operand (op0, mode))
		{
		  rtx temp = gen_reg_rtx (mode);
		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
		  emit_move_insn (op0, temp);
		  return;
		}
	    }
	}
    }

  insn = gen_rtx_SET (VOIDmode, op0, op1);

  emit_insn (insn);
}

void
ix86_expand_vector_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (operands[0], mode)
      && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
    {
      operands[1] = force_const_mem (mode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* Make operand1 a register if it isn't already.  */
  if (!no_new_pseudos
      && !register_operand (operands[0], mode)
      && !register_operand (operands[1], mode))
    {
      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
      emit_move_insn (operands[0], temp);
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
}

/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.
 */

void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  /* 0 = no memory match; 1 = dst matches src1; 2 = dst matches src2.  */
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators.
     RTX class 'c' marks commutative binary operators.  */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  Keep the one that
     matches the destination in memory, if any.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction, normally with an explicit flags clobber.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[3];
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutable, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
	    || (GET_RTX_CLASS (code) == 'c'
		&& rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutable and the source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != 'c'
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;
  return 1;
}

/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.
 */

void
ix86_expand_unary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  /* Nonzero when the memory destination matches the source operand.  */
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  NOT does not touch the flags, so it gets no
     clobber; everything else (e.g. NEG) clobbers the flags register.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (code, mode, operands)
     enum rtx_code code ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[2] ATTRIBUTE_UNUSED;
{
  /* If one of operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}

/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (insn, req_mode)
     rtx insn;
     enum machine_mode req_mode;
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  if (GET_CODE (SET_SRC (set)) != COMPARE)
    abort ();

  /* The cases below form a lattice via deliberate fall-through:
     each CC mode rejects the REQ_MODEs it cannot satisfy, then falls
     into the checks of the weaker modes.  */
  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (code)
     enum rtx_code code ATTRIBUTE_UNUSED;
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}

/* Return the condition-code mode needed to test CODE on OP0/OP1.  */

enum machine_mode
ix86_cc_mode (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
	 mode.  */
    case USE:
      return CCmode;
    default:
      abort ();
    }
}

/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (code)
     enum rtx_code code ATTRIBUTE_UNUSED;
{
  enum rtx_code swapped_code = swap_condition (code);
  /* Use fcomi whenever it ties the cheapest strategy for either operand
     order.  */
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
	  || (ix86_fp_comparison_cost (swapped_code)
	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
}

/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (code, pop0, pop1)
     enum rtx_code code;
     rtx *pop0, *pop1;
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || op_mode == XFmode
	  || op_mode == TFmode
	  || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  /* Constants the 387 can load directly (fldz/fld1 etc.) go in
	     a register; everything else comes from the constant pool.  */
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}

/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */
static enum rtx_code
ix86_fp_compare_code_to_integer (code)
     enum rtx_code code;
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
      break;
    case UNEQ:
      return EQ;
      break;
    case UNLT:
      return LTU;
      break;
    case UNLE:
      return LEU;
      break;
    case LTGT:
      return NE;
      break;
    default:
      return UNKNOWN;
    }
}

/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is comparison code for branch that will
   branch around FIRST_CODE and SECOND_CODE.  If some of branches
   is not required, set value to NIL.
   We never require more than two branches.
 */
static void
ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
     enum rtx_code code, *bypass_code, *first_code, *second_code;
{
  *first_code = code;
  *bypass_code = NIL;
  *second_code = NIL;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF PF CF
     >      0  0  0
     <      0  0  1
     =      1  0  0
     un     1  1  1 */

  switch (code)
    {
      /* These map onto a single flags test and need no helper branch.  */
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;
      /* These would be wrong on unordered operands: either branch
	 around the test on UNORDERED (bypass), or take an additional
	 branch on UNORDERED (second).  */
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      abort ();
    }
  /* Without IEEE semantics, NaN handling is unnecessary — drop the
     extra branches.  */
  if (!TARGET_IEEE_FP)
    {
      *second_code = NIL;
      *bypass_code = NIL;
    }
}

/* Return cost of comparison done fcom + arithmetics operations on AX.
   All following functions do use number of instructions as a cost metric.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */
static int
ix86_fp_comparison_arithmetics_cost (code)
     enum rtx_code code;
{
  if (!TARGET_IEEE_FP)
    return 4;
  /* The cost of code output by ix86_expand_fp_compare.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      return 4;
      break;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      return 5;
      break;
    case LE:
    case UNGT:
      return 6;
      break;
    default:
      abort ();
    }
}

/* Return cost of comparison done using fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_fcomi_cost (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not supported - this
     prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  /* Base cost of 2 (fcomi + branch), plus 1 for any extra branch.  */
  return (bypass_code != NIL || second_code != NIL) + 2;
}

/* Return cost of comparison done using sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_sahf_cost (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not preferred - this
     avoids gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  /* Base cost of 3 (fnstsw + sahf + branch), plus 1 for any extra
     branch.  */
  return (bypass_code != NIL || second_code != NIL) + 3;
}

/* Compute cost of the comparison done using any method.
   See ix86_fp_comparison_arithmetics_cost for the metrics.
*/ 8641static int 8642ix86_fp_comparison_cost (code) 8643 enum rtx_code code; 8644{ 8645 int fcomi_cost, sahf_cost, arithmetics_cost = 1024; 8646 int min; 8647 8648 fcomi_cost = ix86_fp_comparison_fcomi_cost (code); 8649 sahf_cost = ix86_fp_comparison_sahf_cost (code); 8650 8651 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code); 8652 if (min > sahf_cost) 8653 min = sahf_cost; 8654 if (min > fcomi_cost) 8655 min = fcomi_cost; 8656 return min; 8657} 8658 8659/* Generate insn patterns to do a floating point compare of OPERANDS. */ 8660 8661static rtx 8662ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test) 8663 enum rtx_code code; 8664 rtx op0, op1, scratch; 8665 rtx *second_test; 8666 rtx *bypass_test; 8667{ 8668 enum machine_mode fpcmp_mode, intcmp_mode; 8669 rtx tmp, tmp2; 8670 int cost = ix86_fp_comparison_cost (code); 8671 enum rtx_code bypass_code, first_code, second_code; 8672 8673 fpcmp_mode = ix86_fp_compare_mode (code); 8674 code = ix86_prepare_fp_compare_args (code, &op0, &op1); 8675 8676 if (second_test) 8677 *second_test = NULL_RTX; 8678 if (bypass_test) 8679 *bypass_test = NULL_RTX; 8680 8681 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 8682 8683 /* Do fcomi/sahf based test when profitable. */ 8684 if ((bypass_code == NIL || bypass_test) 8685 && (second_code == NIL || second_test) 8686 && ix86_fp_comparison_arithmetics_cost (code) > cost) 8687 { 8688 if (TARGET_CMOVE) 8689 { 8690 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); 8691 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), 8692 tmp); 8693 emit_insn (tmp); 8694 } 8695 else 8696 { 8697 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); 8698 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); 8699 if (!scratch) 8700 scratch = gen_reg_rtx (HImode); 8701 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2)); 8702 emit_insn (gen_x86_sahf_1 (scratch)); 8703 } 8704 8705 /* The FP codes work out to act like unsigned. 
 */
      /* FCOMI-style compare: the comparison result is already live in the
	 flags register, so just build the (possibly split) flags tests.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != NIL)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
      if (second_code != NIL)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.

	 NOTE(review): the masks below test the x87 status flags as they
	 sit in AH after fnstsw: presumably 0x01 = C0, 0x04 = C2,
	 0x40 = C3, 0x45 = C0|C2|C3 -- confirm against the SDM's
	 status-word layout.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	      /* NOTE(review): this break makes the outer break below
		 unreachable -- harmless, but redundant.  */
	      break;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  abort ();
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}

/* Generate a comparison of ix86_compare_op0 and ix86_compare_op1 for
   CODE and return the rtx that the flags user (bcc/scc/cmov) should
   test.  For floating point compares, *SECOND_TEST and *BYPASS_TEST
   may be set to auxiliary flag tests that must also be honored; both
   are cleared first when the pointers are non-null.  */

rtx
ix86_expand_compare (code, second_test, bypass_test)
     enum rtx_code code;
     rtx *second_test, *bypass_test;
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
				  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

/* Return true if the CODE will result in nontrivial jump sequence.  */
bool
ix86_fp_jump_nontrivial_p (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Without fcomi/cmov the FP compare always goes through fnstsw and
     bit twiddling, which is never a single jump.  */
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != NIL || second_code != NIL;
}

/* Expand a conditional branch: jump to LABEL when
   ix86_compare_op0 CODE ix86_compare_op1 holds.  */

void
ix86_expand_branch (code, label)
     enum rtx_code code;
     rtx label;
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
    case TFmode:
      {
	rtvec vec;
	int use_fcomi;
	enum rtx_code bypass_code, first_code, second_code;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

	/* Check whether we will use the natural sequence with one jump.
	   If
	   so, we can expand jump early.  Otherwise delay expansion by
	   creating compound insn to not confuse optimizers.  */
	if (bypass_code == NIL && second_code == NIL
	    && TARGET_CMOVE)
	  {
	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx, NULL_RTX);
	  }
	else
	  {
	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
				  ix86_compare_op0, ix86_compare_op1);
	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
					gen_rtx_LABEL_REF (VOIDmode, label),
					pc_rtx);
	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	    use_fcomi = ix86_use_fcomi_compare (code);
	    vec = rtvec_alloc (3 + !use_fcomi);
	    RTVEC_ELT (vec, 0) = tmp;
	    /* NOTE(review): hard registers 18 and 17 are clobbered here --
	       presumably FPSR and FLAGS on this port; confirm against the
	       register numbering in i386.h.  The extra HImode scratch is
	       only needed for the fnstsw (non-fcomi) sequence.  */
	    RTVEC_ELT (vec, 1)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	    RTVEC_ELT (vec, 2)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	    if (! use_fcomi)
	      RTVEC_ELT (vec, 3)
		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	  }
	return;
      }

    case DImode:
      if (TARGET_64BIT)
	goto simple;
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;

	/* Canonicalize a constant to the second operand.  */
	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	split_di (&ix86_compare_op1, 1, lo+1, hi+1);

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    /* Recurse as a simple SImode compare against zero.  */
	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	/* NIL means "skip the corresponding high-word jump" below.  */
	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = NIL; code2 = NE;  break;
	  case NE:   code2 = NIL; break;

	  default:
	    abort ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != NIL)
	  ix86_expand_branch (code1, label);
	if (code2 != NIL)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != NIL)
	  emit_label (label2);
	return;
      }

    default:
      abort ();
    }
}

/* Split branch based on floating point condition.  */
void
ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
     enum rtx_code code;
     rtx op1, op2, target1, target2, tmp;
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  /* Canonicalize so the fall-through target is TARGET2 (pc_rtx).  */
  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
	 Assume the BYPASS and SECOND to be always test
	 for UNORDERED.
	 */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
	 to be updated.  Later we may run some experiments and see
	 if unordered values are more frequent in practice.  */
      if (bypass)
	bypass_probability = 1;
      if (second)
	second_probability = 1;
    }
  /* The bypass test jumps over the main conditional jump to the
     fall-through label when it fires.  */
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
						 bypass,
						 gen_rtx_LABEL_REF (VOIDmode,
								    label),
						 pc_rtx)));
      if (bypass_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (bypass_probability),
			       REG_NOTES (i));
    }
  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
			   GEN_INT (probability),
			   REG_NOTES (i));
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
						 target2)));
      if (second_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (second_probability),
			       REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}

/* Expand a store-flag of CODE (testing ix86_compare_op0/op1) into the
   QImode register DEST.  Returns 1 (DONE) on success, 0 (FAIL) when
   the caller must fall back (DImode compares on 32-bit targets).  */

int
ix86_expand_setcc (code, dest)
     enum rtx_code code;
     rtx dest;
{
  rtx ret, tmp, tmpreg;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == DImode
      && !TARGET_64BIT)
    return 0; /* FAIL */

  if (GET_MODE (dest) != QImode)
    abort ();

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  /* A split FP compare needs a second setcc combined in: AND for the
     reversed bypass test, OR for the second test.  */
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
	{
	  if (second_test)
	    abort ();
	  test = bypass_test;
	  bypass = 1;
	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
	}
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  return 1; /* DONE */
}

/* Expand an integer conditional move:
   operands[0] = operands[1] ? operands[2] : operands[3], where
   operands[1] is a comparison of ix86_compare_op0/op1.  Returns 1
   when an expansion was emitted, 0 to FAIL back to the caller.  */

int
ix86_expand_int_movcc (operands)
     rtx operands[];
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* When the compare code is not LTU or GEU, we can not use sbbl case.
     In case comparison is done with immediate, we can convert it to LTU or
     GEU by altering the integer.  */

  if ((code == LEU || code == GTU)
      && GET_CODE (ix86_compare_op1) == CONST_INT
      && mode != HImode
      && INTVAL (ix86_compare_op1) != -1
      /* For x86-64, the immediate field in the instruction is 32-bit
	 signed, so we can't increment a DImode value above 0x7fffffff.  */
      && (!TARGET_64BIT
	  || GET_MODE (ix86_compare_op0) != DImode
	  || INTVAL (ix86_compare_op1) != 0x7fffffff)
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      if (code == LEU)
	code = LTU;
      else
	code = GEU;
      ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
				       GET_MODE (ix86_compare_op0));
    }

  /* Record the compare sequence so it can be emitted (or discarded)
     by whichever strategy below wins.  */
  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.
     */

  if (mode != HImode
      && (mode != DImode || TARGET_64BIT)
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);	/* "true" constant */
      HOST_WIDE_INT cf = INTVAL (operands[3]);	/* "false" constant */
      HOST_WIDE_INT diff;

      if ((compare_code == LTU || compare_code == GEU)
	  && !second_test && !bypass_test)
	{
	  /* Detect overlap between destination and compare sources.  */
	  rtx tmp = out;

	  /* To simplify rest of code, restrict to the GEU case.  */
	  if (compare_code == LTU)
	    {
	      HOST_WIDE_INT tmp = ct;
	      ct = cf;
	      cf = tmp;
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	  diff = ct - cf;

	  if (reg_overlap_mentioned_p (out, ix86_compare_op0)
	      || reg_overlap_mentioned_p (out, ix86_compare_op1))
	    tmp = gen_reg_rtx (mode);

	  emit_insn (compare_seq);
	  /* Materialize 0/-1 from the carry flag -- presumably a
	     cmp/sbb pair from the *_movcc_0_m1 patterns; the asm
	     sketches below rely on that.  */
	  if (mode == DImode)
	    emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
	  else
	    emit_insn (gen_x86_movsicc_0_m1 (tmp));

	  if (diff == 1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [addl dest, ct]
	       *
	       * Size 5 - 8.
	       */
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (ct),
					   tmp, 1, OPTAB_DIRECT);
	    }
	  else if (cf == -1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * orl $ct, dest
	       *
	       * Size 8.
	       */
	      tmp = expand_simple_binop (mode, IOR,
					 tmp, GEN_INT (ct),
					 tmp, 1, OPTAB_DIRECT);
	    }
	  else if (diff == -1 && ct)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * notl dest
	       * [addl dest, cf]
	       *
	       * Size 8 - 11.
	       */
	      tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
	      if (cf)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (cf),
					   tmp, 1, OPTAB_DIRECT);
	    }
	  else
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [notl dest]
	       * andl cf - ct, dest
	       * [addl dest, ct]
	       *
	       * Size 8 - 11.
	       */

	      if (cf == 0)
		{
		  cf = ct;
		  ct = 0;
		  tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
		}

	      tmp = expand_simple_binop (mode, AND,
					 tmp,
					 gen_int_mode (cf - ct, mode),
					 tmp, 1, OPTAB_DIRECT);
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (ct),
					   tmp, 1, OPTAB_DIRECT);
	    }

	  if (tmp != out)
	    emit_move_insn (out, tmp);

	  return 1; /* DONE */
	}

      /* Canonicalize so ct > cf by reversing the condition.  */
      diff = ct - cf;
      if (diff < 0)
	{
	  HOST_WIDE_INT tmp;
	  tmp = ct, ct = cf, cf = tmp;
	  diff = -diff;
	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
	    {
	      /* We may be reversing unordered compare to normal compare, that
		 is not valid in general (we may convert non-trapping condition
		 to trapping one), however on i386 we currently emit all
		 comparisons unordered.  */
	      compare_code = reverse_condition_maybe_unordered (compare_code);
	      code = reverse_condition_maybe_unordered (code);
	    }
	  else
	    {
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	}

      /* Detect sign-bit compares (x < 0 / x >= 0 and the equivalent
	 x <= -1 / x > -1) that can be done with a sar.  */
      compare_code = NIL;
      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
	  && GET_CODE (ix86_compare_op1) == CONST_INT)
	{
	  if (ix86_compare_op1 == const0_rtx
	      && (code == LT || code == GE))
	    compare_code = code;
	  else if (ix86_compare_op1 == constm1_rtx)
	    {
	      if (code == LE)
		compare_code = LT;
	      else if (code == GT)
		compare_code = GE;
	    }
	}

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != NIL
	  && GET_MODE (ix86_compare_op0) == GET_MODE (out)
	  && (cf == -1 || ct == -1))
	{
	  /* If lea code below could be used, only optimize
	     if it results in a 2 insn sequence.  */

	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
		 || diff == 3 || diff == 5 || diff == 9)
	      || (compare_code == LT && ct == -1)
	      || (compare_code == GE && cf == -1))
	    {
	      /*
	       * notl op1	(if necessary)
	       * sarl $31, op1
	       * orl cf, op1
	       */
	      if (ct != -1)
		{
		  cf = ct;
		  ct = -1;
		  code = reverse_condition (code);
		}

	      out = emit_store_flag (out, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, -1);

	      out = expand_simple_binop (mode, IOR,
					 out, GEN_INT (cf),
					 out, 1, OPTAB_DIRECT);
	      if (out != operands[0])
		emit_move_insn (operands[0], out);

	      return 1; /* DONE */
	    }
	}

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
	   || diff == 3 || diff == 5 || diff == 9)
	  && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
	{
	  /*
	   * xorl dest,dest
	   * cmpl op1,op2
	   * setcc dest
	   * lea cf(dest*(ct-cf)),dest
	   *
	   * Size 14.
	   *
	   * This also catches the degenerate setcc-only case.
	   */

	  rtx tmp;
	  int nops;

	  out = emit_store_flag (out, code, ix86_compare_op0,
				 ix86_compare_op1, VOIDmode, 0, 1);

	  nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to get arithmetics done in proper mode to match.
	     */
	  /* Build base + index*scale + disp for the lea; NOPS counts the
	     arithmetic sub-expressions so a single op can go through
	     force_operand below.  */
	  if (diff == 1)
	    tmp = copy_rtx (out);
	  else
	    {
	      rtx out1;
	      out1 = copy_rtx (out);
	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
	      nops++;
	      if (diff & 1)
		{
		  tmp = gen_rtx_PLUS (mode, tmp, out1);
		  nops++;
		}
	    }
	  if (cf != 0)
	    {
	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
	      nops++;
	    }
	  if (tmp != out
	      && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
	    {
	      if (nops == 1)
		out = force_operand (tmp, copy_rtx (out));
	      else
		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
	    }
	  if (out != operands[0])
	    emit_move_insn (operands[0], copy_rtx (out));

	  return 1; /* DONE */
	}

      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * Size 20.			Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       *
       * %%% Parameterize branch_cost on the tuning architecture, then
       * use that.  The 80386 couldn't care less about mispredicts.
       */

      if (!optimize_size && !TARGET_CMOVE)
	{
	  if (cf == 0)
	    {
	      cf = ct;
	      ct = 0;
	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
		/* We may be reversing unordered compare to normal compare,
		   that is not valid in general (we may convert non-trapping
		   condition to trapping one), however on i386 we currently
		   emit all comparisons unordered.  */
		code = reverse_condition_maybe_unordered (code);
	      else
		{
		  code = reverse_condition (code);
		  if (compare_code != NIL)
		    compare_code = reverse_condition (compare_code);
		}
	    }

	  if (compare_code != NIL)
	    {
	      /* notl op1	(if needed)
		 sarl $31, op1
		 andl (cf-ct), op1
		 addl ct, op1

		 For x < 0 (resp. x <= -1) there will be no notl,
		 so if possible swap the constants to get rid of the
		 complement.
		 True/false will be -1/0 while code below (store flag
		 followed by decrement) is 0/-1, so the constants need
		 to be exchanged once more.  */

	      if (compare_code == GE || !cf)
		{
		  code = reverse_condition (code);
		  compare_code = LT;
		}
	      else
		{
		  HOST_WIDE_INT tmp = cf;
		  cf = ct;
		  ct = tmp;
		}

	      out = emit_store_flag (out, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, -1);
	    }
	  else
	    {
	      out = emit_store_flag (out, code, ix86_compare_op0,
				     ix86_compare_op1, VOIDmode, 0, 1);

	      /* Turn the 0/1 store-flag result into 0/-1 (decl).  */
	      out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
					 out, 1, OPTAB_DIRECT);
	    }

	  out = expand_simple_binop (mode, AND, out,
				     gen_int_mode (cf - ct, mode),
				     out, 1, OPTAB_DIRECT);
	  if (ct)
	    out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
				       out, 1, OPTAB_DIRECT);
	  if (out != operands[0])
	    emit_move_insn (operands[0], out);

	  return 1; /* DONE */
	}
    }

  if (!TARGET_CMOVE)
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (optimize_size)
	return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load a
	 constant with the above and mask it in with a logical operation.
	 */

      if (GET_CODE (operands[2]) == CONST_INT)
	{
	  /* Only 0 and -1 are interesting: they are absorbing/identity
	     elements for AND/IOR respectively.  */
	  var = operands[3];
	  if (INTVAL (operands[2]) == 0)
	    operands[3] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[2]) == -1)
	    operands[3] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else if (GET_CODE (operands[3]) == CONST_INT)
	{
	  var = operands[2];
	  if (INTVAL (operands[3]) == 0)
	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1)
	    operands[2] = const0_rtx, op = ior_optab;
	  else
	    return 0; /* FAIL */
	}
      else
	return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
	return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
			  OPTAB_WIDEN);
      if (out != orig_out)
	emit_move_insn (orig_out, out);

      return 1; /* DONE */
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  /* The extra cmovs for the bypass/second tests read operands[0], so
     the arm they overwrite must not overlap the destination.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }
  if (! register_operand (operands[2], VOIDmode)
      && !
	 register_operand (operands[3], VOIDmode))
    operands[2] = force_reg (mode, operands[2]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode,
						compare_op, operands[2],
						operands[3])));
  /* Fold in the auxiliary tests from a split FP compare with
     further conditional moves over the just-written result.  */
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode,
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode,
						  second_test,
						  operands[2],
						  operands[0])));

  return 1; /* DONE */
}

/* Expand a floating point conditional move:
   operands[0] = operands[1] ? operands[2] : operands[3], where
   operands[1] compares ix86_compare_op0/op1.  Returns 1 when an
   expansion was emitted.  */

int
ix86_expand_fp_movcc (operands)
     rtx operands[];
{
  enum rtx_code code;
  rtx tmp;
  rtx compare_op, second_test, bypass_test;

  /* For SF/DFmode conditional moves based on comparisons
     in same mode, we may want to use SSE min/max instructions.  */
  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
      /* The SSE comparisons does not support the LTGT/UNEQ pair.  */
      && (!TARGET_IEEE_FP
	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
      /* We may be called from the post-reload splitter.  */
      && (!REG_P (operands[0])
	  || SSE_REG_P (operands[0])
	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
    {
      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
      code = GET_CODE (operands[1]);

      /* See if we have (cross) match between comparison operands and
	 conditional move operands.  */
      if (rtx_equal_p (operands[2], op1))
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = reverse_condition_maybe_unordered (code);
	}
      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
	{
	  /* Check for min operation.  */
	  if (code == LT)
	    {
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_minsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_mindf3 (operands[0], op0, op1));
	      return 1;
	    }
	  /* Check for max operation.  */
	  if (code == GT)
	    {
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_maxsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_maxdf3 (operands[0], op0, op1));
	      return 1;
	    }
	}
      /* Manage condition to be sse_comparison_operator.  In case we are
	 in non-ieee mode, try to canonicalize the destination operand
	 to be first in the comparison - this helps reload to avoid extra
	 moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
	{
	  rtx tmp = ix86_compare_op0;
	  ix86_compare_op0 = ix86_compare_op1;
	  ix86_compare_op1 = tmp;
	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      /* Similarly try to manage result to be first operand of conditional
	 move.  We also don't support the NE comparison on SSE, so try to
	 avoid it.  */
      if ((rtx_equal_p (operands[0], operands[3])
	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
	{
	  rtx tmp = operands[2];
	  operands[2] = operands[3];
	  operands[3] = tmp;
	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
					  (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      if (GET_MODE (operands[0]) == SFmode)
	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      else
	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* The floating point conditional move instructions don't directly
     support signed integer comparisons.
     */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      /* Reduce the unsupported condition to a QImode setcc result and
	 fcmov on (result != 0) instead.  */
      if (second_test != NULL || bypass_test != NULL)
	abort ();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  /* The extra fcmovs for the bypass/second tests read operands[0], so
     the arm they overwrite must not overlap the destination.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						compare_op,
						operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  second_test,
						  operands[2],
						  operands[0])));

  return 1;
}

/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.
   Returns the number of parts (2 or 3).  */

static int
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  int size;

  /* Number of word-sized parts: 32-bit words on ia32 (TFmode uses only
     three of its four words), 64-bit words on x86-64.  */
  if (!TARGET_64BIT)
    size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Consecutive-hard-register decomposition is only valid
		 after reload.  */
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		case TFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    abort ();
	}
    }
  else
    {
      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[3];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);
	      parts[1] = gen_int_mode (l[2], SImode);
	    }
	  else
	    abort ();
	}
    }

  return size;
}

/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   int the correct order; operands 5-7 contain the output values.  */

void
ix86_split_long_move (operands)
     rtx operands[];
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.
     */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      /* Each earlier push moves the stack pointer, so rebase the later
	 parts' addresses on the preceding part's address.  */
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  part[1][1] = replace_equiv_address (part[1][1],
					      plus_constant (base,
							     UNITS_PER_WORD));
	  if (nparts == 3)
	    part[1][2] = replace_equiv_address (part[1][2],
						plus_constant (base, 8));
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      /* We use only first 12 bytes of TFmode value, but for pushing we
		 are required to adjust stack as if we were pushing real 16byte
		 value.  */
	      if (mode == TFmode && !TARGET_64BIT)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				       GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these comes from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      if (GET_CODE (part[1][1]) == MEM)
		part[1][1] = adjust_address (part[1][1], DImode, 0);
	      else if (REG_P (part[1][1]))
		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
	      else
		abort ();
	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      /* Push parts from the highest address down.  */
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.
*/ 10119 if ((REG_P (part[0][0]) 10120 && REG_P (part[1][1]) 10121 && (REGNO (part[0][0]) == REGNO (part[1][1]) 10122 || (nparts == 3 10123 && REGNO (part[0][0]) == REGNO (part[1][2])))) 10124 || (collisions > 0 10125 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))) 10126 { 10127 if (nparts == 3) 10128 { 10129 operands[2] = part[0][2]; 10130 operands[3] = part[0][1]; 10131 operands[4] = part[0][0]; 10132 operands[5] = part[1][2]; 10133 operands[6] = part[1][1]; 10134 operands[7] = part[1][0]; 10135 } 10136 else 10137 { 10138 operands[2] = part[0][1]; 10139 operands[3] = part[0][0]; 10140 operands[5] = part[1][1]; 10141 operands[6] = part[1][0]; 10142 } 10143 } 10144 else 10145 { 10146 if (nparts == 3) 10147 { 10148 operands[2] = part[0][0]; 10149 operands[3] = part[0][1]; 10150 operands[4] = part[0][2]; 10151 operands[5] = part[1][0]; 10152 operands[6] = part[1][1]; 10153 operands[7] = part[1][2]; 10154 } 10155 else 10156 { 10157 operands[2] = part[0][0]; 10158 operands[3] = part[0][1]; 10159 operands[5] = part[1][0]; 10160 operands[6] = part[1][1]; 10161 } 10162 } 10163 emit_move_insn (operands[2], operands[5]); 10164 emit_move_insn (operands[3], operands[6]); 10165 if (nparts == 3) 10166 emit_move_insn (operands[4], operands[7]); 10167 10168 return; 10169} 10170 10171void 10172ix86_split_ashldi (operands, scratch) 10173 rtx *operands, scratch; 10174{ 10175 rtx low[2], high[2]; 10176 int count; 10177 10178 if (GET_CODE (operands[2]) == CONST_INT) 10179 { 10180 split_di (operands, 2, low, high); 10181 count = INTVAL (operands[2]) & 63; 10182 10183 if (count >= 32) 10184 { 10185 emit_move_insn (high[0], low[1]); 10186 emit_move_insn (low[0], const0_rtx); 10187 10188 if (count > 32) 10189 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32))); 10190 } 10191 else 10192 { 10193 if (!rtx_equal_p (operands[0], operands[1])) 10194 emit_move_insn (operands[0], operands[1]); 10195 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count))); 10196 
	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable count: shift both halves, then fix up the count >= 32
	 case with a shift-adjust pattern.  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}

/* Split a DImode arithmetic-right-shift into SImode operations.
   See ix86_split_ashldi for the OPERANDS/SCRATCH conventions.  */

void
ix86_split_ashrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);

	  if (! reload_completed)
	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
	  else
	    {
	      /* After reload no new pseudos may be created, so sign-fill
		 the high word in place.  */
	      emit_move_insn (high[0], low[0]);
	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
	    }

	  if (count > 32)
	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = gen_reg_rtx (SImode);
	  /* Materialize the sign word in SCRATCH for the adjust pattern.  */
	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}

/* Split a DImode logical-right-shift into SImode operations.
   See ix86_split_ashldi for the OPERANDS/SCRATCH conventions.  */

void
ix86_split_lshrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}

/* Helper function for the string operations below.  Test whether VARIABLE
   is aligned with respect to the VALUE low bits: emits an AND with VALUE
   and a conditional jump to the returned label, taken when the masked
   bits are all zero.  */
static rtx
ix86_expand_aligntest (variable, value)
     rtx variable;
     int value;
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  return label;
}

/* Adjust COUNTREG down by VALUE (emits an add of -VALUE in the
   register's own mode).  */
static void
ix86_adjust_counter (countreg, value)
     rtx countreg;
     HOST_WIDE_INT value;
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend possibly SImode EXP to Pmode register.
*/
rtx
ix86_zero_extend_to_Pmode (exp)
     rtx exp;
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}

/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  Returns 1 when inline code was emitted, 0 to fall back to
   a library call.  expand_clrstr contains similar code.  */
int
ix86_expand_movstr (dst, src, count_exp, align_exp)
     rtx dst, src, count_exp, align_exp;
{
  rtx srcreg, destreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;
  rtx insns;

  start_sequence ();

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    count = INTVAL (count_exp);

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      if (TARGET_64BIT)
	emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
					destreg, srcreg, countreg));
      else
	emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
				  destreg, srcreg, countreg));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT
						   ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  if (size == 4)
	    {
	      if (TARGET_64BIT)
		emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
						destreg, srcreg, countreg));
	      else
		emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
					  destreg, srcreg, countreg));
	    }
	  else
	    emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
					    destreg, srcreg, countreg));
	}
      /* Copy the remaining 1-7 tail bytes with single string moves.  */
      if (size == 8 && (count & 0x04))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if (count & 0x02)
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (count & 0x01)
	emit_insn (gen_strmovqi (destreg, srcreg));
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
     allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
	{
	  end_sequence ();
	  return 0;
	}

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
	 than 4 bytes, because gcc is able to optimize such code better (in
	 the case the destination or the count really is aligned, gcc is often
	 able to predict the branches) and also it is friendlier to the
	 hardware branch prediction.

	 Using loops is beneficial for generic case, because we can
	 handle small counts using the loops.  Many CPUs (such as Athlon)
	 have large REP prefix setup costs.

	 This is quite costly.  Maybe we can revisit this decision later or
	 add some customizability to this code.  */

      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}
      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2,
				  ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
					  destreg, srcreg, countreg2));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
	  emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
				    destreg, srcreg, countreg2));
	}

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      /* Copy any tail bytes left over from the word-sized rep move.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if ((align <= 4 || count == 0) && TARGET_64BIT)
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strmovqi (destreg, srcreg));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }

  insns = get_insns ();
  end_sequence ();

  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
  emit_insn (insns);
  return 1;
}

/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  Returns 1 when inline code was emitted, 0 to fall back to
   a library call.  expand_movstr contains similar code.  */
int
ix86_expand_clrstr (src, count_exp, align_exp)
     rtx src, count_exp, align_exp;
{
  rtx destreg, zeroreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    count = INTVAL (count_exp);
  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.
*/
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      if (TARGET_64BIT)
	emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
					 destreg, countreg));
      else
	emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
				   destreg, countreg));
    }
  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      /* Constant aligned (or small unaligned) clear: rep stosl/stosq
	 followed by single stores for the tail bytes.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT
						   ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  if (size == 4)
	    {
	      if (TARGET_64BIT)
		emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
						 destreg, countreg));
	      else
		emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
					   destreg, countreg));
	    }
	  else
	    emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
					     destreg, countreg));
	}
      if (size == 8 && (count & 0x04))
	emit_insn (gen_strsetsi (destreg,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (count & 0x02)
	emit_insn (gen_strsethi (destreg,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (count & 0x01)
	emit_insn (gen_strsetqi (destreg,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
    }
  else
    {
      /* Generic path: align destination, rep stos the bulk, then clear
	 the remaining tail bytes.  */
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);

      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strsetqi (destreg,
				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strsethi (destreg,
				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strsetsi (destreg,
				   (TARGET_64BIT
				    ? gen_rtx_SUBREG (SImode, zeroreg, 0)
				    : zeroreg)));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}

      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2,
				  ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
					   destreg, countreg2));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
	  emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
				     destreg, countreg2));
	}
      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strsetsi (destreg,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strsetsi (destreg,
				   gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strsethi (destreg,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strsethi (destreg,
				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strsetqi (destreg,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strsetqi (destreg,
				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  return 1;
}
/* Expand strlen.  Returns 1 when inline code was emitted, 0 to fall
   back to a library call.  */
int
ix86_expand_strlen (out, src, eoschar, align)
     rtx out, src, eoschar, align;
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid its
     expanding unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.
*/ 10850 if (TARGET_64BIT) 10851 emit_insn (gen_subdi3 (out, out, addr)); 10852 else 10853 emit_insn (gen_subsi3 (out, out, addr)); 10854 } 10855 else 10856 { 10857 scratch2 = gen_reg_rtx (Pmode); 10858 scratch3 = gen_reg_rtx (Pmode); 10859 scratch4 = force_reg (Pmode, constm1_rtx); 10860 10861 emit_move_insn (scratch3, addr); 10862 eoschar = force_reg (QImode, eoschar); 10863 10864 emit_insn (gen_cld ()); 10865 if (TARGET_64BIT) 10866 { 10867 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar, 10868 align, scratch4, scratch3)); 10869 emit_insn (gen_one_cmpldi2 (scratch2, scratch1)); 10870 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx)); 10871 } 10872 else 10873 { 10874 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar, 10875 align, scratch4, scratch3)); 10876 emit_insn (gen_one_cmplsi2 (scratch2, scratch1)); 10877 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx)); 10878 } 10879 } 10880 return 1; 10881} 10882 10883/* Expand the appropriate insns for doing strlen if not just doing 10884 repnz; scasb 10885 10886 out = result, initialized with the start address 10887 align_rtx = alignment of the address. 10888 scratch = scratch register, initialized with the startaddress when 10889 not aligned, otherwise undefined 10890 10891 This is just the body. It needs the initialisations mentioned above and 10892 some address computing at the end. These things are done in i386.md. */ 10893 10894static void 10895ix86_expand_strlensi_unroll_1 (out, align_rtx) 10896 rtx out, align_rtx; 10897{ 10898 int align; 10899 rtx tmp; 10900 rtx align_2_label = NULL_RTX; 10901 rtx align_3_label = NULL_RTX; 10902 rtx align_4_label = gen_label_rtx (); 10903 rtx end_0_label = gen_label_rtx (); 10904 rtx mem; 10905 rtx tmpreg = gen_reg_rtx (SImode); 10906 rtx scratch = gen_reg_rtx (SImode); 10907 10908 align = 0; 10909 if (GET_CODE (align_rtx) == CONST_INT) 10910 align = INTVAL (align_rtx); 10911 10912 /* Loop to check 1..3 bytes for null to get an aligned pointer. 
*/ 10913 10914 /* Is there a known alignment and is it less than 4? */ 10915 if (align < 4) 10916 { 10917 rtx scratch1 = gen_reg_rtx (Pmode); 10918 emit_move_insn (scratch1, out); 10919 /* Is there a known alignment and is it not 2? */ 10920 if (align != 2) 10921 { 10922 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */ 10923 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */ 10924 10925 /* Leave just the 3 lower bits. */ 10926 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3), 10927 NULL_RTX, 0, OPTAB_WIDEN); 10928 10929 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, 10930 Pmode, 1, align_4_label); 10931 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL, 10932 Pmode, 1, align_2_label); 10933 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL, 10934 Pmode, 1, align_3_label); 10935 } 10936 else 10937 { 10938 /* Since the alignment is 2, we have to check 2 or 0 bytes; 10939 check if is aligned to 4 - byte. */ 10940 10941 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2), 10942 NULL_RTX, 0, OPTAB_WIDEN); 10943 10944 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, 10945 Pmode, 1, align_4_label); 10946 } 10947 10948 mem = gen_rtx_MEM (QImode, out); 10949 10950 /* Now compare the bytes. */ 10951 10952 /* Compare the first n unaligned byte on a byte per byte basis. */ 10953 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, 10954 QImode, 1, end_0_label); 10955 10956 /* Increment the address. 
*/ 10957 if (TARGET_64BIT) 10958 emit_insn (gen_adddi3 (out, out, const1_rtx)); 10959 else 10960 emit_insn (gen_addsi3 (out, out, const1_rtx)); 10961 10962 /* Not needed with an alignment of 2 */ 10963 if (align != 2) 10964 { 10965 emit_label (align_2_label); 10966 10967 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, 10968 end_0_label); 10969 10970 if (TARGET_64BIT) 10971 emit_insn (gen_adddi3 (out, out, const1_rtx)); 10972 else 10973 emit_insn (gen_addsi3 (out, out, const1_rtx)); 10974 10975 emit_label (align_3_label); 10976 } 10977 10978 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, 10979 end_0_label); 10980 10981 if (TARGET_64BIT) 10982 emit_insn (gen_adddi3 (out, out, const1_rtx)); 10983 else 10984 emit_insn (gen_addsi3 (out, out, const1_rtx)); 10985 } 10986 10987 /* Generate loop to check 4 bytes at a time. It is not a good idea to 10988 align this loop. It gives only huge programs, but does not help to 10989 speed up. */ 10990 emit_label (align_4_label); 10991 10992 mem = gen_rtx_MEM (SImode, out); 10993 emit_move_insn (scratch, mem); 10994 if (TARGET_64BIT) 10995 emit_insn (gen_adddi3 (out, out, GEN_INT (4))); 10996 else 10997 emit_insn (gen_addsi3 (out, out, GEN_INT (4))); 10998 10999 /* This formula yields a nonzero result iff one of the bytes is zero. 11000 This saves three branches inside loop and many cycles. 
*/ 11001 11002 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101))); 11003 emit_insn (gen_one_cmplsi2 (scratch, scratch)); 11004 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch)); 11005 emit_insn (gen_andsi3 (tmpreg, tmpreg, 11006 gen_int_mode (0x80808080, SImode))); 11007 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 11008 align_4_label); 11009 11010 if (TARGET_CMOVE) 11011 { 11012 rtx reg = gen_reg_rtx (SImode); 11013 rtx reg2 = gen_reg_rtx (Pmode); 11014 emit_move_insn (reg, tmpreg); 11015 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16))); 11016 11017 /* If zero is not in the first two bytes, move two bytes forward. */ 11018 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); 11019 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 11020 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); 11021 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, 11022 gen_rtx_IF_THEN_ELSE (SImode, tmp, 11023 reg, 11024 tmpreg))); 11025 /* Emit lea manually to avoid clobbering of flags. */ 11026 emit_insn (gen_rtx_SET (SImode, reg2, 11027 gen_rtx_PLUS (Pmode, out, GEN_INT (2)))); 11028 11029 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 11030 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); 11031 emit_insn (gen_rtx_SET (VOIDmode, out, 11032 gen_rtx_IF_THEN_ELSE (Pmode, tmp, 11033 reg2, 11034 out))); 11035 11036 } 11037 else 11038 { 11039 rtx end_2_label = gen_label_rtx (); 11040 /* Is zero in the first two bytes? */ 11041 11042 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); 11043 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 11044 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx); 11045 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 11046 gen_rtx_LABEL_REF (VOIDmode, end_2_label), 11047 pc_rtx); 11048 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); 11049 JUMP_LABEL (tmp) = end_2_label; 11050 11051 /* Not in the first two. Move two bytes forward. 
 */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
      else
        emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));

  emit_label (end_0_label);
}

/* Expand a function call.

   RETVAL    register to receive the return value, or NULL for a void call.
   FNADDR    MEM whose address is the function to call.
   CALLARG1  operand attached to the CALL rtx (argument-bytes operand).
   CALLARG2  on 64-bit targets, a constant loaded into AL before the call
	     when nonnegative -- NOTE(review): presumably the number of
	     vector registers used by a varargs call; confirm against the
	     call expanders.
   POP       number of argument bytes the callee pops, or const0_rtx.  */

void
ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
     rtx retval, fnaddr, callarg1, callarg2, pop;
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  /* Callee-pop is not supported on 64-bit targets.  */
  if (TARGET_64BIT && pop)
    abort ();

#if TARGET_MACHO
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }
#endif /* TARGET_MACHO */

  /* Addresses the call pattern cannot encode directly are forced into
     a register and wrapped in a MEM.  */
  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      /* Represent the callee's stack pop as a parallel adjustment of
	 the stack pointer.  */
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}


/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status ()
{
  return ggc_alloc_cleared (sizeof (struct machine_function));
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (mode, n)
     enum machine_mode mode;
     int n;
{
  if (n < 0 || n >= MAX_386_STACK_LOCALS)
    abort ();

  /* Slots are created lazily and cached per (mode, n) pair.  */
  if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
    ix86_stack_locals[(int) mode][n]
      = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  return ix86_stack_locals[(int) mode][n];
}

/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;
rtx
ix86_tls_get_addr ()
{
  /* Built once and cached; GNU TLS on 32-bit uses the triple-underscore
     variant, everything else the double-underscore one.  */
  if (!ix86_tls_symbol)
    {
      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
					    (TARGET_GNU_TLS && !TARGET_64BIT)
					    ? "___tls_get_addr"
					    : "__tls_get_addr");
    }

  return ix86_tls_symbol;
}

/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

static int
memory_address_length (addr)
     rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  /* Autoincrement forms contribute no extra address bytes.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* Small constants with a base register fit in a one-byte
	     (sign-extended) displacement.  */
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
	      && base)
	    len = 1;
	  else
	    len = 4;
	}
      /* ebp always wants a displacement.  */
      else if (base == hard_frame_pointer_rtx)
	len = 1;

      /* An index requires the two-byte modrm form...  */
      if (index
	  /* ...like esp, which always wants an index.  */
	  || base == stack_pointer_rtx
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx)
	len += 1;
    }

  return len;
}

/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that insn have 8bit immediate alternative.  */
int
ix86_attr_length_immediate_default (insn, shortform)
     rtx insn;
     int shortform;
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	/* Only one immediate operand is expected per insn.  */
	if (len)
	  abort ();
	if (shortform
	    && GET_CODE (recog_data.operand[i]) == CONST_INT
	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
	  len = 1;
	else
	  {
	    switch (get_attr_mode (insn))
	      {
	      case MODE_QI:
		len += 1;
		break;
	      case MODE_HI:
		len += 2;
		break;
	      case MODE_SI:
		len += 4;
		break;
	      /* Immediates for DImode instructions are encoded as 32bit sign extended values.  */
	      case MODE_DI:
		len += 4;
		break;
	      default:
		fatal_insn ("unknown insn mode", insn);
	      }
	  }
      }
  return len;
}
/* Compute default value for "length_address" attribute.
*/ 11290int 11291ix86_attr_length_address_default (insn) 11292 rtx insn; 11293{ 11294 int i; 11295 11296 if (get_attr_type (insn) == TYPE_LEA) 11297 { 11298 rtx set = PATTERN (insn); 11299 if (GET_CODE (set) == SET) 11300 ; 11301 else if (GET_CODE (set) == PARALLEL 11302 && GET_CODE (XVECEXP (set, 0, 0)) == SET) 11303 set = XVECEXP (set, 0, 0); 11304 else 11305 { 11306#ifdef ENABLE_CHECKING 11307 abort (); 11308#endif 11309 return 0; 11310 } 11311 11312 return memory_address_length (SET_SRC (set)); 11313 } 11314 11315 extract_insn_cached (insn); 11316 for (i = recog_data.n_operands - 1; i >= 0; --i) 11317 if (GET_CODE (recog_data.operand[i]) == MEM) 11318 { 11319 return memory_address_length (XEXP (recog_data.operand[i], 0)); 11320 break; 11321 } 11322 return 0; 11323} 11324 11325/* Return the maximum number of instructions a cpu can issue. */ 11326 11327static int 11328ix86_issue_rate () 11329{ 11330 switch (ix86_cpu) 11331 { 11332 case PROCESSOR_PENTIUM: 11333 case PROCESSOR_K6: 11334 return 2; 11335 11336 case PROCESSOR_PENTIUMPRO: 11337 case PROCESSOR_PENTIUM4: 11338 case PROCESSOR_ATHLON: 11339 return 3; 11340 11341 default: 11342 return 1; 11343 } 11344} 11345 11346/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set 11347 by DEP_INSN and nothing set by DEP_INSN. */ 11348 11349static int 11350ix86_flags_dependant (insn, dep_insn, insn_type) 11351 rtx insn, dep_insn; 11352 enum attr_type insn_type; 11353{ 11354 rtx set, set2; 11355 11356 /* Simplify the test for uninteresting insns. 
*/ 11357 if (insn_type != TYPE_SETCC 11358 && insn_type != TYPE_ICMOV 11359 && insn_type != TYPE_FCMOV 11360 && insn_type != TYPE_IBR) 11361 return 0; 11362 11363 if ((set = single_set (dep_insn)) != 0) 11364 { 11365 set = SET_DEST (set); 11366 set2 = NULL_RTX; 11367 } 11368 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL 11369 && XVECLEN (PATTERN (dep_insn), 0) == 2 11370 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET 11371 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET) 11372 { 11373 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 11374 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 11375 } 11376 else 11377 return 0; 11378 11379 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG) 11380 return 0; 11381 11382 /* This test is true if the dependent insn reads the flags but 11383 not any other potentially set register. */ 11384 if (!reg_overlap_mentioned_p (set, PATTERN (insn))) 11385 return 0; 11386 11387 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn))) 11388 return 0; 11389 11390 return 1; 11391} 11392 11393/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory 11394 address with operands set by DEP_INSN. 
*/ 11395 11396static int 11397ix86_agi_dependant (insn, dep_insn, insn_type) 11398 rtx insn, dep_insn; 11399 enum attr_type insn_type; 11400{ 11401 rtx addr; 11402 11403 if (insn_type == TYPE_LEA 11404 && TARGET_PENTIUM) 11405 { 11406 addr = PATTERN (insn); 11407 if (GET_CODE (addr) == SET) 11408 ; 11409 else if (GET_CODE (addr) == PARALLEL 11410 && GET_CODE (XVECEXP (addr, 0, 0)) == SET) 11411 addr = XVECEXP (addr, 0, 0); 11412 else 11413 abort (); 11414 addr = SET_SRC (addr); 11415 } 11416 else 11417 { 11418 int i; 11419 extract_insn_cached (insn); 11420 for (i = recog_data.n_operands - 1; i >= 0; --i) 11421 if (GET_CODE (recog_data.operand[i]) == MEM) 11422 { 11423 addr = XEXP (recog_data.operand[i], 0); 11424 goto found; 11425 } 11426 return 0; 11427 found:; 11428 } 11429 11430 return modified_in_p (addr, dep_insn); 11431} 11432 11433static int 11434ix86_adjust_cost (insn, link, dep_insn, cost) 11435 rtx insn, link, dep_insn; 11436 int cost; 11437{ 11438 enum attr_type insn_type, dep_insn_type; 11439 enum attr_memory memory, dep_memory; 11440 rtx set, set2; 11441 int dep_insn_code_number; 11442 11443 /* Anti and output depenancies have zero cost on all CPUs. */ 11444 if (REG_NOTE_KIND (link) != 0) 11445 return 0; 11446 11447 dep_insn_code_number = recog_memoized (dep_insn); 11448 11449 /* If we can't recognize the insns, we can't really do anything. */ 11450 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0) 11451 return cost; 11452 11453 insn_type = get_attr_type (insn); 11454 dep_insn_type = get_attr_type (dep_insn); 11455 11456 switch (ix86_cpu) 11457 { 11458 case PROCESSOR_PENTIUM: 11459 /* Address Generation Interlock adds a cycle of latency. */ 11460 if (ix86_agi_dependant (insn, dep_insn, insn_type)) 11461 cost += 1; 11462 11463 /* ??? Compares pair with jump/setcc. */ 11464 if (ix86_flags_dependant (insn, dep_insn, insn_type)) 11465 cost = 0; 11466 11467 /* Floating point stores require value to be ready one cycle ealier. 
*/ 11468 if (insn_type == TYPE_FMOV 11469 && get_attr_memory (insn) == MEMORY_STORE 11470 && !ix86_agi_dependant (insn, dep_insn, insn_type)) 11471 cost += 1; 11472 break; 11473 11474 case PROCESSOR_PENTIUMPRO: 11475 memory = get_attr_memory (insn); 11476 dep_memory = get_attr_memory (dep_insn); 11477 11478 /* Since we can't represent delayed latencies of load+operation, 11479 increase the cost here for non-imov insns. */ 11480 if (dep_insn_type != TYPE_IMOV 11481 && dep_insn_type != TYPE_FMOV 11482 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)) 11483 cost += 1; 11484 11485 /* INT->FP conversion is expensive. */ 11486 if (get_attr_fp_int_src (dep_insn)) 11487 cost += 5; 11488 11489 /* There is one cycle extra latency between an FP op and a store. */ 11490 if (insn_type == TYPE_FMOV 11491 && (set = single_set (dep_insn)) != NULL_RTX 11492 && (set2 = single_set (insn)) != NULL_RTX 11493 && rtx_equal_p (SET_DEST (set), SET_SRC (set2)) 11494 && GET_CODE (SET_DEST (set2)) == MEM) 11495 cost += 1; 11496 11497 /* Show ability of reorder buffer to hide latency of load by executing 11498 in parallel with previous instruction in case 11499 previous instruction is not needed to compute the address. */ 11500 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 11501 && !ix86_agi_dependant (insn, dep_insn, insn_type)) 11502 { 11503 /* Claim moves to take one cycle, as core can issue one load 11504 at time and the next load can start cycle later. */ 11505 if (dep_insn_type == TYPE_IMOV 11506 || dep_insn_type == TYPE_FMOV) 11507 cost = 1; 11508 else if (cost > 1) 11509 cost--; 11510 } 11511 break; 11512 11513 case PROCESSOR_K6: 11514 memory = get_attr_memory (insn); 11515 dep_memory = get_attr_memory (dep_insn); 11516 /* The esp dependency is resolved before the instruction is really 11517 finished. 
*/ 11518 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) 11519 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) 11520 return 1; 11521 11522 /* Since we can't represent delayed latencies of load+operation, 11523 increase the cost here for non-imov insns. */ 11524 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH) 11525 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1; 11526 11527 /* INT->FP conversion is expensive. */ 11528 if (get_attr_fp_int_src (dep_insn)) 11529 cost += 5; 11530 11531 /* Show ability of reorder buffer to hide latency of load by executing 11532 in parallel with previous instruction in case 11533 previous instruction is not needed to compute the address. */ 11534 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 11535 && !ix86_agi_dependant (insn, dep_insn, insn_type)) 11536 { 11537 /* Claim moves to take one cycle, as core can issue one load 11538 at time and the next load can start cycle later. */ 11539 if (dep_insn_type == TYPE_IMOV 11540 || dep_insn_type == TYPE_FMOV) 11541 cost = 1; 11542 else if (cost > 2) 11543 cost -= 2; 11544 else 11545 cost = 1; 11546 } 11547 break; 11548 11549 case PROCESSOR_ATHLON: 11550 memory = get_attr_memory (insn); 11551 dep_memory = get_attr_memory (dep_insn); 11552 11553 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH) 11554 { 11555 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV) 11556 cost += 2; 11557 else 11558 cost += 3; 11559 } 11560 /* Show ability of reorder buffer to hide latency of load by executing 11561 in parallel with previous instruction in case 11562 previous instruction is not needed to compute the address. */ 11563 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 11564 && !ix86_agi_dependant (insn, dep_insn, insn_type)) 11565 { 11566 /* Claim moves to take one cycle, as core can issue one load 11567 at time and the next load can start cycle later. 
*/ 11568 if (dep_insn_type == TYPE_IMOV 11569 || dep_insn_type == TYPE_FMOV) 11570 cost = 0; 11571 else if (cost >= 3) 11572 cost -= 3; 11573 else 11574 cost = 0; 11575 } 11576 11577 default: 11578 break; 11579 } 11580 11581 return cost; 11582} 11583 11584static union 11585{ 11586 struct ppro_sched_data 11587 { 11588 rtx decode[3]; 11589 int issued_this_cycle; 11590 } ppro; 11591} ix86_sched_data; 11592 11593static enum attr_ppro_uops 11594ix86_safe_ppro_uops (insn) 11595 rtx insn; 11596{ 11597 if (recog_memoized (insn) >= 0) 11598 return get_attr_ppro_uops (insn); 11599 else 11600 return PPRO_UOPS_MANY; 11601} 11602 11603static void 11604ix86_dump_ppro_packet (dump) 11605 FILE *dump; 11606{ 11607 if (ix86_sched_data.ppro.decode[0]) 11608 { 11609 fprintf (dump, "PPRO packet: %d", 11610 INSN_UID (ix86_sched_data.ppro.decode[0])); 11611 if (ix86_sched_data.ppro.decode[1]) 11612 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1])); 11613 if (ix86_sched_data.ppro.decode[2]) 11614 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2])); 11615 fputc ('\n', dump); 11616 } 11617} 11618 11619/* We're beginning a new block. Initialize data structures as necessary. */ 11620 11621static void 11622ix86_sched_init (dump, sched_verbose, veclen) 11623 FILE *dump ATTRIBUTE_UNUSED; 11624 int sched_verbose ATTRIBUTE_UNUSED; 11625 int veclen ATTRIBUTE_UNUSED; 11626{ 11627 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data)); 11628} 11629 11630/* Shift INSN to SLOT, and shift everything else down. 
 */

static void
ix86_reorder_insn (insnp, slot)
     rtx *insnp, *slot;
{
  /* Rotate *INSNP up to position SLOT; the entries between shift down
     one place so relative order of the others is preserved.  */
  if (insnp != slot)
    {
      rtx insn = *insnp;
      do
	insnp[0] = insnp[1];
      while (++insnp != slot);
      *insnp = insn;
    }
}

/* Reorder the ready queue READY..E_READY (E_READY is the highest-priority
   end) to match the PPro 4-1-1 decoder template.  */

static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready > e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}

/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */
static int
ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *n_readyp;
     int clock_var ATTRIBUTE_UNUSED;
{
  int n_ready = *n_readyp;
  rtx *e_ready = ready + n_ready - 1;

  /* Make sure to go ahead and initialize key items in
     ix86_sched_data if we are not going to bother trying to
     reorder the ready queue.  */
  if (n_ready < 2)
    {
      ix86_sched_data.ppro.issued_this_cycle = 1;
      goto out;
    }

  switch (ix86_cpu)
    {
    default:
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

out:
  return ix86_issue_rate ();
}

/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.
 */

static int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    /* A complex insn occupies a packet by itself: dump any
	       pending packet, show INSN alone, then clear.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    /* A 2-4 uop insn starts a new packet in slot 0.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    /* A single-uop insn fills the first free decoder slot.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		/* All three slots filled: the packet is complete.  */
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}

/* Nonzero if the DFA pipeline description should be used for the
   current cpu; only the Pentium model is described that way here.  */

static int
ia32_use_dfa_pipeline_interface ()
{
  if (ix86_cpu == PROCESSOR_PENTIUM)
    return 1;
  return 0;
}

/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead ()
{
  if (ix86_cpu == PROCESSOR_PENTIUM)
    return 2;
  else
    return 0;
}


/* Walk through INSNS and look for MEM references whose address is DSTREG or
   SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
   appropriate.  */

void
ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
     rtx insns;
     rtx dstref, srcref, dstreg, srcreg;
{
  rtx insn;

  for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
				 dstreg, srcreg);
}

/* Subroutine of above to actually do the updating by recursively walking
   the rtx.  */

static void
ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx x;
     rtx dstref, srcref, dstreg, srcreg;
{
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);
  int i, j;

  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  /* Recurse into every rtx ('e') and rtx-vector ('E') operand.  */
  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
    {
      if (*format_ptr == 'e')
	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
				   dstreg, srcreg);
      else if (*format_ptr == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
				     dstreg, srcreg);
    }
}

/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.
 */

int
ix86_constant_alignment (exp, align)
     tree exp;
     int align;
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  /* Long string constants get cache-line alignment unless disabled.  */
  else if (TREE_CODE (exp) == STRING_CST && !TARGET_NO_ALIGN_LONG_STRINGS
	   && TREE_STRING_LENGTH (exp) >= 31 && align < 256)
    return 256;

  return align;
}

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (type, align)
     tree type;
     int align;
{
  /* Large aggregates get at least 256-bit alignment.  */
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Alignment is keyed off the mode of the first field.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}

/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (type, align)
     tree type;
     int align;
{
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
x86_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.
	 */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      /* 32-bit trampoline: 0xb9 <cxt32> (movl $cxt, %ecx) followed by
	 0xe9 <disp32> (pc-relative jmp to FNADDR).  */
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but kernel does not use
	 trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump to r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }

#ifdef TRANSFER_FROM_TRAMPOLINE
  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}

/* Register the builtin NAME (type TYPE, md code CODE) only when every
   target_flags bit in MASK is set, and 64-bit-only builtins only on
   64-bit targets.  */
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags					\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
		      NULL, NULL_TREE);				\
} while (0)

/* Descriptor for one ia32 builtin.  */
struct builtin_description
{
  const unsigned int mask;		/* target_flags bits required */
  const enum insn_code icode;		/* insn pattern implementing it */
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};

/* SSE/SSE2 scalar compares that set the integer flags (comiss/ucomiss,
   comisd/ucomisd).  */
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

/* Two-operand builtins.  */
static const struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
{ MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, 12163 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, 12164 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, 12165 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, 12166 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, 12167 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, 12168 12169 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, 12170 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, 12171 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, 12172 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 }, 12173 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 }, 12174 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, 12175 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 }, 12176 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 }, 12177 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 }, 12178 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 }, 12179 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 }, 12180 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 }, 12181 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, 12182 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, 12183 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, 
"__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, 12184 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, 12185 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 }, 12186 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 }, 12187 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 }, 12188 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 }, 12189 12190 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, 12191 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, 12192 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, 12193 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, 12194 12195 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 }, 12196 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 }, 12197 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 }, 12198 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }, 12199 12200 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, 12201 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, 12202 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, 12203 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 }, 12204 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, 12205 12206 /* MMX */ 12207 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 }, 12208 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", 
IX86_BUILTIN_PADDW, 0, 0 }, 12209 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 }, 12210 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 }, 12211 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 }, 12212 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 }, 12213 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 }, 12214 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 }, 12215 12216 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 }, 12217 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 }, 12218 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 }, 12219 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 }, 12220 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 }, 12221 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 }, 12222 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 }, 12223 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 }, 12224 12225 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 }, 12226 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 }, 12227 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, 12228 12229 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 }, 12230 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 }, 12231 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 }, 12232 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 
}, 12233 12234 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, 12235 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, 12236 12237 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 }, 12238 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 }, 12239 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 }, 12240 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 }, 12241 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 }, 12242 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 }, 12243 12244 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, 12245 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, 12246 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, 12247 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, 12248 12249 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 }, 12250 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 }, 12251 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 }, 12252 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 }, 12253 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 }, 12254 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 }, 12255 12256 /* Special. 
*/ 12257 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 }, 12258 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 }, 12259 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 }, 12260 12261 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, 12262 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, 12263 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, 12264 12265 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, 12266 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, 12267 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 }, 12268 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 }, 12269 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 }, 12270 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 }, 12271 12272 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 }, 12273 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 }, 12274 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 }, 12275 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 }, 12276 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 }, 12277 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 }, 12278 12279 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 }, 12280 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 }, 12281 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, 12282 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 }, 12283 12284 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, 12285 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }, 12286 12287 /* SSE2 */ 12288 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 }, 12289 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 }, 12290 { MASK_SSE2, 
CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 }, 12291 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 }, 12292 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 }, 12293 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 }, 12294 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 }, 12295 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 }, 12296 12297 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 }, 12298 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 }, 12299 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 }, 12300 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 }, 12301 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 }, 12302 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 }, 12303 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 }, 12304 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 }, 12305 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 }, 12306 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 }, 12307 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 }, 12308 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 }, 12309 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 }, 12310 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 }, 12311 { MASK_SSE2, 
CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 }, 12312 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 }, 12313 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 }, 12314 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 }, 12315 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 }, 12316 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 }, 12317 12318 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 }, 12319 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 }, 12320 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 }, 12321 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 }, 12322 12323 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 }, 12324 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 }, 12325 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 }, 12326 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 }, 12327 12328 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 }, 12329 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 }, 12330 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 }, 12331 12332 /* SSE2 MMX */ 12333 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 }, 12334 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 }, 12335 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 }, 
12336 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 }, 12337 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 }, 12338 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 }, 12339 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 }, 12340 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 }, 12341 12342 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 }, 12343 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 }, 12344 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 }, 12345 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 }, 12346 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 }, 12347 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 }, 12348 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 }, 12349 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 }, 12350 12351 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 }, 12352 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 }, 12353 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 }, 12354 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 }, 12355 12356 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 }, 12357 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 }, 12358 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", 
IX86_BUILTIN_POR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },

  /* Shift-by-count (_ti patterns) and shift-by-immediate variants
     share the same builtin table shape; names are registered by hand.  */
  { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12410 12411 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 }, 12412 12413 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 }, 12414 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 }, 12415 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 }, 12416 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }, 12417 12418 /* PNI MMX */ 12419 { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 }, 12420 { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 }, 12421 { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 }, 12422 { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 }, 12423 { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 }, 12424 { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 } 12425}; 12426 12427static const struct builtin_description bdesc_1arg[] = 12428{ 12429 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 }, 12430 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 }, 12431 12432 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 }, 12433 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 }, 12434 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 }, 12435 12436 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, 12437 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, 12438 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 }, 12439 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, 12440 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }, 12441 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 }, 12442 12443 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 
}, 12444 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 }, 12445 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 }, 12446 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 }, 12447 12448 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 }, 12449 12450 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 }, 12451 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 }, 12452 12453 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 }, 12454 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 }, 12455 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 }, 12456 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 }, 12457 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 }, 12458 12459 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 }, 12460 12461 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 }, 12462 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 }, 12463 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 }, 12464 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 }, 12465 12466 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 }, 12467 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 }, 12468 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }, 12469 12470 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }, 12471 12472 /* PNI */ 12473 { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 }, 12474 { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 }, 12475 { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 } 12476}; 12477 12478void 12479ix86_init_builtins () 12480{ 12481 if (TARGET_MMX) 12482 ix86_init_mmx_sse_builtins (); 12483} 12484 12485/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX 12486 is zero. 
Otherwise, if TARGET_SSE is not set, only expand the MMX 12487 builtins. */ 12488static void 12489ix86_init_mmx_sse_builtins () 12490{ 12491 const struct builtin_description * d; 12492 size_t i; 12493 12494 tree pchar_type_node = build_pointer_type (char_type_node); 12495 tree pcchar_type_node = build_pointer_type ( 12496 build_type_variant (char_type_node, 1, 0)); 12497 tree pfloat_type_node = build_pointer_type (float_type_node); 12498 tree pcfloat_type_node = build_pointer_type ( 12499 build_type_variant (float_type_node, 1, 0)); 12500 tree pv2si_type_node = build_pointer_type (V2SI_type_node); 12501 tree pv2di_type_node = build_pointer_type (V2DI_type_node); 12502 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node); 12503 12504 /* Comparisons. */ 12505 tree int_ftype_v4sf_v4sf 12506 = build_function_type_list (integer_type_node, 12507 V4SF_type_node, V4SF_type_node, NULL_TREE); 12508 tree v4si_ftype_v4sf_v4sf 12509 = build_function_type_list (V4SI_type_node, 12510 V4SF_type_node, V4SF_type_node, NULL_TREE); 12511 /* MMX/SSE/integer conversions. 
*/ 12512 tree int_ftype_v4sf 12513 = build_function_type_list (integer_type_node, 12514 V4SF_type_node, NULL_TREE); 12515 tree int64_ftype_v4sf 12516 = build_function_type_list (long_long_integer_type_node, 12517 V4SF_type_node, NULL_TREE); 12518 tree int_ftype_v8qi 12519 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE); 12520 tree v4sf_ftype_v4sf_int 12521 = build_function_type_list (V4SF_type_node, 12522 V4SF_type_node, integer_type_node, NULL_TREE); 12523 tree v4sf_ftype_v4sf_int64 12524 = build_function_type_list (V4SF_type_node, 12525 V4SF_type_node, long_long_integer_type_node, 12526 NULL_TREE); 12527 tree v4sf_ftype_v4sf_v2si 12528 = build_function_type_list (V4SF_type_node, 12529 V4SF_type_node, V2SI_type_node, NULL_TREE); 12530 tree int_ftype_v4hi_int 12531 = build_function_type_list (integer_type_node, 12532 V4HI_type_node, integer_type_node, NULL_TREE); 12533 tree v4hi_ftype_v4hi_int_int 12534 = build_function_type_list (V4HI_type_node, V4HI_type_node, 12535 integer_type_node, integer_type_node, 12536 NULL_TREE); 12537 /* Miscellaneous. 
*/ 12538 tree v8qi_ftype_v4hi_v4hi 12539 = build_function_type_list (V8QI_type_node, 12540 V4HI_type_node, V4HI_type_node, NULL_TREE); 12541 tree v4hi_ftype_v2si_v2si 12542 = build_function_type_list (V4HI_type_node, 12543 V2SI_type_node, V2SI_type_node, NULL_TREE); 12544 tree v4sf_ftype_v4sf_v4sf_int 12545 = build_function_type_list (V4SF_type_node, 12546 V4SF_type_node, V4SF_type_node, 12547 integer_type_node, NULL_TREE); 12548 tree v2si_ftype_v4hi_v4hi 12549 = build_function_type_list (V2SI_type_node, 12550 V4HI_type_node, V4HI_type_node, NULL_TREE); 12551 tree v4hi_ftype_v4hi_int 12552 = build_function_type_list (V4HI_type_node, 12553 V4HI_type_node, integer_type_node, NULL_TREE); 12554 tree v4hi_ftype_v4hi_di 12555 = build_function_type_list (V4HI_type_node, 12556 V4HI_type_node, long_long_unsigned_type_node, 12557 NULL_TREE); 12558 tree v2si_ftype_v2si_di 12559 = build_function_type_list (V2SI_type_node, 12560 V2SI_type_node, long_long_unsigned_type_node, 12561 NULL_TREE); 12562 tree void_ftype_void 12563 = build_function_type (void_type_node, void_list_node); 12564 tree void_ftype_unsigned 12565 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE); 12566 tree void_ftype_unsigned_unsigned 12567 = build_function_type_list (void_type_node, unsigned_type_node, 12568 unsigned_type_node, NULL_TREE); 12569 tree void_ftype_pcvoid_unsigned_unsigned 12570 = build_function_type_list (void_type_node, const_ptr_type_node, 12571 unsigned_type_node, unsigned_type_node, 12572 NULL_TREE); 12573 tree unsigned_ftype_void 12574 = build_function_type (unsigned_type_node, void_list_node); 12575 tree di_ftype_void 12576 = build_function_type (long_long_unsigned_type_node, void_list_node); 12577 tree v4sf_ftype_void 12578 = build_function_type (V4SF_type_node, void_list_node); 12579 tree v2si_ftype_v4sf 12580 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE); 12581 /* Loads/stores. 
*/ 12582 tree void_ftype_v8qi_v8qi_pchar 12583 = build_function_type_list (void_type_node, 12584 V8QI_type_node, V8QI_type_node, 12585 pchar_type_node, NULL_TREE); 12586 tree v4sf_ftype_pcfloat 12587 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE); 12588 /* @@@ the type is bogus */ 12589 tree v4sf_ftype_v4sf_pv2si 12590 = build_function_type_list (V4SF_type_node, 12591 V4SF_type_node, pv2si_type_node, NULL_TREE); 12592 tree void_ftype_pv2si_v4sf 12593 = build_function_type_list (void_type_node, 12594 pv2si_type_node, V4SF_type_node, NULL_TREE); 12595 tree void_ftype_pfloat_v4sf 12596 = build_function_type_list (void_type_node, 12597 pfloat_type_node, V4SF_type_node, NULL_TREE); 12598 tree void_ftype_pdi_di 12599 = build_function_type_list (void_type_node, 12600 pdi_type_node, long_long_unsigned_type_node, 12601 NULL_TREE); 12602 tree void_ftype_pv2di_v2di 12603 = build_function_type_list (void_type_node, 12604 pv2di_type_node, V2DI_type_node, NULL_TREE); 12605 /* Normal vector unops. */ 12606 tree v4sf_ftype_v4sf 12607 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); 12608 12609 /* Normal vector binops. 
*/ 12610 tree v4sf_ftype_v4sf_v4sf 12611 = build_function_type_list (V4SF_type_node, 12612 V4SF_type_node, V4SF_type_node, NULL_TREE); 12613 tree v8qi_ftype_v8qi_v8qi 12614 = build_function_type_list (V8QI_type_node, 12615 V8QI_type_node, V8QI_type_node, NULL_TREE); 12616 tree v4hi_ftype_v4hi_v4hi 12617 = build_function_type_list (V4HI_type_node, 12618 V4HI_type_node, V4HI_type_node, NULL_TREE); 12619 tree v2si_ftype_v2si_v2si 12620 = build_function_type_list (V2SI_type_node, 12621 V2SI_type_node, V2SI_type_node, NULL_TREE); 12622 tree di_ftype_di_di 12623 = build_function_type_list (long_long_unsigned_type_node, 12624 long_long_unsigned_type_node, 12625 long_long_unsigned_type_node, NULL_TREE); 12626 12627 tree v2si_ftype_v2sf 12628 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE); 12629 tree v2sf_ftype_v2si 12630 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE); 12631 tree v2si_ftype_v2si 12632 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE); 12633 tree v2sf_ftype_v2sf 12634 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE); 12635 tree v2sf_ftype_v2sf_v2sf 12636 = build_function_type_list (V2SF_type_node, 12637 V2SF_type_node, V2SF_type_node, NULL_TREE); 12638 tree v2si_ftype_v2sf_v2sf 12639 = build_function_type_list (V2SI_type_node, 12640 V2SF_type_node, V2SF_type_node, NULL_TREE); 12641 tree pint_type_node = build_pointer_type (integer_type_node); 12642 tree pcint_type_node = build_pointer_type ( 12643 build_type_variant (integer_type_node, 1, 0)); 12644 tree pdouble_type_node = build_pointer_type (double_type_node); 12645 tree pcdouble_type_node = build_pointer_type ( 12646 build_type_variant (double_type_node, 1, 0)); 12647 tree int_ftype_v2df_v2df 12648 = build_function_type_list (integer_type_node, 12649 V2DF_type_node, V2DF_type_node, NULL_TREE); 12650 12651 tree ti_ftype_void 12652 = build_function_type (intTI_type_node, void_list_node); 12653 tree v2di_ftype_void 
12654 = build_function_type (V2DI_type_node, void_list_node); 12655 tree ti_ftype_ti_ti 12656 = build_function_type_list (intTI_type_node, 12657 intTI_type_node, intTI_type_node, NULL_TREE); 12658 tree void_ftype_pcvoid 12659 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE); 12660 tree v2di_ftype_di 12661 = build_function_type_list (V2DI_type_node, 12662 long_long_unsigned_type_node, NULL_TREE); 12663 tree di_ftype_v2di 12664 = build_function_type_list (long_long_unsigned_type_node, 12665 V2DI_type_node, NULL_TREE); 12666 tree v4sf_ftype_v4si 12667 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE); 12668 tree v4si_ftype_v4sf 12669 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE); 12670 tree v2df_ftype_v4si 12671 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE); 12672 tree v4si_ftype_v2df 12673 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE); 12674 tree v2si_ftype_v2df 12675 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE); 12676 tree v4sf_ftype_v2df 12677 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE); 12678 tree v2df_ftype_v2si 12679 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE); 12680 tree v2df_ftype_v4sf 12681 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE); 12682 tree int_ftype_v2df 12683 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE); 12684 tree int64_ftype_v2df 12685 = build_function_type_list (long_long_integer_type_node, 12686 V2DF_type_node, NULL_TREE); 12687 tree v2df_ftype_v2df_int 12688 = build_function_type_list (V2DF_type_node, 12689 V2DF_type_node, integer_type_node, NULL_TREE); 12690 tree v2df_ftype_v2df_int64 12691 = build_function_type_list (V2DF_type_node, 12692 V2DF_type_node, long_long_integer_type_node, 12693 NULL_TREE); 12694 tree v4sf_ftype_v4sf_v2df 12695 = build_function_type_list (V4SF_type_node, 12696 
V4SF_type_node, V2DF_type_node, NULL_TREE); 12697 tree v2df_ftype_v2df_v4sf 12698 = build_function_type_list (V2DF_type_node, 12699 V2DF_type_node, V4SF_type_node, NULL_TREE); 12700 tree v2df_ftype_v2df_v2df_int 12701 = build_function_type_list (V2DF_type_node, 12702 V2DF_type_node, V2DF_type_node, 12703 integer_type_node, 12704 NULL_TREE); 12705 tree v2df_ftype_v2df_pv2si 12706 = build_function_type_list (V2DF_type_node, 12707 V2DF_type_node, pv2si_type_node, NULL_TREE); 12708 tree void_ftype_pv2si_v2df 12709 = build_function_type_list (void_type_node, 12710 pv2si_type_node, V2DF_type_node, NULL_TREE); 12711 tree void_ftype_pdouble_v2df 12712 = build_function_type_list (void_type_node, 12713 pdouble_type_node, V2DF_type_node, NULL_TREE); 12714 tree void_ftype_pint_int 12715 = build_function_type_list (void_type_node, 12716 pint_type_node, integer_type_node, NULL_TREE); 12717 tree void_ftype_v16qi_v16qi_pchar 12718 = build_function_type_list (void_type_node, 12719 V16QI_type_node, V16QI_type_node, 12720 pchar_type_node, NULL_TREE); 12721 tree v2df_ftype_pcdouble 12722 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE); 12723 tree v2df_ftype_v2df_v2df 12724 = build_function_type_list (V2DF_type_node, 12725 V2DF_type_node, V2DF_type_node, NULL_TREE); 12726 tree v16qi_ftype_v16qi_v16qi 12727 = build_function_type_list (V16QI_type_node, 12728 V16QI_type_node, V16QI_type_node, NULL_TREE); 12729 tree v8hi_ftype_v8hi_v8hi 12730 = build_function_type_list (V8HI_type_node, 12731 V8HI_type_node, V8HI_type_node, NULL_TREE); 12732 tree v4si_ftype_v4si_v4si 12733 = build_function_type_list (V4SI_type_node, 12734 V4SI_type_node, V4SI_type_node, NULL_TREE); 12735 tree v2di_ftype_v2di_v2di 12736 = build_function_type_list (V2DI_type_node, 12737 V2DI_type_node, V2DI_type_node, NULL_TREE); 12738 tree v2di_ftype_v2df_v2df 12739 = build_function_type_list (V2DI_type_node, 12740 V2DF_type_node, V2DF_type_node, NULL_TREE); 12741 tree v2df_ftype_v2df 12742 = 
build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE); 12743 tree v2df_ftype_double 12744 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE); 12745 tree v2df_ftype_double_double 12746 = build_function_type_list (V2DF_type_node, 12747 double_type_node, double_type_node, NULL_TREE); 12748 tree int_ftype_v8hi_int 12749 = build_function_type_list (integer_type_node, 12750 V8HI_type_node, integer_type_node, NULL_TREE); 12751 tree v8hi_ftype_v8hi_int_int 12752 = build_function_type_list (V8HI_type_node, 12753 V8HI_type_node, integer_type_node, 12754 integer_type_node, NULL_TREE); 12755 tree v2di_ftype_v2di_int 12756 = build_function_type_list (V2DI_type_node, 12757 V2DI_type_node, integer_type_node, NULL_TREE); 12758 tree v4si_ftype_v4si_int 12759 = build_function_type_list (V4SI_type_node, 12760 V4SI_type_node, integer_type_node, NULL_TREE); 12761 tree v8hi_ftype_v8hi_int 12762 = build_function_type_list (V8HI_type_node, 12763 V8HI_type_node, integer_type_node, NULL_TREE); 12764 tree v8hi_ftype_v8hi_v2di 12765 = build_function_type_list (V8HI_type_node, 12766 V8HI_type_node, V2DI_type_node, NULL_TREE); 12767 tree v4si_ftype_v4si_v2di 12768 = build_function_type_list (V4SI_type_node, 12769 V4SI_type_node, V2DI_type_node, NULL_TREE); 12770 tree v4si_ftype_v8hi_v8hi 12771 = build_function_type_list (V4SI_type_node, 12772 V8HI_type_node, V8HI_type_node, NULL_TREE); 12773 tree di_ftype_v8qi_v8qi 12774 = build_function_type_list (long_long_unsigned_type_node, 12775 V8QI_type_node, V8QI_type_node, NULL_TREE); 12776 tree v2di_ftype_v16qi_v16qi 12777 = build_function_type_list (V2DI_type_node, 12778 V16QI_type_node, V16QI_type_node, NULL_TREE); 12779 tree int_ftype_v16qi 12780 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE); 12781 tree v16qi_ftype_pcchar 12782 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE); 12783 tree void_ftype_pchar_v16qi 12784 = build_function_type_list (void_type_node, 
12785 pchar_type_node, V16QI_type_node, NULL_TREE); 12786 tree v4si_ftype_pcint 12787 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE); 12788 tree void_ftype_pcint_v4si 12789 = build_function_type_list (void_type_node, 12790 pcint_type_node, V4SI_type_node, NULL_TREE); 12791 tree v2di_ftype_v2di 12792 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); 12793 12794 /* Add all builtins that are more or less simple operations on two 12795 operands. */ 12796 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) 12797 { 12798 /* Use one of the operands; the target can have a different mode for 12799 mask-generating compares. */ 12800 enum machine_mode mode; 12801 tree type; 12802 12803 if (d->name == 0) 12804 continue; 12805 mode = insn_data[d->icode].operand[1].mode; 12806 12807 switch (mode) 12808 { 12809 case V16QImode: 12810 type = v16qi_ftype_v16qi_v16qi; 12811 break; 12812 case V8HImode: 12813 type = v8hi_ftype_v8hi_v8hi; 12814 break; 12815 case V4SImode: 12816 type = v4si_ftype_v4si_v4si; 12817 break; 12818 case V2DImode: 12819 type = v2di_ftype_v2di_v2di; 12820 break; 12821 case V2DFmode: 12822 type = v2df_ftype_v2df_v2df; 12823 break; 12824 case TImode: 12825 type = ti_ftype_ti_ti; 12826 break; 12827 case V4SFmode: 12828 type = v4sf_ftype_v4sf_v4sf; 12829 break; 12830 case V8QImode: 12831 type = v8qi_ftype_v8qi_v8qi; 12832 break; 12833 case V4HImode: 12834 type = v4hi_ftype_v4hi_v4hi; 12835 break; 12836 case V2SImode: 12837 type = v2si_ftype_v2si_v2si; 12838 break; 12839 case DImode: 12840 type = di_ftype_di_di; 12841 break; 12842 12843 default: 12844 abort (); 12845 } 12846 12847 /* Override for comparisons. 
*/ 12848 if (d->icode == CODE_FOR_maskcmpv4sf3 12849 || d->icode == CODE_FOR_maskncmpv4sf3 12850 || d->icode == CODE_FOR_vmmaskcmpv4sf3 12851 || d->icode == CODE_FOR_vmmaskncmpv4sf3) 12852 type = v4si_ftype_v4sf_v4sf; 12853 12854 if (d->icode == CODE_FOR_maskcmpv2df3 12855 || d->icode == CODE_FOR_maskncmpv2df3 12856 || d->icode == CODE_FOR_vmmaskcmpv2df3 12857 || d->icode == CODE_FOR_vmmaskncmpv2df3) 12858 type = v2di_ftype_v2df_v2df; 12859 12860 def_builtin (d->mask, d->name, type, d->code); 12861 } 12862 12863 /* Add the remaining MMX insns with somewhat more complicated types. */ 12864 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO); 12865 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); 12866 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW); 12867 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD); 12868 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ); 12869 12870 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW); 12871 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD); 12872 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ); 12873 12874 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW); 12875 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD); 12876 12877 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW); 12878 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD); 12879 12880 /* comi/ucomi insns. 
*/ 12881 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) 12882 if (d->mask == MASK_SSE2) 12883 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code); 12884 else 12885 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code); 12886 12887 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB); 12888 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW); 12889 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB); 12890 12891 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); 12892 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); 12893 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); 12894 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); 12895 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); 12896 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS); 12897 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); 12898 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64); 12899 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); 12900 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); 12901 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64); 12902 12903 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW); 12904 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW); 12905 12906 def_builtin (MASK_SSE | MASK_3DNOW_A, 
"__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); 12907 12908 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS); 12909 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS); 12910 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS); 12911 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS); 12912 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); 12913 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS); 12914 12915 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS); 12916 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS); 12917 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS); 12918 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); 12919 12920 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS); 12921 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); 12922 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); 12923 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); 12924 12925 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); 12926 12927 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); 12928 12929 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); 12930 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); 12931 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", 
v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS); 12932 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); 12933 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS); 12934 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); 12935 12936 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); 12937 12938 /* Original 3DNow! */ 12939 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS); 12940 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB); 12941 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID); 12942 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC); 12943 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD); 12944 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ); 12945 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE); 12946 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT); 12947 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX); 12948 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN); 12949 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL); 12950 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP); 12951 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1); 12952 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2); 12953 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT); 12954 def_builtin (MASK_3DNOW, 
"__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1); 12955 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB); 12956 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR); 12957 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD); 12958 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW); 12959 12960 /* 3DNow! extension as used in the Athlon CPU. */ 12961 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW); 12962 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC); 12963 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC); 12964 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW); 12965 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF); 12966 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI); 12967 12968 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO); 12969 12970 /* SSE2 */ 12971 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128); 12972 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128); 12973 12974 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU); 12975 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ); 12976 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q); 12977 12978 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD); 12979 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD); 12980 def_builtin 
(MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD); 12981 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD); 12982 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD); 12983 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD); 12984 12985 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD); 12986 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD); 12987 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD); 12988 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD); 12989 12990 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD); 12991 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128); 12992 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI); 12993 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD); 12994 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ); 12995 12996 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD); 12997 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW); 12998 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW); 12999 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128); 13000 13001 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD); 13002 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD); 13003 13004 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", 
v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD); 13005 13006 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD); 13007 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS); 13008 13009 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ); 13010 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI); 13011 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS); 13012 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ); 13013 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI); 13014 13015 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD); 13016 13017 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI); 13018 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI); 13019 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64); 13020 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64); 13021 13022 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ); 13023 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD); 13024 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ); 13025 13026 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD); 13027 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD); 13028 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS); 13029 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", 
v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD); 13030 13031 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1); 13032 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD); 13033 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD); 13034 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1); 13035 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD); 13036 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1); 13037 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD); 13038 13039 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH); 13040 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE); 13041 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE); 13042 13043 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA); 13044 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU); 13045 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD); 13046 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA); 13047 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU); 13048 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED); 13049 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ); 13050 13051 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI); 13052 13053 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128); 13054 def_builtin (MASK_SSE2, 
"__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128); 13055 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128); 13056 13057 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128); 13058 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128); 13059 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128); 13060 13061 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128); 13062 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128); 13063 13064 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128); 13065 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128); 13066 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128); 13067 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128); 13068 13069 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128); 13070 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128); 13071 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128); 13072 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128); 13073 13074 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128); 13075 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128); 13076 13077 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128); 13078 13079 /* Prescott New Instructions. 
*/
  /* Tail of ix86_init_mmx_sse_builtins: the Prescott New Instructions
     (SSE3) builtins, all gated on MASK_PNI.  */
  def_builtin (MASK_PNI, "__builtin_ia32_monitor",
	       void_ftype_pcvoid_unsigned_unsigned,
	       IX86_BUILTIN_MONITOR);
  def_builtin (MASK_PNI, "__builtin_ia32_mwait",
	       void_ftype_unsigned_unsigned,
	       IX86_BUILTIN_MWAIT);
  def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
	       v4sf_ftype_v4sf,
	       IX86_BUILTIN_MOVSHDUP);
  def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
	       v4sf_ftype_v4sf,
	       IX86_BUILTIN_MOVSLDUP);
  def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
  def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
	       v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
  def_builtin (MASK_PNI, "__builtin_ia32_movddup",
	       v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.

   X is the expanded operand and MODE the vector mode it should have.
   If X is anything other than const0_rtx it is returned untouched;
   otherwise a fresh pseudo of MODE is cleared and returned instead.  */
static rtx
safe_vector_operand (x, mode)
     rtx x;
     enum machine_mode mode;
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  /* Pick the clear insn matching the register file: pxor (MMX) for
     64-bit modes, xorps (SSE) otherwise.  A SUBREG adapts the pseudo
     when its mode is not the insn's natural mode.  */
  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
			      : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
				: gen_rtx_SUBREG (V4SFmode, x, 0)));
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.
*/ 13122 13123static rtx 13124ix86_expand_binop_builtin (icode, arglist, target) 13125 enum insn_code icode; 13126 tree arglist; 13127 rtx target; 13128{ 13129 rtx pat; 13130 tree arg0 = TREE_VALUE (arglist); 13131 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 13132 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 13133 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 13134 enum machine_mode tmode = insn_data[icode].operand[0].mode; 13135 enum machine_mode mode0 = insn_data[icode].operand[1].mode; 13136 enum machine_mode mode1 = insn_data[icode].operand[2].mode; 13137 13138 if (VECTOR_MODE_P (mode0)) 13139 op0 = safe_vector_operand (op0, mode0); 13140 if (VECTOR_MODE_P (mode1)) 13141 op1 = safe_vector_operand (op1, mode1); 13142 13143 if (! target 13144 || GET_MODE (target) != tmode 13145 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 13146 target = gen_reg_rtx (tmode); 13147 13148 if (GET_MODE (op1) == SImode && mode1 == TImode) 13149 { 13150 rtx x = gen_reg_rtx (V4SImode); 13151 emit_insn (gen_sse2_loadd (x, op1)); 13152 op1 = gen_lowpart (TImode, x); 13153 } 13154 13155 /* In case the insn wants input operands in modes different from 13156 the result, abort. */ 13157 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1) 13158 abort (); 13159 13160 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 13161 op0 = copy_to_mode_reg (mode0, op0); 13162 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) 13163 op1 = copy_to_mode_reg (mode1, op1); 13164 13165 /* In the commutative cases, both op0 and op1 are nonimmediate_operand, 13166 yet one of the two must not be a memory. This is normally enforced 13167 by expanders, but we didn't bother to create one here. */ 13168 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM) 13169 op0 = copy_to_mode_reg (mode0, op0); 13170 13171 pat = GEN_FCN (icode) (target, op0, op1); 13172 if (! 
pat) 13173 return 0; 13174 emit_insn (pat); 13175 return target; 13176} 13177 13178/* Subroutine of ix86_expand_builtin to take care of stores. */ 13179 13180static rtx 13181ix86_expand_store_builtin (icode, arglist) 13182 enum insn_code icode; 13183 tree arglist; 13184{ 13185 rtx pat; 13186 tree arg0 = TREE_VALUE (arglist); 13187 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 13188 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 13189 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 13190 enum machine_mode mode0 = insn_data[icode].operand[0].mode; 13191 enum machine_mode mode1 = insn_data[icode].operand[1].mode; 13192 13193 if (VECTOR_MODE_P (mode1)) 13194 op1 = safe_vector_operand (op1, mode1); 13195 13196 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); 13197 op1 = copy_to_mode_reg (mode1, op1); 13198 13199 pat = GEN_FCN (icode) (op0, op1); 13200 if (pat) 13201 emit_insn (pat); 13202 return 0; 13203} 13204 13205/* Subroutine of ix86_expand_builtin to take care of unop insns. */ 13206 13207static rtx 13208ix86_expand_unop_builtin (icode, arglist, target, do_load) 13209 enum insn_code icode; 13210 tree arglist; 13211 rtx target; 13212 int do_load; 13213{ 13214 rtx pat; 13215 tree arg0 = TREE_VALUE (arglist); 13216 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 13217 enum machine_mode tmode = insn_data[icode].operand[0].mode; 13218 enum machine_mode mode0 = insn_data[icode].operand[1].mode; 13219 13220 if (! target 13221 || GET_MODE (target) != tmode 13222 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 13223 target = gen_reg_rtx (tmode); 13224 if (do_load) 13225 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); 13226 else 13227 { 13228 if (VECTOR_MODE_P (mode0)) 13229 op0 = safe_vector_operand (op0, mode0); 13230 13231 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 13232 op0 = copy_to_mode_reg (mode0, op0); 13233 } 13234 13235 pat = GEN_FCN (icode) (target, op0); 13236 if (! 
pat) 13237 return 0; 13238 emit_insn (pat); 13239 return target; 13240} 13241 13242/* Subroutine of ix86_expand_builtin to take care of three special unop insns: 13243 sqrtss, rsqrtss, rcpss. */ 13244 13245static rtx 13246ix86_expand_unop1_builtin (icode, arglist, target) 13247 enum insn_code icode; 13248 tree arglist; 13249 rtx target; 13250{ 13251 rtx pat; 13252 tree arg0 = TREE_VALUE (arglist); 13253 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 13254 enum machine_mode tmode = insn_data[icode].operand[0].mode; 13255 enum machine_mode mode0 = insn_data[icode].operand[1].mode; 13256 13257 if (! target 13258 || GET_MODE (target) != tmode 13259 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 13260 target = gen_reg_rtx (tmode); 13261 13262 if (VECTOR_MODE_P (mode0)) 13263 op0 = safe_vector_operand (op0, mode0); 13264 13265 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 13266 op0 = copy_to_mode_reg (mode0, op0); 13267 13268 op1 = op0; 13269 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0)) 13270 op1 = copy_to_mode_reg (mode0, op1); 13271 13272 pat = GEN_FCN (icode) (target, op0, op1); 13273 if (! pat) 13274 return 0; 13275 emit_insn (pat); 13276 return target; 13277} 13278 13279/* Subroutine of ix86_expand_builtin to take care of comparison insns. 
*/ 13280 13281static rtx 13282ix86_expand_sse_compare (d, arglist, target) 13283 const struct builtin_description *d; 13284 tree arglist; 13285 rtx target; 13286{ 13287 rtx pat; 13288 tree arg0 = TREE_VALUE (arglist); 13289 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 13290 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 13291 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 13292 rtx op2; 13293 enum machine_mode tmode = insn_data[d->icode].operand[0].mode; 13294 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode; 13295 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode; 13296 enum rtx_code comparison = d->comparison; 13297 13298 if (VECTOR_MODE_P (mode0)) 13299 op0 = safe_vector_operand (op0, mode0); 13300 if (VECTOR_MODE_P (mode1)) 13301 op1 = safe_vector_operand (op1, mode1); 13302 13303 /* Swap operands if we have a comparison that isn't available in 13304 hardware. */ 13305 if (d->flag) 13306 { 13307 rtx tmp = gen_reg_rtx (mode1); 13308 emit_move_insn (tmp, op1); 13309 op1 = op0; 13310 op0 = tmp; 13311 } 13312 13313 if (! target 13314 || GET_MODE (target) != tmode 13315 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode)) 13316 target = gen_reg_rtx (tmode); 13317 13318 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0)) 13319 op0 = copy_to_mode_reg (mode0, op0); 13320 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1)) 13321 op1 = copy_to_mode_reg (mode1, op1); 13322 13323 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); 13324 pat = GEN_FCN (d->icode) (target, op0, op1, op2); 13325 if (! pat) 13326 return 0; 13327 emit_insn (pat); 13328 return target; 13329} 13330 13331/* Subroutine of ix86_expand_builtin to take care of comi insns. 
 */

/* Expand a comi/ucomi scalar comparison builtin described by D.  The
   comparison insn sets the flags; a setcc-style QImode store of the
   comparison result is then emitted into a fresh SImode pseudo, which
   is returned.  Note that the incoming TARGET is unconditionally
   replaced by that fresh pseudo.  */

static rtx
ix86_expand_sse_comi (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  /* Zero the SImode result first, then write only its low byte via a
     STRICT_LOW_PART subreg so the upper bits stay well defined.  */
  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* NOTE(review): op2 is computed but never used below -- the insn is
     generated from op0/op1 only.  Looks like dead code; confirm before
     removing.  */
  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  /* SET_DEST (pat) is the flags register set by the comparison just
     emitted; test it against zero and store the QImode result.  */
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.
 */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  /* Builtins needing special expansion are handled by the switch;
     everything else falls through to the bdesc_* table scans below.  */
  switch (fcode)
    {
    /* No-operand, no-result builtins.  */
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    /* Extract a 16-bit element; the selector must be an immediate.  */
    case IX86_BUILTIN_PEXTRW:
    case IX86_BUILTIN_PEXTRW128:
      icode = (fcode == IX86_BUILTIN_PEXTRW
	       ? CODE_FOR_mmx_pextrw
	       : CODE_FOR_sse2_pextrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  /* Return a dummy register so expansion can continue after
	     the error.  */
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    /* Insert a 16-bit element; the selector must be an immediate.  */
    case IX86_BUILTIN_PINSRW:
    case IX86_BUILTIN_PINSRW128:
      icode = (fcode == IX86_BUILTIN_PINSRW
	       ? CODE_FOR_mmx_pinsrw
	       : CODE_FOR_sse2_pinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
	       : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
		  : CODE_FOR_sse2_maskmovdqu));
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    /* Scalar unops that duplicate their source operand.  */
    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    /* SSE loads and stores delegate to the generic helpers.  */
    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

    /* Load high/low halves: first arg is the vector, second a pointer.  */
    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    /* Store high/low halves: first arg is a pointer, second the vector.
       The same mov patterns are used as for the loads, with the memory
       on the destination side.  */
    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
    case IX86_BUILTIN_STOREHPD:
    case IX86_BUILTIN_STORELPD:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      /* The memory is used as both destination and pass-through source
	 of the three-operand mov pattern.  */
      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    /* MXCSR accesses go through a stack temporary.  */
    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    /* Shuffles: the third argument is an immediate mask.  */
    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    /* 128-bit byte shifts; expanded via TImode shifts, so the V2DI
       value is accessed through subregs.  */
    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
	       : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    /* 3DNow! builtins, all delegated to the generic helpers.  */
    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    /* 3DNow! extensions (Athlon).  */
    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    /* Register-clearing builtins.  */
    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    case IX86_BUILTIN_CLRTI:
      target = gen_reg_rtx (V2DImode);
      emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
      return target;


    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADAPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);

    case IX86_BUILTIN_STOREAPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_LOADSD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);

    case IX86_BUILTIN_STORESD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);

    /* Broadcast one double into both V2DF lanes via a stack temporary
       and a shufpd with mask 0.  */
    case IX86_BUILTIN_SETPD1:
      target = assign_386_stack_local (DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, DFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
      emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
      return op0;

    /* Build a V2DF from two doubles through a stack temporary.  */
    case IX86_BUILTIN_SETPD:
      target = assign_386_stack_local (V2DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      emit_move_insn (adjust_address (target, DFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, DFmode, 8),
		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_movapd (op0, target));
      return op0;

    /* Load and swap the two lanes (shufpd mask 1).  */
    case IX86_BUILTIN_LOADRPD:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
					 gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
      return target;

    /* Load one double and duplicate it into both lanes.  */
    case IX86_BUILTIN_LOADPD1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
					 gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPD1:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STORERPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);

    case IX86_BUILTIN_CLRPD:
      target = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse_clrv2df (target));
      return target;

    case IX86_BUILTIN_MFENCE:
	emit_insn (gen_sse2_mfence ());
	return 0;
    case IX86_BUILTIN_LFENCE:
	emit_insn (gen_sse2_lfence ());
	return 0;

    case IX86_BUILTIN_CLFLUSH:
	arg0 = TREE_VALUE (arglist);
	op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
	icode = CODE_FOR_sse2_clflush;
	if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
	    op0 = copy_to_mode_reg (Pmode, op0);

	emit_insn (gen_sse2_clflush (op0));
	return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQA:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_LOADD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);

    case IX86_BUILTIN_STOREDQA:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
    case IX86_BUILTIN_STORED:
      return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);

    /* monitor/mwait take SImode register operands.  */
    case IX86_BUILTIN_MONITOR:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (gen_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_mwait (op0, op1));
      return 0;

    case IX86_BUILTIN_LOADDDUP:
      return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);

    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
				       1);

    default:
      break;
    }

  /* Remaining builtins are described by tables; scan the two-operand,
     one-operand and comi tables in turn for a matching code.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3
	    || d->icode == CODE_FOR_maskcmpv2df3
	    || d->icode == CODE_FOR_vmmaskcmpv2df3
	    || d->icode == CODE_FOR_maskncmpv2df3
	    || d->icode == CODE_FOR_vmmaskncmpv2df3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}

/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.
 */

/* Return a MEM holding OPERAND, spilled to the stack.  Only valid after
   reload, since a proper stack slot cannot be allocated any more.  With
   a red zone the value is stored below the stack pointer; otherwise it
   is pushed, and the caller must release the space with
   ix86_free_from_memory.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      /* Red zone: store below the stack pointer without adjusting it.  */
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      /* 64-bit without red zone: push a full DImode word.  Narrower
	 modes are widened to DImode first.  */
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
		     gen_rtx_SET (VOIDmode,
				  gen_rtx_MEM (DImode,
					       gen_rtx_PRE_DEC (DImode,
								stack_pointer_rtx)),
				  operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      /* 32-bit: push the value in SImode pieces.  */
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    /* Push the high word first so the value sits in memory in
	       the right order.  */
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
		       gen_rtx_SET (VOIDmode,
				    gen_rtx_MEM (SImode,
						 gen_rtx_PRE_DEC (Pmode,
								  stack_pointer_rtx)),
				    operands[1]));
	    emit_insn (
		       gen_rtx_SET (VOIDmode,
				    gen_rtx_MEM (SImode,
						 gen_rtx_PRE_DEC (Pmode,
								  stack_pointer_rtx)),
				    operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
		     gen_rtx_SET (VOIDmode,
				  gen_rtx_MEM (GET_MODE (operand),
					       gen_rtx_PRE_DEC (SImode,
								stack_pointer_rtx)),
				  operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}

/* Free operand from the memory.
*/ 14057void 14058ix86_free_from_memory (mode) 14059 enum machine_mode mode; 14060{ 14061 if (!TARGET_64BIT || !TARGET_RED_ZONE) 14062 { 14063 int size; 14064 14065 if (mode == DImode || TARGET_64BIT) 14066 size = 8; 14067 else if (mode == HImode && TARGET_PARTIAL_REG_STALL) 14068 size = 2; 14069 else 14070 size = 4; 14071 /* Use LEA to deallocate stack space. In peephole2 it will be converted 14072 to pop or add instruction if registers are available. */ 14073 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, 14074 gen_rtx_PLUS (Pmode, stack_pointer_rtx, 14075 GEN_INT (size)))); 14076 } 14077} 14078 14079/* Put float CONST_DOUBLE in the constant pool instead of fp regs. 14080 QImode must go into class Q_REGS. 14081 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and 14082 movdf to do mem-to-mem moves through integer regs. */ 14083enum reg_class 14084ix86_preferred_reload_class (x, class) 14085 rtx x; 14086 enum reg_class class; 14087{ 14088 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x))) 14089 return NO_REGS; 14090 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode) 14091 { 14092 /* SSE can't load any constant directly yet. */ 14093 if (SSE_CLASS_P (class)) 14094 return NO_REGS; 14095 /* Floats can load 0 and 1. */ 14096 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x)) 14097 { 14098 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */ 14099 if (MAYBE_SSE_CLASS_P (class)) 14100 return (reg_class_subset_p (class, GENERAL_REGS) 14101 ? GENERAL_REGS : FLOAT_REGS); 14102 else 14103 return class; 14104 } 14105 /* General regs can load everything. */ 14106 if (reg_class_subset_p (class, GENERAL_REGS)) 14107 return GENERAL_REGS; 14108 /* In case we haven't resolved FLOAT or SSE yet, give up. 
*/ 14109 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)) 14110 return NO_REGS; 14111 } 14112 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x)) 14113 return NO_REGS; 14114 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS)) 14115 return Q_REGS; 14116 return class; 14117} 14118 14119/* If we are copying between general and FP registers, we need a memory 14120 location. The same is true for SSE and MMX registers. 14121 14122 The macro can't work reliably when one of the CLASSES is class containing 14123 registers from multiple units (SSE, MMX, integer). We avoid this by never 14124 combining those units in single alternative in the machine description. 14125 Ensure that this constraint holds to avoid unexpected surprises. 14126 14127 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not 14128 enforce these sanity checks. */ 14129int 14130ix86_secondary_memory_needed (class1, class2, mode, strict) 14131 enum reg_class class1, class2; 14132 enum machine_mode mode; 14133 int strict; 14134{ 14135 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1) 14136 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2) 14137 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) 14138 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2) 14139 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1) 14140 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)) 14141 { 14142 if (strict) 14143 abort (); 14144 else 14145 return 1; 14146 } 14147 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2) 14148 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2) 14149 && (mode) != SImode) 14150 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2) 14151 && (mode) != SImode)); 14152} 14153/* Return the cost of moving data from a register in class CLASS1 to 14154 one in class CLASS2. 
   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */
int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      /* Use the worse of the in/out memory costs for each side.  */
      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
		   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
		   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from general_purpose_register we may emit multiple
         stores followed by single load causing memory size mismatch stall.
         Count this as arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
  if (MMX_REGNO_P (regno))
    return (TARGET_MMX
	    ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
  /* We handle both integer and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care for QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  Registers 0-3 (eax..ebx)
     have byte subregisters; in 64-bit mode every GPR does.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.
   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.
 */
int
ix86_memory_move_cost (mode, class, in)
     enum machine_mode mode;
     enum reg_class class;
     int in;
{
  if (FLOAT_CLASS_P (class))
    {
      /* x87: pick the cost-table index by floating mode.  */
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	case TFmode:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      /* SSE: index by operand size in bytes (4, 8 or 16).  */
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      /* MMX: index by operand size in bytes (4 or 8).  */
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      /* Byte values in non-Q classes need movzbl on load; stores from
	 non-Q registers are penalized by 4.  */
      if (in)
	return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
		: ix86_cost->movzbl_load);
      else
	return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
		: ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
	      * ((int) GET_MODE_SIZE (mode)
		 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
    }
}

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* Emit an SVR3-style constructor record: push the constructor's address
   in the init section.  */
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif

#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (file, symb, stub)
     FILE *file;
     const char *symb, *stub;
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.
 */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      /* PIC: compute our own address to reach the lazy pointer
	 PC-relatively.  */
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  /* The lazy pointer initially points at the binder so the first call
     goes through dyld.  */
  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */

/* Order the registers for register allocator.  Call-clobbered general
   registers first, then call-saved ones; x87 registers go before or
   after SSE depending on whether they are used for FP math.  */

void
x86_order_regs_for_local_alloc ()
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
	reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
	reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}

/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (function)
     tree function;
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      /* If the return value is an aggregate returned in memory, the
	 hidden return-slot pointer occupies the first argument
	 register and `this' moves to the second.  */
      int n = aggregate_value_p (TREE_TYPE (type)) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_fntype_regparm (type) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
	 arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
	if (TREE_VALUE (parm) == void_type_node)
	  break;
      /* If not, the this parameter is in %eax.
 */
      if (parm)
	return gen_rtx_REG (SImode, 0);
    }

  /* Otherwise `this' lives on the stack; skip an extra slot when a
     hidden aggregate-return pointer precedes it.  */
  if (aggregate_value_p (TREE_TYPE (type)))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
  else
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}

/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
     tree thunk ATTRIBUTE_UNUSED;
     HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
     HOST_WIDE_INT vcall_offset;
     tree function;
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !(*targetm.binds_local_p) (function))
    return false;

  /* Otherwise ok.  */
  return true;
}

/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is non-zero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
     FILE *file ATTRIBUTE_UNUSED;
     tree thunk ATTRIBUTE_UNUSED;
     HOST_WIDE_INT delta;
     HOST_WIDE_INT vcall_offset;
     tree function;
{
  rtx xops[3];
  /* NOTE(review): `this' is a C identifier here but a keyword if this
     file is ever compiled by a C++ compiler; consider renaming.  */
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.
 */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
	{
	  /* DELTA too wide for an add immediate: stage it through the
	     R10 scratch register first.  */
	  if (!x86_64_general_operand (xops[0], DImode))
	    {
	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
	      xops[1] = tmp;
	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
	      xops[0] = tmp;
	      xops[1] = this;
	    }
	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
	}
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
	tmp = gen_rtx_REG (SImode, 2 /* ECX */);

      /* Load the vtable pointer (*this) into the scratch register.  */
      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
	{
	  /* VCALL_OFFSET does not fit a displacement: materialize it in
	     R11 and use an indexed address instead.  */
	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	  xops[0] = GEN_INT (vcall_offset);
	  xops[1] = tmp2;
	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
	}
      xops[1] = this_reg;
      if (TARGET_64BIT)
	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Emit the tail jump to FUNCTION, indirecting through the GOT for
     non-local symbols under PIC.  */
  xops[0] = DECL_RTL (function);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
	{
	  tmp = XEXP (xops[0], 0);
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  tmp = gen_rtx_MEM (QImode, tmp);
	  xops[0] = tmp;
	  output_asm_insn ("jmp\t%A0", xops);
	}
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
	{
	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
	  output_set_got (tmp);

	  xops[1] = tmp;
	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
	  output_asm_insn ("jmp\t{*}%1", xops);
	}
    }
}

/* Return the alignment to use for FIELD, given the alignment COMPUTED
   by the front end.  */
int
x86_field_alignment (field, computed)
     tree field;
     int computed;
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  /* For arrays, the relevant mode is that of the innermost element.  */
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
		    ?
get_inner_array_type (type) : type); 14653 if (mode == DFmode || mode == DCmode 14654 || GET_MODE_CLASS (mode) == MODE_INT 14655 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) 14656 return MIN (32, computed); 14657 return computed; 14658} 14659 14660/* Output assembler code to FILE to increment profiler label # LABELNO 14661 for profiling a function entry. */ 14662void 14663x86_function_profiler (file, labelno) 14664 FILE *file; 14665 int labelno; 14666{ 14667 if (TARGET_64BIT) 14668 if (flag_pic) 14669 { 14670#ifndef NO_PROFILE_COUNTERS 14671 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno); 14672#endif 14673 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME); 14674 } 14675 else 14676 { 14677#ifndef NO_PROFILE_COUNTERS 14678 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno); 14679#endif 14680 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); 14681 } 14682 else if (flag_pic) 14683 { 14684#ifndef NO_PROFILE_COUNTERS 14685 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n", 14686 LPREFIX, labelno, PROFILE_COUNT_REGISTER); 14687#endif 14688 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME); 14689 } 14690 else 14691 { 14692#ifndef NO_PROFILE_COUNTERS 14693 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno, 14694 PROFILE_COUNT_REGISTER); 14695#endif 14696 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); 14697 } 14698} 14699 14700/* Implement machine specific optimizations. 14701 At the moment we implement single transformation: AMD Athlon works faster 14702 when RET is not destination of conditional jump or directly preceeded 14703 by other jump instruction. We avoid the penalty by inserting NOP just 14704 before the RET instructions in such cases. 
*/ 14705void 14706x86_machine_dependent_reorg (first) 14707 rtx first ATTRIBUTE_UNUSED; 14708{ 14709 edge e; 14710 14711 if (!TARGET_ATHLON || !optimize || optimize_size) 14712 return; 14713 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next) 14714 { 14715 basic_block bb = e->src; 14716 rtx ret = bb->end; 14717 rtx prev; 14718 bool insert = false; 14719 14720 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb)) 14721 continue; 14722 prev = prev_nonnote_insn (ret); 14723 if (prev && GET_CODE (prev) == CODE_LABEL) 14724 { 14725 edge e; 14726 for (e = bb->pred; e; e = e->pred_next) 14727 if (EDGE_FREQUENCY (e) && e->src->index > 0 14728 && !(e->flags & EDGE_FALLTHRU)) 14729 insert = 1; 14730 } 14731 if (!insert) 14732 { 14733 prev = prev_real_insn (ret); 14734 if (prev && GET_CODE (prev) == JUMP_INSN 14735 && any_condjump_p (prev)) 14736 insert = 1; 14737 } 14738 if (insert) 14739 emit_insn_before (gen_nop (), ret); 14740 } 14741} 14742 14743#include "gt-i386.h" 14744