/* i386.c revision 117408 */
1/* Subroutines used for code generation on IA-32. 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 3 2002, 2003 Free Software Foundation, Inc. 4 5This file is part of GNU CC. 6 7GNU CC is free software; you can redistribute it and/or modify 8it under the terms of the GNU General Public License as published by 9the Free Software Foundation; either version 2, or (at your option) 10any later version. 11 12GNU CC is distributed in the hope that it will be useful, 13but WITHOUT ANY WARRANTY; without even the implied warranty of 14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15GNU General Public License for more details. 16 17You should have received a copy of the GNU General Public License 18along with GNU CC; see the file COPYING. If not, write to 19the Free Software Foundation, 59 Temple Place - Suite 330, 20Boston, MA 02111-1307, USA. */ 21 22 23/* $FreeBSD: head/contrib/gcc/config/i386/i386.c 117408 2003-07-11 04:29:35Z kan $ */ 24 25 26#include "config.h" 27#include "system.h" 28#include "rtl.h" 29#include "tree.h" 30#include "tm_p.h" 31#include "regs.h" 32#include "hard-reg-set.h" 33#include "real.h" 34#include "insn-config.h" 35#include "conditions.h" 36#include "output.h" 37#include "insn-attr.h" 38#include "flags.h" 39#include "except.h" 40#include "function.h" 41#include "recog.h" 42#include "expr.h" 43#include "optabs.h" 44#include "toplev.h" 45#include "basic-block.h" 46#include "ggc.h" 47#include "target.h" 48#include "target-def.h" 49#include "langhooks.h" 50 51#ifndef CHECK_STACK_LIMIT 52#define CHECK_STACK_LIMIT (-1) 53#endif 54 55/* Processor costs (relative to an add) */ 56static const 57struct processor_costs size_cost = { /* costs for tunning for size */ 58 2, /* cost of an add instruction */ 59 3, /* cost of a lea instruction */ 60 2, /* variable shift costs */ 61 3, /* constant shift costs */ 62 3, /* cost of starting a multiply */ 63 0, /* cost of multiply per each bit set */ 64 3, /* cost of a 
divide/mod */ 65 3, /* cost of movsx */ 66 3, /* cost of movzx */ 67 0, /* "large" insn */ 68 2, /* MOVE_RATIO */ 69 2, /* cost for loading QImode using movzbl */ 70 {2, 2, 2}, /* cost of loading integer registers 71 in QImode, HImode and SImode. 72 Relative to reg-reg move (2). */ 73 {2, 2, 2}, /* cost of storing integer registers */ 74 2, /* cost of reg,reg fld/fst */ 75 {2, 2, 2}, /* cost of loading fp registers 76 in SFmode, DFmode and XFmode */ 77 {2, 2, 2}, /* cost of loading integer registers */ 78 3, /* cost of moving MMX register */ 79 {3, 3}, /* cost of loading MMX registers 80 in SImode and DImode */ 81 {3, 3}, /* cost of storing MMX registers 82 in SImode and DImode */ 83 3, /* cost of moving SSE register */ 84 {3, 3, 3}, /* cost of loading SSE registers 85 in SImode, DImode and TImode */ 86 {3, 3, 3}, /* cost of storing SSE registers 87 in SImode, DImode and TImode */ 88 3, /* MMX or SSE register to integer */ 89 0, /* size of prefetch block */ 90 0, /* number of parallel prefetches */ 91 2, /* cost of FADD and FSUB insns. */ 92 2, /* cost of FMUL instruction. */ 93 2, /* cost of FDIV instruction. */ 94 2, /* cost of FABS instruction. */ 95 2, /* cost of FCHS instruction. */ 96 2, /* cost of FSQRT instruction. */ 97}; 98 99/* Processor costs (relative to an add) */ 100static const 101struct processor_costs i386_cost = { /* 386 specific costs */ 102 1, /* cost of an add instruction */ 103 1, /* cost of a lea instruction */ 104 3, /* variable shift costs */ 105 2, /* constant shift costs */ 106 6, /* cost of starting a multiply */ 107 1, /* cost of multiply per each bit set */ 108 23, /* cost of a divide/mod */ 109 3, /* cost of movsx */ 110 2, /* cost of movzx */ 111 15, /* "large" insn */ 112 3, /* MOVE_RATIO */ 113 4, /* cost for loading QImode using movzbl */ 114 {2, 4, 2}, /* cost of loading integer registers 115 in QImode, HImode and SImode. 116 Relative to reg-reg move (2). 
*/ 117 {2, 4, 2}, /* cost of storing integer registers */ 118 2, /* cost of reg,reg fld/fst */ 119 {8, 8, 8}, /* cost of loading fp registers 120 in SFmode, DFmode and XFmode */ 121 {8, 8, 8}, /* cost of loading integer registers */ 122 2, /* cost of moving MMX register */ 123 {4, 8}, /* cost of loading MMX registers 124 in SImode and DImode */ 125 {4, 8}, /* cost of storing MMX registers 126 in SImode and DImode */ 127 2, /* cost of moving SSE register */ 128 {4, 8, 16}, /* cost of loading SSE registers 129 in SImode, DImode and TImode */ 130 {4, 8, 16}, /* cost of storing SSE registers 131 in SImode, DImode and TImode */ 132 3, /* MMX or SSE register to integer */ 133 0, /* size of prefetch block */ 134 0, /* number of parallel prefetches */ 135 23, /* cost of FADD and FSUB insns. */ 136 27, /* cost of FMUL instruction. */ 137 88, /* cost of FDIV instruction. */ 138 22, /* cost of FABS instruction. */ 139 24, /* cost of FCHS instruction. */ 140 122, /* cost of FSQRT instruction. */ 141}; 142 143static const 144struct processor_costs i486_cost = { /* 486 specific costs */ 145 1, /* cost of an add instruction */ 146 1, /* cost of a lea instruction */ 147 3, /* variable shift costs */ 148 2, /* constant shift costs */ 149 12, /* cost of starting a multiply */ 150 1, /* cost of multiply per each bit set */ 151 40, /* cost of a divide/mod */ 152 3, /* cost of movsx */ 153 2, /* cost of movzx */ 154 15, /* "large" insn */ 155 3, /* MOVE_RATIO */ 156 4, /* cost for loading QImode using movzbl */ 157 {2, 4, 2}, /* cost of loading integer registers 158 in QImode, HImode and SImode. 159 Relative to reg-reg move (2). 
*/ 160 {2, 4, 2}, /* cost of storing integer registers */ 161 2, /* cost of reg,reg fld/fst */ 162 {8, 8, 8}, /* cost of loading fp registers 163 in SFmode, DFmode and XFmode */ 164 {8, 8, 8}, /* cost of loading integer registers */ 165 2, /* cost of moving MMX register */ 166 {4, 8}, /* cost of loading MMX registers 167 in SImode and DImode */ 168 {4, 8}, /* cost of storing MMX registers 169 in SImode and DImode */ 170 2, /* cost of moving SSE register */ 171 {4, 8, 16}, /* cost of loading SSE registers 172 in SImode, DImode and TImode */ 173 {4, 8, 16}, /* cost of storing SSE registers 174 in SImode, DImode and TImode */ 175 3, /* MMX or SSE register to integer */ 176 0, /* size of prefetch block */ 177 0, /* number of parallel prefetches */ 178 8, /* cost of FADD and FSUB insns. */ 179 16, /* cost of FMUL instruction. */ 180 73, /* cost of FDIV instruction. */ 181 3, /* cost of FABS instruction. */ 182 3, /* cost of FCHS instruction. */ 183 83, /* cost of FSQRT instruction. */ 184}; 185 186static const 187struct processor_costs pentium_cost = { 188 1, /* cost of an add instruction */ 189 1, /* cost of a lea instruction */ 190 4, /* variable shift costs */ 191 1, /* constant shift costs */ 192 11, /* cost of starting a multiply */ 193 0, /* cost of multiply per each bit set */ 194 25, /* cost of a divide/mod */ 195 3, /* cost of movsx */ 196 2, /* cost of movzx */ 197 8, /* "large" insn */ 198 6, /* MOVE_RATIO */ 199 6, /* cost for loading QImode using movzbl */ 200 {2, 4, 2}, /* cost of loading integer registers 201 in QImode, HImode and SImode. 202 Relative to reg-reg move (2). 
*/ 203 {2, 4, 2}, /* cost of storing integer registers */ 204 2, /* cost of reg,reg fld/fst */ 205 {2, 2, 6}, /* cost of loading fp registers 206 in SFmode, DFmode and XFmode */ 207 {4, 4, 6}, /* cost of loading integer registers */ 208 8, /* cost of moving MMX register */ 209 {8, 8}, /* cost of loading MMX registers 210 in SImode and DImode */ 211 {8, 8}, /* cost of storing MMX registers 212 in SImode and DImode */ 213 2, /* cost of moving SSE register */ 214 {4, 8, 16}, /* cost of loading SSE registers 215 in SImode, DImode and TImode */ 216 {4, 8, 16}, /* cost of storing SSE registers 217 in SImode, DImode and TImode */ 218 3, /* MMX or SSE register to integer */ 219 0, /* size of prefetch block */ 220 0, /* number of parallel prefetches */ 221 3, /* cost of FADD and FSUB insns. */ 222 3, /* cost of FMUL instruction. */ 223 39, /* cost of FDIV instruction. */ 224 1, /* cost of FABS instruction. */ 225 1, /* cost of FCHS instruction. */ 226 70, /* cost of FSQRT instruction. */ 227}; 228 229static const 230struct processor_costs pentiumpro_cost = { 231 1, /* cost of an add instruction */ 232 1, /* cost of a lea instruction */ 233 1, /* variable shift costs */ 234 1, /* constant shift costs */ 235 4, /* cost of starting a multiply */ 236 0, /* cost of multiply per each bit set */ 237 17, /* cost of a divide/mod */ 238 1, /* cost of movsx */ 239 1, /* cost of movzx */ 240 8, /* "large" insn */ 241 6, /* MOVE_RATIO */ 242 2, /* cost for loading QImode using movzbl */ 243 {4, 4, 4}, /* cost of loading integer registers 244 in QImode, HImode and SImode. 245 Relative to reg-reg move (2). 
*/ 246 {2, 2, 2}, /* cost of storing integer registers */ 247 2, /* cost of reg,reg fld/fst */ 248 {2, 2, 6}, /* cost of loading fp registers 249 in SFmode, DFmode and XFmode */ 250 {4, 4, 6}, /* cost of loading integer registers */ 251 2, /* cost of moving MMX register */ 252 {2, 2}, /* cost of loading MMX registers 253 in SImode and DImode */ 254 {2, 2}, /* cost of storing MMX registers 255 in SImode and DImode */ 256 2, /* cost of moving SSE register */ 257 {2, 2, 8}, /* cost of loading SSE registers 258 in SImode, DImode and TImode */ 259 {2, 2, 8}, /* cost of storing SSE registers 260 in SImode, DImode and TImode */ 261 3, /* MMX or SSE register to integer */ 262 32, /* size of prefetch block */ 263 6, /* number of parallel prefetches */ 264 3, /* cost of FADD and FSUB insns. */ 265 5, /* cost of FMUL instruction. */ 266 56, /* cost of FDIV instruction. */ 267 2, /* cost of FABS instruction. */ 268 2, /* cost of FCHS instruction. */ 269 56, /* cost of FSQRT instruction. */ 270}; 271 272static const 273struct processor_costs k6_cost = { 274 1, /* cost of an add instruction */ 275 2, /* cost of a lea instruction */ 276 1, /* variable shift costs */ 277 1, /* constant shift costs */ 278 3, /* cost of starting a multiply */ 279 0, /* cost of multiply per each bit set */ 280 18, /* cost of a divide/mod */ 281 2, /* cost of movsx */ 282 2, /* cost of movzx */ 283 8, /* "large" insn */ 284 4, /* MOVE_RATIO */ 285 3, /* cost for loading QImode using movzbl */ 286 {4, 5, 4}, /* cost of loading integer registers 287 in QImode, HImode and SImode. 288 Relative to reg-reg move (2). 
*/ 289 {2, 3, 2}, /* cost of storing integer registers */ 290 4, /* cost of reg,reg fld/fst */ 291 {6, 6, 6}, /* cost of loading fp registers 292 in SFmode, DFmode and XFmode */ 293 {4, 4, 4}, /* cost of loading integer registers */ 294 2, /* cost of moving MMX register */ 295 {2, 2}, /* cost of loading MMX registers 296 in SImode and DImode */ 297 {2, 2}, /* cost of storing MMX registers 298 in SImode and DImode */ 299 2, /* cost of moving SSE register */ 300 {2, 2, 8}, /* cost of loading SSE registers 301 in SImode, DImode and TImode */ 302 {2, 2, 8}, /* cost of storing SSE registers 303 in SImode, DImode and TImode */ 304 6, /* MMX or SSE register to integer */ 305 32, /* size of prefetch block */ 306 1, /* number of parallel prefetches */ 307 2, /* cost of FADD and FSUB insns. */ 308 2, /* cost of FMUL instruction. */ 309 56, /* cost of FDIV instruction. */ 310 2, /* cost of FABS instruction. */ 311 2, /* cost of FCHS instruction. */ 312 56, /* cost of FSQRT instruction. */ 313}; 314 315static const 316struct processor_costs athlon_cost = { 317 1, /* cost of an add instruction */ 318 2, /* cost of a lea instruction */ 319 1, /* variable shift costs */ 320 1, /* constant shift costs */ 321 5, /* cost of starting a multiply */ 322 0, /* cost of multiply per each bit set */ 323 42, /* cost of a divide/mod */ 324 1, /* cost of movsx */ 325 1, /* cost of movzx */ 326 8, /* "large" insn */ 327 9, /* MOVE_RATIO */ 328 4, /* cost for loading QImode using movzbl */ 329 {3, 4, 3}, /* cost of loading integer registers 330 in QImode, HImode and SImode. 331 Relative to reg-reg move (2). 
*/ 332 {3, 4, 3}, /* cost of storing integer registers */ 333 4, /* cost of reg,reg fld/fst */ 334 {4, 4, 12}, /* cost of loading fp registers 335 in SFmode, DFmode and XFmode */ 336 {6, 6, 8}, /* cost of loading integer registers */ 337 2, /* cost of moving MMX register */ 338 {4, 4}, /* cost of loading MMX registers 339 in SImode and DImode */ 340 {4, 4}, /* cost of storing MMX registers 341 in SImode and DImode */ 342 2, /* cost of moving SSE register */ 343 {4, 4, 6}, /* cost of loading SSE registers 344 in SImode, DImode and TImode */ 345 {4, 4, 5}, /* cost of storing SSE registers 346 in SImode, DImode and TImode */ 347 5, /* MMX or SSE register to integer */ 348 64, /* size of prefetch block */ 349 6, /* number of parallel prefetches */ 350 4, /* cost of FADD and FSUB insns. */ 351 4, /* cost of FMUL instruction. */ 352 24, /* cost of FDIV instruction. */ 353 2, /* cost of FABS instruction. */ 354 2, /* cost of FCHS instruction. */ 355 35, /* cost of FSQRT instruction. */ 356}; 357 358static const 359struct processor_costs pentium4_cost = { 360 1, /* cost of an add instruction */ 361 1, /* cost of a lea instruction */ 362 8, /* variable shift costs */ 363 8, /* constant shift costs */ 364 30, /* cost of starting a multiply */ 365 0, /* cost of multiply per each bit set */ 366 112, /* cost of a divide/mod */ 367 1, /* cost of movsx */ 368 1, /* cost of movzx */ 369 16, /* "large" insn */ 370 6, /* MOVE_RATIO */ 371 2, /* cost for loading QImode using movzbl */ 372 {4, 5, 4}, /* cost of loading integer registers 373 in QImode, HImode and SImode. 374 Relative to reg-reg move (2). 
*/ 375 {2, 3, 2}, /* cost of storing integer registers */ 376 2, /* cost of reg,reg fld/fst */ 377 {2, 2, 6}, /* cost of loading fp registers 378 in SFmode, DFmode and XFmode */ 379 {4, 4, 6}, /* cost of loading integer registers */ 380 2, /* cost of moving MMX register */ 381 {2, 2}, /* cost of loading MMX registers 382 in SImode and DImode */ 383 {2, 2}, /* cost of storing MMX registers 384 in SImode and DImode */ 385 12, /* cost of moving SSE register */ 386 {12, 12, 12}, /* cost of loading SSE registers 387 in SImode, DImode and TImode */ 388 {2, 2, 8}, /* cost of storing SSE registers 389 in SImode, DImode and TImode */ 390 10, /* MMX or SSE register to integer */ 391 64, /* size of prefetch block */ 392 6, /* number of parallel prefetches */ 393 5, /* cost of FADD and FSUB insns. */ 394 7, /* cost of FMUL instruction. */ 395 43, /* cost of FDIV instruction. */ 396 2, /* cost of FABS instruction. */ 397 2, /* cost of FCHS instruction. */ 398 43, /* cost of FSQRT instruction. */ 399}; 400 401const struct processor_costs *ix86_cost = &pentium_cost; 402 403/* Processor feature/optimization bitmasks. 
*/ 404#define m_386 (1<<PROCESSOR_I386) 405#define m_486 (1<<PROCESSOR_I486) 406#define m_PENT (1<<PROCESSOR_PENTIUM) 407#define m_PPRO (1<<PROCESSOR_PENTIUMPRO) 408#define m_K6 (1<<PROCESSOR_K6) 409#define m_ATHLON (1<<PROCESSOR_ATHLON) 410#define m_PENT4 (1<<PROCESSOR_PENTIUM4) 411 412const int x86_use_leave = m_386 | m_K6 | m_ATHLON; 413const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4; 414const int x86_zero_extend_with_and = m_486 | m_PENT; 415const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */; 416const int x86_double_with_add = ~m_386; 417const int x86_use_bit_test = m_386; 418const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6; 419const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4; 420const int x86_3dnow_a = m_ATHLON; 421const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4; 422const int x86_branch_hints = m_PENT4; 423const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4; 424const int x86_partial_reg_stall = m_PPRO; 425const int x86_use_loop = m_K6; 426const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT); 427const int x86_use_mov0 = m_K6; 428const int x86_use_cltd = ~(m_PENT | m_K6); 429const int x86_read_modify_write = ~m_PENT; 430const int x86_read_modify = ~(m_PENT | m_PPRO); 431const int x86_split_long_moves = m_PPRO; 432const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON; 433const int x86_fast_prefix = ~(m_PENT | m_486 | m_386); 434const int x86_single_stringop = m_386 | m_PENT4; 435const int x86_qimode_math = ~(0); 436const int x86_promote_qi_regs = 0; 437const int x86_himode_math = ~(m_PPRO); 438const int x86_promote_hi_regs = m_PPRO; 439const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4; 440const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4; 441const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4; 442const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4; 443const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO); 444const 
int x86_partial_reg_dependency = m_ATHLON | m_PENT4; 445const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4; 446const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO; 447const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO; 448const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO; 449const int x86_decompose_lea = m_PENT4; 450const int x86_shift1 = ~m_486; 451const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4; 452 453/* In case the avreage insn count for single function invocation is 454 lower than this constant, emit fast (but longer) prologue and 455 epilogue code. */ 456#define FAST_PROLOGUE_INSN_COUNT 30 457 458/* Set by prologue expander and used by epilogue expander to determine 459 the style used. */ 460static int use_fast_prologue_epilogue; 461 462/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */ 463static const char *const qi_reg_name[] = QI_REGISTER_NAMES; 464static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; 465static const char *const hi_reg_name[] = HI_REGISTER_NAMES; 466 467/* Array of the smallest class containing reg number REGNO, indexed by 468 REGNO. Used by REGNO_REG_CLASS in i386.h. 
*/ 469 470enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] = 471{ 472 /* ax, dx, cx, bx */ 473 AREG, DREG, CREG, BREG, 474 /* si, di, bp, sp */ 475 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS, 476 /* FP registers */ 477 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS, 478 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, 479 /* arg pointer */ 480 NON_Q_REGS, 481 /* flags, fpsr, dirflag, frame */ 482 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS, 483 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 484 SSE_REGS, SSE_REGS, 485 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, 486 MMX_REGS, MMX_REGS, 487 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 488 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 489 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 490 SSE_REGS, SSE_REGS, 491}; 492 493/* The "default" register map used in 32bit mode. */ 494 495int const dbx_register_map[FIRST_PSEUDO_REGISTER] = 496{ 497 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */ 498 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */ 499 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 500 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */ 501 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */ 502 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ 503 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ 504}; 505 506static int const x86_64_int_parameter_registers[6] = 507{ 508 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/, 509 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */ 510}; 511 512static int const x86_64_int_return_registers[4] = 513{ 514 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/ 515}; 516 517/* The "default" register map used in 64bit mode. 
*/ 518int const dbx64_register_map[FIRST_PSEUDO_REGISTER] = 519{ 520 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */ 521 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */ 522 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 523 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */ 524 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */ 525 8,9,10,11,12,13,14,15, /* extended integer registers */ 526 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */ 527}; 528 529/* Define the register numbers to be used in Dwarf debugging information. 530 The SVR4 reference port C compiler uses the following register numbers 531 in its Dwarf output code: 532 0 for %eax (gcc regno = 0) 533 1 for %ecx (gcc regno = 2) 534 2 for %edx (gcc regno = 1) 535 3 for %ebx (gcc regno = 3) 536 4 for %esp (gcc regno = 7) 537 5 for %ebp (gcc regno = 6) 538 6 for %esi (gcc regno = 4) 539 7 for %edi (gcc regno = 5) 540 The following three DWARF register numbers are never generated by 541 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4 542 believes these numbers have these meanings. 543 8 for %eip (no gcc equivalent) 544 9 for %eflags (gcc regno = 17) 545 10 for %trapno (no gcc equivalent) 546 It is not at all clear how we should number the FP stack registers 547 for the x86 architecture. If the version of SDB on x86/svr4 were 548 a bit less brain dead with respect to floating-point then we would 549 have a precedent to follow with respect to DWARF register numbers 550 for x86 FP registers, but the SDB on x86/svr4 is so completely 551 broken with respect to FP registers that it is hardly worth thinking 552 of it as something to strive for compatibility with. 553 The version of x86/svr4 SDB I have at the moment does (partially) 554 seem to believe that DWARF register number 11 is associated with 555 the x86 register %st(0), but that's about all. 
Higher DWARF 556 register numbers don't seem to be associated with anything in 557 particular, and even for DWARF regno 11, SDB only seems to under- 558 stand that it should say that a variable lives in %st(0) (when 559 asked via an `=' command) if we said it was in DWARF regno 11, 560 but SDB still prints garbage when asked for the value of the 561 variable in question (via a `/' command). 562 (Also note that the labels SDB prints for various FP stack regs 563 when doing an `x' command are all wrong.) 564 Note that these problems generally don't affect the native SVR4 565 C compiler because it doesn't allow the use of -O with -g and 566 because when it is *not* optimizing, it allocates a memory 567 location for each floating-point variable, and the memory 568 location is what gets described in the DWARF AT_location 569 attribute for the variable in question. 570 Regardless of the severe mental illness of the x86/svr4 SDB, we 571 do something sensible here and we use the following DWARF 572 register numbers. Note that these are all stack-top-relative 573 numbers. 574 11 for %st(0) (gcc regno = 8) 575 12 for %st(1) (gcc regno = 9) 576 13 for %st(2) (gcc regno = 10) 577 14 for %st(3) (gcc regno = 11) 578 15 for %st(4) (gcc regno = 12) 579 16 for %st(5) (gcc regno = 13) 580 17 for %st(6) (gcc regno = 14) 581 18 for %st(7) (gcc regno = 15) 582*/ 583int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = 584{ 585 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */ 586 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */ 587 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 588 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */ 589 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */ 590 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */ 591 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */ 592}; 593 594/* Test and compare insns in i386.md store the information needed to 595 generate branch and scc insns here. 
*/ 596 597rtx ix86_compare_op0 = NULL_RTX; 598rtx ix86_compare_op1 = NULL_RTX; 599 600/* The encoding characters for the four TLS models present in ELF. */ 601 602static char const tls_model_chars[] = " GLil"; 603 604#define MAX_386_STACK_LOCALS 3 605/* Size of the register save area. */ 606#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16) 607 608/* Define the structure for the machine field in struct function. */ 609struct machine_function GTY(()) 610{ 611 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS]; 612 const char *some_ld_name; 613 int save_varrargs_registers; 614 int accesses_prev_frame; 615}; 616 617#define ix86_stack_locals (cfun->machine->stack_locals) 618#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers) 619 620/* Structure describing stack frame layout. 621 Stack grows downward: 622 623 [arguments] 624 <- ARG_POINTER 625 saved pc 626 627 saved frame pointer if frame_pointer_needed 628 <- HARD_FRAME_POINTER 629 [saved regs] 630 631 [padding1] \ 632 ) 633 [va_arg registers] ( 634 > to_allocate <- FRAME_POINTER 635 [frame] ( 636 ) 637 [padding2] / 638 */ 639struct ix86_frame 640{ 641 int nregs; 642 int padding1; 643 int va_arg_size; 644 HOST_WIDE_INT frame; 645 int padding2; 646 int outgoing_arguments_size; 647 int red_zone_size; 648 649 HOST_WIDE_INT to_allocate; 650 /* The offsets relative to ARG_POINTER. */ 651 HOST_WIDE_INT frame_pointer_offset; 652 HOST_WIDE_INT hard_frame_pointer_offset; 653 HOST_WIDE_INT stack_pointer_offset; 654}; 655 656/* Used to enable/disable debugging features. */ 657const char *ix86_debug_arg_string, *ix86_debug_addr_string; 658/* Code model option as passed by user. */ 659const char *ix86_cmodel_string; 660/* Parsed value. */ 661enum cmodel ix86_cmodel; 662/* Asm dialect. */ 663const char *ix86_asm_string; 664enum asm_dialect ix86_asm_dialect = ASM_ATT; 665/* TLS dialext. 
*/ 666const char *ix86_tls_dialect_string; 667enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU; 668 669/* Which unit we are generating floating point math for. */ 670enum fpmath_unit ix86_fpmath; 671 672/* Which cpu are we scheduling for. */ 673enum processor_type ix86_cpu; 674/* Which instruction set architecture to use. */ 675enum processor_type ix86_arch; 676 677/* Strings to hold which cpu and instruction set architecture to use. */ 678const char *ix86_cpu_string; /* for -mcpu=<xxx> */ 679const char *ix86_arch_string; /* for -march=<xxx> */ 680const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */ 681 682/* # of registers to use to pass arguments. */ 683const char *ix86_regparm_string; 684 685/* true if sse prefetch instruction is not NOOP. */ 686int x86_prefetch_sse; 687 688/* ix86_regparm_string as a number */ 689int ix86_regparm; 690 691/* Alignment to use for loops and jumps: */ 692 693/* Power of two alignment for loops. */ 694const char *ix86_align_loops_string; 695 696/* Power of two alignment for non-loop jumps. */ 697const char *ix86_align_jumps_string; 698 699/* Power of two alignment for stack boundary in bytes. */ 700const char *ix86_preferred_stack_boundary_string; 701 702/* Preferred alignment for stack boundary in bits. */ 703int ix86_preferred_stack_boundary; 704 705/* Values 1-5: see jump.c */ 706int ix86_branch_cost; 707const char *ix86_branch_cost_string; 708 709/* Power of two alignment for functions. */ 710const char *ix86_align_funcs_string; 711 712/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. 
*/ 713static char internal_label_prefix[16]; 714static int internal_label_prefix_len; 715 716static int local_symbolic_operand PARAMS ((rtx, enum machine_mode)); 717static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model)); 718static void output_pic_addr_const PARAMS ((FILE *, rtx, int)); 719static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode, 720 int, int, FILE *)); 721static const char *get_some_local_dynamic_name PARAMS ((void)); 722static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *)); 723static rtx maybe_get_pool_constant PARAMS ((rtx)); 724static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx)); 725static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code, 726 rtx *, rtx *)); 727static rtx get_thread_pointer PARAMS ((void)); 728static void get_pc_thunk_name PARAMS ((char [32], unsigned int)); 729static rtx gen_push PARAMS ((rtx)); 730static int memory_address_length PARAMS ((rtx addr)); 731static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type)); 732static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type)); 733static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx)); 734static void ix86_dump_ppro_packet PARAMS ((FILE *)); 735static void ix86_reorder_insn PARAMS ((rtx *, rtx *)); 736static struct machine_function * ix86_init_machine_status PARAMS ((void)); 737static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode)); 738static int ix86_nsaved_regs PARAMS ((void)); 739static void ix86_emit_save_regs PARAMS ((void)); 740static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT)); 741static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int)); 742static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT)); 743static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx)); 744static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *)); 745static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void)); 746static void 
ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT)); 747static rtx ix86_expand_aligntest PARAMS ((rtx, int)); 748static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx)); 749static int ix86_issue_rate PARAMS ((void)); 750static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int)); 751static void ix86_sched_init PARAMS ((FILE *, int, int)); 752static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int)); 753static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int)); 754static int ia32_use_dfa_pipeline_interface PARAMS ((void)); 755static int ia32_multipass_dfa_lookahead PARAMS ((void)); 756static void ix86_init_mmx_sse_builtins PARAMS ((void)); 757static rtx x86_this_parameter PARAMS ((tree)); 758static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT, 759 HOST_WIDE_INT, tree)); 760static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT, 761 HOST_WIDE_INT, tree)); 762 763struct ix86_address 764{ 765 rtx base, index, disp; 766 HOST_WIDE_INT scale; 767}; 768 769static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *)); 770static bool ix86_cannot_force_const_mem PARAMS ((rtx)); 771 772static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED; 773static const char *ix86_strip_name_encoding PARAMS ((const char *)) 774 ATTRIBUTE_UNUSED; 775 776struct builtin_description; 777static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *, 778 tree, rtx)); 779static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *, 780 tree, rtx)); 781static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx)); 782static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int)); 783static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx)); 784static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree)); 785static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode)); 786static enum rtx_code ix86_fp_compare_code_to_integer PARAMS 
((enum rtx_code)); 787static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code, 788 enum rtx_code *, 789 enum rtx_code *, 790 enum rtx_code *)); 791static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx, 792 rtx *, rtx *)); 793static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code)); 794static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code)); 795static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code)); 796static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code)); 797static unsigned int ix86_select_alt_pic_regnum PARAMS ((void)); 798static int ix86_save_reg PARAMS ((unsigned int, int)); 799static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *)); 800static int ix86_comp_type_attributes PARAMS ((tree, tree)); 801static int ix86_fntype_regparm PARAMS ((tree)); 802const struct attribute_spec ix86_attribute_table[]; 803static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *)); 804static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *)); 805static int ix86_value_regno PARAMS ((enum machine_mode)); 806static bool contains_128bit_aligned_vector_p PARAMS ((tree)); 807 808#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION) 809static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int)); 810#endif 811 812/* Register class used for passing given 64bit part of the argument. 813 These represent classes as documented by the PS ABI, with the exception 814 of SSESF, SSEDF classes, that are basically SSE class, just gcc will 815 use SF or DFmode move instead of DImode to avoid reformating penalties. 816 817 Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves 818 whenever possible (upper half does contain padding). 
819 */ 820enum x86_64_reg_class 821 { 822 X86_64_NO_CLASS, 823 X86_64_INTEGER_CLASS, 824 X86_64_INTEGERSI_CLASS, 825 X86_64_SSE_CLASS, 826 X86_64_SSESF_CLASS, 827 X86_64_SSEDF_CLASS, 828 X86_64_SSEUP_CLASS, 829 X86_64_X87_CLASS, 830 X86_64_X87UP_CLASS, 831 X86_64_MEMORY_CLASS 832 }; 833static const char * const x86_64_reg_class_name[] = 834 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"}; 835 836#define MAX_CLASSES 4 837static int classify_argument PARAMS ((enum machine_mode, tree, 838 enum x86_64_reg_class [MAX_CLASSES], 839 int)); 840static int examine_argument PARAMS ((enum machine_mode, tree, int, int *, 841 int *)); 842static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int, 843 const int *, int)); 844static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class, 845 enum x86_64_reg_class)); 846 847/* Initialize the GCC target structure. */ 848#undef TARGET_ATTRIBUTE_TABLE 849#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table 850#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES 851# undef TARGET_MERGE_DECL_ATTRIBUTES 852# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes 853#endif 854 855#undef TARGET_COMP_TYPE_ATTRIBUTES 856#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes 857 858#undef TARGET_INIT_BUILTINS 859#define TARGET_INIT_BUILTINS ix86_init_builtins 860 861#undef TARGET_EXPAND_BUILTIN 862#define TARGET_EXPAND_BUILTIN ix86_expand_builtin 863 864#undef TARGET_ASM_FUNCTION_EPILOGUE 865#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue 866 867#undef TARGET_ASM_OPEN_PAREN 868#define TARGET_ASM_OPEN_PAREN "" 869#undef TARGET_ASM_CLOSE_PAREN 870#define TARGET_ASM_CLOSE_PAREN "" 871 872#undef TARGET_ASM_ALIGNED_HI_OP 873#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT 874#undef TARGET_ASM_ALIGNED_SI_OP 875#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG 876#ifdef ASM_QUAD 877#undef TARGET_ASM_ALIGNED_DI_OP 878#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD 879#endif 
880 881#undef TARGET_ASM_UNALIGNED_HI_OP 882#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP 883#undef TARGET_ASM_UNALIGNED_SI_OP 884#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP 885#undef TARGET_ASM_UNALIGNED_DI_OP 886#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP 887 888#undef TARGET_SCHED_ADJUST_COST 889#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost 890#undef TARGET_SCHED_ISSUE_RATE 891#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate 892#undef TARGET_SCHED_VARIABLE_ISSUE 893#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue 894#undef TARGET_SCHED_INIT 895#define TARGET_SCHED_INIT ix86_sched_init 896#undef TARGET_SCHED_REORDER 897#define TARGET_SCHED_REORDER ix86_sched_reorder 898#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE 899#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \ 900 ia32_use_dfa_pipeline_interface 901#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD 902#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ 903 ia32_multipass_dfa_lookahead 904 905#ifdef HAVE_AS_TLS 906#undef TARGET_HAVE_TLS 907#define TARGET_HAVE_TLS true 908#endif 909#undef TARGET_CANNOT_FORCE_CONST_MEM 910#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem 911 912#undef TARGET_ASM_OUTPUT_MI_THUNK 913#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk 914#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK 915#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk 916 917struct gcc_target targetm = TARGET_INITIALIZER; 918 919/* The svr4 ABI for the i386 says that records and unions are returned 920 in memory. */ 921#ifndef DEFAULT_PCC_STRUCT_RETURN 922#define DEFAULT_PCC_STRUCT_RETURN 1 923#endif 924 925/* Sometimes certain combinations of command options do not make 926 sense on a particular target machine. You can define a macro 927 `OVERRIDE_OPTIONS' to take account of this. This macro, if 928 defined, is executed once just after all the command options have 929 been parsed. 
930 931 Don't use this macro to turn on various extra optimizations for 932 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ 933 934void 935override_options () 936{ 937 int i; 938 /* Comes from final.c -- no real reason to change it. */ 939#define MAX_CODE_ALIGN 16 940 941 static struct ptt 942 { 943 const struct processor_costs *cost; /* Processor costs */ 944 const int target_enable; /* Target flags to enable. */ 945 const int target_disable; /* Target flags to disable. */ 946 const int align_loop; /* Default alignments. */ 947 const int align_loop_max_skip; 948 const int align_jump; 949 const int align_jump_max_skip; 950 const int align_func; 951 const int branch_cost; 952 } 953 const processor_target_table[PROCESSOR_max] = 954 { 955 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1}, 956 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1}, 957 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1}, 958 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1}, 959 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1}, 960 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1}, 961 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1} 962 }; 963 964 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES; 965 static struct pta 966 { 967 const char *const name; /* processor name or nickname. 
*/ 968 const enum processor_type processor; 969 const enum pta_flags 970 { 971 PTA_SSE = 1, 972 PTA_SSE2 = 2, 973 PTA_MMX = 4, 974 PTA_PREFETCH_SSE = 8, 975 PTA_3DNOW = 16, 976 PTA_3DNOW_A = 64 977 } flags; 978 } 979 const processor_alias_table[] = 980 { 981 {"i386", PROCESSOR_I386, 0}, 982 {"i486", PROCESSOR_I486, 0}, 983 {"i586", PROCESSOR_PENTIUM, 0}, 984 {"pentium", PROCESSOR_PENTIUM, 0}, 985 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX}, 986 {"winchip-c6", PROCESSOR_I486, PTA_MMX}, 987 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, 988 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, 989 {"i686", PROCESSOR_PENTIUMPRO, 0}, 990 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0}, 991 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX}, 992 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, 993 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | 994 PTA_MMX | PTA_PREFETCH_SSE}, 995 {"k6", PROCESSOR_K6, PTA_MMX}, 996 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 997 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 998 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 999 | PTA_3DNOW_A}, 1000 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE 1001 | PTA_3DNOW | PTA_3DNOW_A}, 1002 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1003 | PTA_3DNOW_A | PTA_SSE}, 1004 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1005 | PTA_3DNOW_A | PTA_SSE}, 1006 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1007 | PTA_3DNOW_A | PTA_SSE}, 1008 }; 1009 1010 int const pta_size = ARRAY_SIZE (processor_alias_table); 1011 1012 /* By default our XFmode is the 80-bit extended format. If we have 1013 use TFmode instead, it's also the 80-bit format, but with padding. 
*/ 1014 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format; 1015 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format; 1016 1017 /* Set the default values for switches whose default depends on TARGET_64BIT 1018 in case they weren't overwriten by command line options. */ 1019 if (TARGET_64BIT) 1020 { 1021 if (flag_omit_frame_pointer == 2) 1022 flag_omit_frame_pointer = 1; 1023 if (flag_asynchronous_unwind_tables == 2) 1024 flag_asynchronous_unwind_tables = 1; 1025 if (flag_pcc_struct_return == 2) 1026 flag_pcc_struct_return = 0; 1027 } 1028 else 1029 { 1030 if (flag_omit_frame_pointer == 2) 1031 flag_omit_frame_pointer = 0; 1032 if (flag_asynchronous_unwind_tables == 2) 1033 flag_asynchronous_unwind_tables = 0; 1034 if (flag_pcc_struct_return == 2) 1035 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; 1036 } 1037 1038#ifdef SUBTARGET_OVERRIDE_OPTIONS 1039 SUBTARGET_OVERRIDE_OPTIONS; 1040#endif 1041 1042 if (!ix86_cpu_string && ix86_arch_string) 1043 ix86_cpu_string = ix86_arch_string; 1044 if (!ix86_cpu_string) 1045 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT]; 1046 if (!ix86_arch_string) 1047 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386"; 1048 1049 if (ix86_cmodel_string != 0) 1050 { 1051 if (!strcmp (ix86_cmodel_string, "small")) 1052 ix86_cmodel = flag_pic ? 
CM_SMALL_PIC : CM_SMALL; 1053 else if (flag_pic) 1054 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string); 1055 else if (!strcmp (ix86_cmodel_string, "32")) 1056 ix86_cmodel = CM_32; 1057 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic) 1058 ix86_cmodel = CM_KERNEL; 1059 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic) 1060 ix86_cmodel = CM_MEDIUM; 1061 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic) 1062 ix86_cmodel = CM_LARGE; 1063 else 1064 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string); 1065 } 1066 else 1067 { 1068 ix86_cmodel = CM_32; 1069 if (TARGET_64BIT) 1070 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 1071 } 1072 if (ix86_asm_string != 0) 1073 { 1074 if (!strcmp (ix86_asm_string, "intel")) 1075 ix86_asm_dialect = ASM_INTEL; 1076 else if (!strcmp (ix86_asm_string, "att")) 1077 ix86_asm_dialect = ASM_ATT; 1078 else 1079 error ("bad value (%s) for -masm= switch", ix86_asm_string); 1080 } 1081 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32)) 1082 error ("code model `%s' not supported in the %s bit mode", 1083 ix86_cmodel_string, TARGET_64BIT ? "64" : "32"); 1084 if (ix86_cmodel == CM_LARGE) 1085 sorry ("code model `large' not supported yet"); 1086 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0)) 1087 sorry ("%i-bit mode not compiled in", 1088 (target_flags & MASK_64BIT) ? 64 : 32); 1089 1090 for (i = 0; i < pta_size; i++) 1091 if (! strcmp (ix86_arch_string, processor_alias_table[i].name)) 1092 { 1093 ix86_arch = processor_alias_table[i].processor; 1094 /* Default cpu tuning to the architecture. 
*/ 1095 ix86_cpu = ix86_arch; 1096 if (processor_alias_table[i].flags & PTA_MMX 1097 && !(target_flags_explicit & MASK_MMX)) 1098 target_flags |= MASK_MMX; 1099 if (processor_alias_table[i].flags & PTA_3DNOW 1100 && !(target_flags_explicit & MASK_3DNOW)) 1101 target_flags |= MASK_3DNOW; 1102 if (processor_alias_table[i].flags & PTA_3DNOW_A 1103 && !(target_flags_explicit & MASK_3DNOW_A)) 1104 target_flags |= MASK_3DNOW_A; 1105 if (processor_alias_table[i].flags & PTA_SSE 1106 && !(target_flags_explicit & MASK_SSE)) 1107 target_flags |= MASK_SSE; 1108 if (processor_alias_table[i].flags & PTA_SSE2 1109 && !(target_flags_explicit & MASK_SSE2)) 1110 target_flags |= MASK_SSE2; 1111 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) 1112 x86_prefetch_sse = true; 1113 break; 1114 } 1115 1116 if (i == pta_size) 1117 error ("bad value (%s) for -march= switch", ix86_arch_string); 1118 1119 for (i = 0; i < pta_size; i++) 1120 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name)) 1121 { 1122 ix86_cpu = processor_alias_table[i].processor; 1123 break; 1124 } 1125 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) 1126 x86_prefetch_sse = true; 1127 if (i == pta_size) 1128 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string); 1129 1130 if (optimize_size) 1131 ix86_cost = &size_cost; 1132 else 1133 ix86_cost = processor_target_table[ix86_cpu].cost; 1134 target_flags |= processor_target_table[ix86_cpu].target_enable; 1135 target_flags &= ~processor_target_table[ix86_cpu].target_disable; 1136 1137 /* Arrange to set up i386_stack_locals for all functions. */ 1138 init_machine_status = ix86_init_machine_status; 1139 1140 /* Validate -mregparm= value. 
*/ 1141 if (ix86_regparm_string) 1142 { 1143 i = atoi (ix86_regparm_string); 1144 if (i < 0 || i > REGPARM_MAX) 1145 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX); 1146 else 1147 ix86_regparm = i; 1148 } 1149 else 1150 if (TARGET_64BIT) 1151 ix86_regparm = REGPARM_MAX; 1152 1153 /* If the user has provided any of the -malign-* options, 1154 warn and use that value only if -falign-* is not set. 1155 Remove this code in GCC 3.2 or later. */ 1156 if (ix86_align_loops_string) 1157 { 1158 warning ("-malign-loops is obsolete, use -falign-loops"); 1159 if (align_loops == 0) 1160 { 1161 i = atoi (ix86_align_loops_string); 1162 if (i < 0 || i > MAX_CODE_ALIGN) 1163 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1164 else 1165 align_loops = 1 << i; 1166 } 1167 } 1168 1169 if (ix86_align_jumps_string) 1170 { 1171 warning ("-malign-jumps is obsolete, use -falign-jumps"); 1172 if (align_jumps == 0) 1173 { 1174 i = atoi (ix86_align_jumps_string); 1175 if (i < 0 || i > MAX_CODE_ALIGN) 1176 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1177 else 1178 align_jumps = 1 << i; 1179 } 1180 } 1181 1182 if (ix86_align_funcs_string) 1183 { 1184 warning ("-malign-functions is obsolete, use -falign-functions"); 1185 if (align_functions == 0) 1186 { 1187 i = atoi (ix86_align_funcs_string); 1188 if (i < 0 || i > MAX_CODE_ALIGN) 1189 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1190 else 1191 align_functions = 1 << i; 1192 } 1193 } 1194 1195 /* Default align_* from the processor table. 
*/ 1196 if (align_loops == 0) 1197 { 1198 align_loops = processor_target_table[ix86_cpu].align_loop; 1199 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip; 1200 } 1201 if (align_jumps == 0) 1202 { 1203 align_jumps = processor_target_table[ix86_cpu].align_jump; 1204 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip; 1205 } 1206 if (align_functions == 0) 1207 { 1208 align_functions = processor_target_table[ix86_cpu].align_func; 1209 } 1210 1211 /* Validate -mpreferred-stack-boundary= value, or provide default. 1212 The default of 128 bits is for Pentium III's SSE __m128, but we 1213 don't want additional code to keep the stack aligned when 1214 optimizing for code size. */ 1215 ix86_preferred_stack_boundary = (optimize_size 1216 ? TARGET_64BIT ? 128 : 32 1217 : 128); 1218 if (ix86_preferred_stack_boundary_string) 1219 { 1220 i = atoi (ix86_preferred_stack_boundary_string); 1221 if (i < (TARGET_64BIT ? 4 : 2) || i > 12) 1222 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i, 1223 TARGET_64BIT ? 4 : 2); 1224 else 1225 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT; 1226 } 1227 1228 /* Validate -mbranch-cost= value, or provide default. */ 1229 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost; 1230 if (ix86_branch_cost_string) 1231 { 1232 i = atoi (ix86_branch_cost_string); 1233 if (i < 0 || i > 5) 1234 error ("-mbranch-cost=%d is not between 0 and 5", i); 1235 else 1236 ix86_branch_cost = i; 1237 } 1238 1239 if (ix86_tls_dialect_string) 1240 { 1241 if (strcmp (ix86_tls_dialect_string, "gnu") == 0) 1242 ix86_tls_dialect = TLS_DIALECT_GNU; 1243 else if (strcmp (ix86_tls_dialect_string, "sun") == 0) 1244 ix86_tls_dialect = TLS_DIALECT_SUN; 1245 else 1246 error ("bad value (%s) for -mtls-dialect= switch", 1247 ix86_tls_dialect_string); 1248 } 1249 1250 /* Keep nonleaf frame pointers. 
*/ 1251 if (TARGET_OMIT_LEAF_FRAME_POINTER) 1252 flag_omit_frame_pointer = 1; 1253 1254 /* If we're doing fast math, we don't care about comparison order 1255 wrt NaNs. This lets us use a shorter comparison sequence. */ 1256 if (flag_unsafe_math_optimizations) 1257 target_flags &= ~MASK_IEEE_FP; 1258 1259 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387, 1260 since the insns won't need emulation. */ 1261 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch)) 1262 target_flags &= ~MASK_NO_FANCY_MATH_387; 1263 1264 if (TARGET_64BIT) 1265 { 1266 if (TARGET_ALIGN_DOUBLE) 1267 error ("-malign-double makes no sense in the 64bit mode"); 1268 if (TARGET_RTD) 1269 error ("-mrtd calling convention not supported in the 64bit mode"); 1270 /* Enable by default the SSE and MMX builtins. */ 1271 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE); 1272 ix86_fpmath = FPMATH_SSE; 1273 } 1274 else 1275 ix86_fpmath = FPMATH_387; 1276 1277 if (ix86_fpmath_string != 0) 1278 { 1279 if (! strcmp (ix86_fpmath_string, "387")) 1280 ix86_fpmath = FPMATH_387; 1281 else if (! strcmp (ix86_fpmath_string, "sse")) 1282 { 1283 if (!TARGET_SSE) 1284 { 1285 warning ("SSE instruction set disabled, using 387 arithmetics"); 1286 ix86_fpmath = FPMATH_387; 1287 } 1288 else 1289 ix86_fpmath = FPMATH_SSE; 1290 } 1291 else if (! strcmp (ix86_fpmath_string, "387,sse") 1292 || ! strcmp (ix86_fpmath_string, "sse,387")) 1293 { 1294 if (!TARGET_SSE) 1295 { 1296 warning ("SSE instruction set disabled, using 387 arithmetics"); 1297 ix86_fpmath = FPMATH_387; 1298 } 1299 else if (!TARGET_80387) 1300 { 1301 warning ("387 instruction set disabled, using SSE arithmetics"); 1302 ix86_fpmath = FPMATH_SSE; 1303 } 1304 else 1305 ix86_fpmath = FPMATH_SSE | FPMATH_387; 1306 } 1307 else 1308 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string); 1309 } 1310 1311 /* It makes no sense to ask for just SSE builtins, so MMX is also turned 1312 on by -msse. 
*/ 1313 if (TARGET_SSE) 1314 { 1315 target_flags |= MASK_MMX; 1316 x86_prefetch_sse = true; 1317 } 1318 1319 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */ 1320 if (TARGET_3DNOW) 1321 { 1322 target_flags |= MASK_MMX; 1323 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX 1324 extensions it adds. */ 1325 if (x86_3dnow_a & (1 << ix86_arch)) 1326 target_flags |= MASK_3DNOW_A; 1327 } 1328 if ((x86_accumulate_outgoing_args & CPUMASK) 1329 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) 1330 && !optimize_size) 1331 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 1332 1333 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ 1334 { 1335 char *p; 1336 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); 1337 p = strchr (internal_label_prefix, 'X'); 1338 internal_label_prefix_len = p - internal_label_prefix; 1339 *p = '\0'; 1340 } 1341} 1342 1343void 1344optimization_options (level, size) 1345 int level; 1346 int size ATTRIBUTE_UNUSED; 1347{ 1348 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to 1349 make the problem with not enough registers even worse. */ 1350#ifdef INSN_SCHEDULING 1351 if (level > 1) 1352 flag_schedule_insns = 0; 1353#endif 1354 1355 /* The default values of these switches depend on the TARGET_64BIT 1356 that is not known at this moment. Mark these values with 2 and 1357 let user the to override these. In case there is no command line option 1358 specifying them, we will set the defaults in override_options. */ 1359 if (optimize >= 1) 1360 flag_omit_frame_pointer = 2; 1361 flag_pcc_struct_return = 2; 1362 flag_asynchronous_unwind_tables = 2; 1363} 1364 1365/* Table of valid machine attributes. */ 1366const struct attribute_spec ix86_attribute_table[] = 1367{ 1368 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ 1369 /* Stdcall attribute says callee is responsible for popping arguments 1370 if they are not variable. 
*/ 1371 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute }, 1372 /* Cdecl attribute says the callee is a normal C declaration */ 1373 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute }, 1374 /* Regparm attribute specifies how many integer arguments are to be 1375 passed in registers. */ 1376 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute }, 1377#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES 1378 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute }, 1379 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute }, 1380 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute }, 1381#endif 1382 { NULL, 0, 0, false, false, false, NULL } 1383}; 1384 1385/* Handle a "cdecl" or "stdcall" attribute; 1386 arguments as in struct attribute_spec.handler. */ 1387static tree 1388ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs) 1389 tree *node; 1390 tree name; 1391 tree args ATTRIBUTE_UNUSED; 1392 int flags ATTRIBUTE_UNUSED; 1393 bool *no_add_attrs; 1394{ 1395 if (TREE_CODE (*node) != FUNCTION_TYPE 1396 && TREE_CODE (*node) != METHOD_TYPE 1397 && TREE_CODE (*node) != FIELD_DECL 1398 && TREE_CODE (*node) != TYPE_DECL) 1399 { 1400 warning ("`%s' attribute only applies to functions", 1401 IDENTIFIER_POINTER (name)); 1402 *no_add_attrs = true; 1403 } 1404 1405 if (TARGET_64BIT) 1406 { 1407 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name)); 1408 *no_add_attrs = true; 1409 } 1410 1411 return NULL_TREE; 1412} 1413 1414/* Handle a "regparm" attribute; 1415 arguments as in struct attribute_spec.handler. 
*/ 1416static tree 1417ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs) 1418 tree *node; 1419 tree name; 1420 tree args; 1421 int flags ATTRIBUTE_UNUSED; 1422 bool *no_add_attrs; 1423{ 1424 if (TREE_CODE (*node) != FUNCTION_TYPE 1425 && TREE_CODE (*node) != METHOD_TYPE 1426 && TREE_CODE (*node) != FIELD_DECL 1427 && TREE_CODE (*node) != TYPE_DECL) 1428 { 1429 warning ("`%s' attribute only applies to functions", 1430 IDENTIFIER_POINTER (name)); 1431 *no_add_attrs = true; 1432 } 1433 else 1434 { 1435 tree cst; 1436 1437 cst = TREE_VALUE (args); 1438 if (TREE_CODE (cst) != INTEGER_CST) 1439 { 1440 warning ("`%s' attribute requires an integer constant argument", 1441 IDENTIFIER_POINTER (name)); 1442 *no_add_attrs = true; 1443 } 1444 else if (compare_tree_int (cst, REGPARM_MAX) > 0) 1445 { 1446 warning ("argument to `%s' attribute larger than %d", 1447 IDENTIFIER_POINTER (name), REGPARM_MAX); 1448 *no_add_attrs = true; 1449 } 1450 } 1451 1452 return NULL_TREE; 1453} 1454 1455/* Return 0 if the attributes for two types are incompatible, 1 if they 1456 are compatible, and 2 if they are nearly compatible (which causes a 1457 warning to be generated). */ 1458 1459static int 1460ix86_comp_type_attributes (type1, type2) 1461 tree type1; 1462 tree type2; 1463{ 1464 /* Check for mismatch of non-default calling convention. */ 1465 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall"; 1466 1467 if (TREE_CODE (type1) != FUNCTION_TYPE) 1468 return 1; 1469 1470 /* Check for mismatched return types (cdecl vs stdcall). */ 1471 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1)) 1472 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2))) 1473 return 0; 1474 return 1; 1475} 1476 1477/* Return the regparm value for a fuctio with the indicated TYPE. 
*/ 1478 1479static int 1480ix86_fntype_regparm (type) 1481 tree type; 1482{ 1483 tree attr; 1484 1485 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type)); 1486 if (attr) 1487 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); 1488 else 1489 return ix86_regparm; 1490} 1491 1492/* Value is the number of bytes of arguments automatically 1493 popped when returning from a subroutine call. 1494 FUNDECL is the declaration node of the function (as a tree), 1495 FUNTYPE is the data type of the function (as a tree), 1496 or for a library call it is an identifier node for the subroutine name. 1497 SIZE is the number of bytes of arguments passed on the stack. 1498 1499 On the 80386, the RTD insn may be used to pop them if the number 1500 of args is fixed, but if the number is variable then the caller 1501 must pop them all. RTD can't be used for library calls now 1502 because the library is compiled with the Unix compiler. 1503 Use of RTD is a selectable option, since it is incompatible with 1504 standard Unix calling sequences. If the option is not selected, 1505 the caller must always pop the args. 1506 1507 The attribute stdcall is equivalent to RTD on a per module basis. */ 1508 1509int 1510ix86_return_pops_args (fundecl, funtype, size) 1511 tree fundecl; 1512 tree funtype; 1513 int size; 1514{ 1515 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE); 1516 1517 /* Cdecl functions override -mrtd, and never pop the stack. */ 1518 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) { 1519 1520 /* Stdcall functions will pop the stack if not variable args. */ 1521 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))) 1522 rtd = 1; 1523 1524 if (rtd 1525 && (TYPE_ARG_TYPES (funtype) == NULL_TREE 1526 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype))) 1527 == void_type_node))) 1528 return size; 1529 } 1530 1531 /* Lose any fake structure return argument if it is passed on the stack. 
*/ 1532 if (aggregate_value_p (TREE_TYPE (funtype)) 1533 && !TARGET_64BIT) 1534 { 1535 int nregs = ix86_fntype_regparm (funtype); 1536 1537 if (!nregs) 1538 return GET_MODE_SIZE (Pmode); 1539 } 1540 1541 return 0; 1542} 1543 1544/* Argument support functions. */ 1545 1546/* Return true when register may be used to pass function parameters. */ 1547bool 1548ix86_function_arg_regno_p (regno) 1549 int regno; 1550{ 1551 int i; 1552 if (!TARGET_64BIT) 1553 return (regno < REGPARM_MAX 1554 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno])); 1555 if (SSE_REGNO_P (regno) && TARGET_SSE) 1556 return true; 1557 /* RAX is used as hidden argument to va_arg functions. */ 1558 if (!regno) 1559 return true; 1560 for (i = 0; i < REGPARM_MAX; i++) 1561 if (regno == x86_64_int_parameter_registers[i]) 1562 return true; 1563 return false; 1564} 1565 1566/* Initialize a variable CUM of type CUMULATIVE_ARGS 1567 for a call to a function whose data type is FNTYPE. 1568 For a library call, FNTYPE is 0. */ 1569 1570void 1571init_cumulative_args (cum, fntype, libname) 1572 CUMULATIVE_ARGS *cum; /* Argument info to initialize */ 1573 tree fntype; /* tree ptr for function decl */ 1574 rtx libname; /* SYMBOL_REF of library name or 0 */ 1575{ 1576 static CUMULATIVE_ARGS zero_cum; 1577 tree param, next_param; 1578 1579 if (TARGET_DEBUG_ARG) 1580 { 1581 fprintf (stderr, "\ninit_cumulative_args ("); 1582 if (fntype) 1583 fprintf (stderr, "fntype code = %s, ret code = %s", 1584 tree_code_name[(int) TREE_CODE (fntype)], 1585 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]); 1586 else 1587 fprintf (stderr, "no fntype"); 1588 1589 if (libname) 1590 fprintf (stderr, ", libname = %s", XSTR (libname, 0)); 1591 } 1592 1593 *cum = zero_cum; 1594 1595 /* Set up the number of registers to use for passing arguments. 
*/ 1596 cum->nregs = ix86_regparm; 1597 cum->sse_nregs = SSE_REGPARM_MAX; 1598 if (fntype && !TARGET_64BIT) 1599 { 1600 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype)); 1601 1602 if (attr) 1603 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); 1604 } 1605 cum->maybe_vaarg = false; 1606 1607 /* Determine if this function has variable arguments. This is 1608 indicated by the last argument being 'void_type_mode' if there 1609 are no variable arguments. If there are variable arguments, then 1610 we won't pass anything in registers */ 1611 1612 if (cum->nregs) 1613 { 1614 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0; 1615 param != 0; param = next_param) 1616 { 1617 next_param = TREE_CHAIN (param); 1618 if (next_param == 0 && TREE_VALUE (param) != void_type_node) 1619 { 1620 if (!TARGET_64BIT) 1621 cum->nregs = 0; 1622 cum->maybe_vaarg = true; 1623 } 1624 } 1625 } 1626 if ((!fntype && !libname) 1627 || (fntype && !TYPE_ARG_TYPES (fntype))) 1628 cum->maybe_vaarg = 1; 1629 1630 if (TARGET_DEBUG_ARG) 1631 fprintf (stderr, ", nregs=%d )\n", cum->nregs); 1632 1633 return; 1634} 1635 1636/* x86-64 register passing impleemntation. See x86-64 ABI for details. Goal 1637 of this code is to classify each 8bytes of incoming argument by the register 1638 class and assign registers accordingly. */ 1639 1640/* Return the union class of CLASS1 and CLASS2. 1641 See the x86-64 PS ABI for details. */ 1642 1643static enum x86_64_reg_class 1644merge_classes (class1, class2) 1645 enum x86_64_reg_class class1, class2; 1646{ 1647 /* Rule #1: If both classes are equal, this is the resulting class. */ 1648 if (class1 == class2) 1649 return class1; 1650 1651 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is 1652 the other class. */ 1653 if (class1 == X86_64_NO_CLASS) 1654 return class2; 1655 if (class2 == X86_64_NO_CLASS) 1656 return class1; 1657 1658 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. 
*/ 1659 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) 1660 return X86_64_MEMORY_CLASS; 1661 1662 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ 1663 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) 1664 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) 1665 return X86_64_INTEGERSI_CLASS; 1666 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS 1667 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) 1668 return X86_64_INTEGER_CLASS; 1669 1670 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */ 1671 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS 1672 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS) 1673 return X86_64_MEMORY_CLASS; 1674 1675 /* Rule #6: Otherwise class SSE is used. */ 1676 return X86_64_SSE_CLASS; 1677} 1678 1679/* Classify the argument of type TYPE and mode MODE. 1680 CLASSES will be filled by the register class used to pass each word 1681 of the operand. The number of words is returned. In case the parameter 1682 should be passed in memory, 0 is returned. As a special case for zero 1683 sized containers, classes[0] will be NO_CLASS and 1 is returned. 1684 1685 BIT_OFFSET is used internally for handling records and specifies offset 1686 of the offset in bits modulo 256 to avoid overflow cases. 1687 1688 See the x86-64 PS ABI for details. 1689*/ 1690 1691static int 1692classify_argument (mode, type, classes, bit_offset) 1693 enum machine_mode mode; 1694 tree type; 1695 enum x86_64_reg_class classes[MAX_CLASSES]; 1696 int bit_offset; 1697{ 1698 int bytes = 1699 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 1700 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 1701 1702 /* Variable sized entities are always passed/returned in memory. 
   */
  if (bytes < 0)
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
      if (bytes > 16)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signal the memory class, so handle it as a special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      if (TREE_CODE (type) == RECORD_TYPE)
	{
	  /* For classes first merge in the fields of the subclasses.  */
	  if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
	    {
	      tree bases = TYPE_BINFO_BASETYPES (type);
	      int n_bases = TREE_VEC_LENGTH (bases);
	      /* This `i' shadows the function-scope `i' above.  */
	      int i;

	      for (i = 0; i < n_bases; ++i)
		{
		  tree binfo = TREE_VEC_ELT (bases, i);
		  int num;
		  int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
		  tree type = BINFO_TYPE (binfo);

		  num = classify_argument (TYPE_MODE (type),
					   type, subclasses,
					   (offset + bit_offset) % 256);
		  if (!num)
		    return 0;
		  /* NOTE(review): this inner loop reuses `i', clobbering the
		     enclosing loop over n_bases -- after merging the first
		     base's classes, `i' equals num and the remaining bases
		     are iterated incorrectly.  Needs a separate index
		     variable.  */
		  for (i = 0; i < num; i++)
		    {
		      int pos = (offset + (bit_offset % 64)) / 8 / 8;
		      classes[i + pos] =
			merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  /* And now merge in the classes of the structure's own fields.  */
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      /* Mark every 8-byte word the bitfield overlaps as
			 INTEGER class.  */
		      for (i = int_bit_position (field) / 8 / 8;
			   i < (int_bit_position (field)
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i] =
			  merge_classes (X86_64_INTEGER_CLASS,
					 classes[i]);
		    }
		  else
		    {
		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					       TREE_TYPE (field), subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      for (i = 0; i < num; i++)
			{
			  int pos =
			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
			  classes[i + pos] =
			    merge_classes (subclasses[i], classes[i + pos]);
			}
		    }
		}
	    }
	}
      /* Arrays are handled as small records.  */
      else if (TREE_CODE (type) == ARRAY_TYPE)
	{
	  int num;
	  num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				   TREE_TYPE (type), subclasses, bit_offset);
	  if (!num)
	    return 0;

	  /* The partial classes are now full classes.  */
	  if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	    subclasses[0] = X86_64_SSE_CLASS;
	  if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
	    subclasses[0] = X86_64_INTEGER_CLASS;

	  /* Replicate the element classification across all words of the
	     array.  */
	  for (i = 0; i < words; i++)
	    classes[i] = subclasses[i % num];
	}
      /* Unions are similar to RECORD_TYPE but offset is always 0.  */
      else if (TREE_CODE (type) == UNION_TYPE
	       || TREE_CODE (type) == QUAL_UNION_TYPE)
	{
	  /* For classes first merge in the fields of the subclasses.  */
	  if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
	    {
	      tree bases = TYPE_BINFO_BASETYPES (type);
	      int n_bases = TREE_VEC_LENGTH (bases);
	      /* This `i' shadows the function-scope `i' above.  */
	      int i;

	      for (i = 0; i < n_bases; ++i)
		{
		  tree binfo = TREE_VEC_ELT (bases, i);
		  int num;
		  int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
		  tree type = BINFO_TYPE (binfo);

		  num = classify_argument (TYPE_MODE (type),
					   type, subclasses,
					   (offset + (bit_offset % 64)) % 256);
		  if (!num)
		    return 0;
		  /* NOTE(review): same `i' reuse bug as in the RECORD_TYPE
		     case above -- the inner merge loop clobbers the bases
		     loop counter.  */
		  for (i = 0; i < num; i++)
		    {
		      int pos = (offset + (bit_offset % 64)) / 8 / 8;
		      classes[i + pos] =
			merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  /* Union members all live at offset 0, so merge each field's
	     classification directly into classes[0..num).  */
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;
		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	}
      else
	abort ();

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
	    classes[i] = X86_64_SSE_CLASS;

	  /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.
	     NOTE(review): a lone X87UP is demoted to SSE class here --
	     verify this against the psABI rules before relying on it.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
	    classes[i] = X86_64_SSE_CLASS;
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* Classification of atomic types.  */
  switch (mode)
    {
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      /* Values fitting entirely in the low 32 bits of an eightbyte get the
	 narrower INTEGERSI class so SImode moves can be used.  */
      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      return 1;
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case CTImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      classes[2] = classes[3] = X86_64_INTEGER_CLASS;
      return 4;
    case SFmode:
      /* A float in the low half of an eightbyte keeps the SSESF class so
	 two of them can later be packed into one SSE register.  */
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case TFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TCmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      classes[2] = X86_64_X87_CLASS;
      classes[3] = X86_64_X87UP_CLASS;
      return 4;
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      /* 64bit MMX vectors are passed in memory.  */
      return 0;
    case BLKmode:
    case VOIDmode:
      return 0;
    default:
      abort ();
    }
}

/* Examine the argument and return set number of register required in each
   class.  Return 0 iff parameter should be passed in memory.  */
static int
examine_argument (mode, type, in_return, int_nregs, sse_nregs)
     enum machine_mode mode;
     tree type;
     int *int_nregs, *sse_nregs;
     int in_return;
{
  enum x86_64_reg_class class[MAX_CLASSES];
  int n = classify_argument (mode, type, class, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  /* classify_argument returning 0 means the value lives in memory.  */
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (class[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	/* x87 classes are only usable for return values; as an argument the
	   value must go to memory.  */
	if (!in_return)
	  return 0;
	break;
      case X86_64_MEMORY_CLASS:
	/* classify_argument never hands back MEMORY; it returns 0
	   instead.  */
	abort ();
      }
  return 1;
}

/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  Returns NULL when the
   argument must be passed in memory (or does not fit in the remaining
   registers), a single REG for the simple cases, and a PARALLEL of
   (register, byte offset) pairs otherwise.  */
static rtx
construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
     enum machine_mode mode;
     tree type;
     int in_return;
     int nintregs, nsseregs;
     const int * intreg;
     int sse_regno;
{
  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class class[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, class, 0);
  if (TARGET_DEBUG_ARG)
    {
      if (!n)
	fprintf (stderr, "Memory class\n");
      else
	{
	  fprintf (stderr, "Classes:");
	  for (i = 0; i < n; i++)
	    {
	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
	    }
	  fprintf (stderr, "\n");
	}
    }
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
    return NULL;
  /* Not enough free registers of the required kind left.  */
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	abort ();
      }
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (TFmode, FIRST_STACK_REG);
  /* A 16 byte integer quantity occupying two adjacent integer registers
     can be represented as a single TImode/CDImode REG.  */
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);
  if (n == 4
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
      && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (TCmode, FIRST_STACK_REG);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    /* Trailing partial eightbyte: pick the smallest integer mode
	       holding the remaining bytes.  */
	    tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (class[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode, *intreg),
					     GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (SFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (DFmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  /* An SSE slot followed by SSEUP occupies the whole 16 byte
	     register; use TImode and skip the SSEUP entry.  */
	  if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
	    tmpmode = TImode;
	  else
	    tmpmode = DImode;
	  exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					     gen_rtx_REG (tmpmode,
							  SSE_REGNO (sse_regno)),
					     GEN_INT (i*8));
	  if (tmpmode == TImode)
	    i++;
	  sse_regno++;
	  break;
	default:
	  abort ();
	}
    }
  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* whether or not the argument was named */
{
  int bytes =
    (mode == BLKmode) ?
int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
  if (TARGET_64BIT)
    {
      int int_nregs, sse_nregs;
      /* If the argument does not fit in registers, it is consumed from the
	 stack area only; otherwise charge the register counts.  */
      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
	cum->words += words;
      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
	{
	  cum->nregs -= int_nregs;
	  cum->sse_nregs -= sse_nregs;
	  cum->regno += int_nregs;
	  cum->sse_regno += sse_nregs;
	}
      else
	cum->words += words;
    }
  else
    {
      /* 32bit ABI: TImode arguments (SSE vectors) use SSE registers, all
	 other register-passed arguments use the integer regparm pool.  */
      if (TARGET_SSE && mode == TImode)
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      else
	{
	  cum->words += words;
	  cum->nregs -= words;
	  cum->regno += words;

	  /* Once the registers are exhausted, everything else goes on the
	     stack; reset so regno stays meaningful.  */
	  if (cum->nregs <= 0)
	    {
	      cum->nregs = 0;
	      cum->regno = 0;
	    }
	}
    }
  return;
}

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).
 */

rtx
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Handle a hidden AL argument containing number of registers for varargs
     x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
     any AL settings.  */
  if (mode == VOIDmode)
    {
      if (TARGET_64BIT)
	return GEN_INT (cum->maybe_vaarg
			? (cum->sse_nregs < 0
			   ? SSE_REGPARM_MAX
			   : cum->sse_regno)
			: -1);
      else
	return constm1_rtx;
    }
  if (TARGET_64BIT)
    ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
			       &x86_64_int_parameter_registers [cum->regno],
			       cum->sse_regno);
  else
    switch (mode)
      {
	/* For now, pass fp/complex values on the stack.  */
      default:
	break;

      case BLKmode:
	/* Variable sized BLKmode arguments cannot go in registers.  */
	if (bytes < 0)
	  break;
	/* FALLTHRU */
      case DImode:
      case SImode:
      case HImode:
      case QImode:
	if (words <= cum->nregs)
	  ret = gen_rtx_REG (mode, cum->regno);
	break;
      case TImode:
	/* SSE vector arguments under the 32bit regparm extension.  */
	if (cum->sse_nregs)
	  ret = gen_rtx_REG (mode, cum->sse_regno);
	break;
      }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	print_simple_rtl (stderr, ret);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}

/* Return true when TYPE should be 128bit aligned for 32bit argument passing
   ABI  */
static bool
contains_128bit_aligned_vector_p (type)
     tree type;
{
  enum machine_mode mode = TYPE_MODE (type);
  /* An SSE mode type itself wants 128bit alignment unless the user
     explicitly lowered it.  */
  if (SSE_REG_MODE_P (mode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      if (TREE_CODE (type) == RECORD_TYPE
	  || TREE_CODE (type) == UNION_TYPE
	  || TREE_CODE (type) == QUAL_UNION_TYPE)
	{
	  tree field;

	  /* First check the base classes, if any.  */
	  if (TYPE_BINFO (type) != NULL
	      && TYPE_BINFO_BASETYPES (type) != NULL)
	    {
	      tree bases = TYPE_BINFO_BASETYPES (type);
	      int n_bases = TREE_VEC_LENGTH (bases);
	      int i;

	      for (i = 0; i < n_bases; ++i)
		{
		  tree binfo = TREE_VEC_ELT (bases, i);
		  tree type = BINFO_TYPE (binfo);

		  if (contains_128bit_aligned_vector_p (type))
		    return true;
		}
	    }
	  /* And now check the fields of the structure.
	     */
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL
		  && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
		return true;
	    }
	}
      /* Only needed if some language passes arrays by value.  */
      else if (TREE_CODE (type) == ARRAY_TYPE)
	{
	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
	    return true;
	}
      else
	abort ();
    }
  return false;
}

/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

int
function_arg_pass_by_reference (cum, mode, type, named)
     CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     tree type;
     int named ATTRIBUTE_UNUSED;
{
  if (!TARGET_64BIT)
    return 0;

  /* Only variable sized types are passed by reference here.  */
  if (type && int_size_in_bytes (type) == -1)
    {
      if (TARGET_DEBUG_ARG)
	fprintf (stderr, "function_arg_pass_by_reference\n");
      return 1;
    }

  return 0;
}

/* Gives the alignment boundary, in bits, of an argument with the specified mode
   and type.  */

int
ix86_function_arg_boundary (mode, type)
     enum machine_mode mode;
     tree type;
{
  int align;
  if (type)
    align = TYPE_ALIGN (type);
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  if (!TARGET_64BIT)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!type)
	{
	  if (!SSE_REG_MODE_P (mode))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!contains_128bit_aligned_vector_p (type))
	    align = PARM_BOUNDARY;
	}
      /* Extended alignment without SSE support would be inconsistent.  */
      if (align != PARM_BOUNDARY && !TARGET_SSE)
	abort ();
    }
  if (align > 128)
    align = 128;
  return align;
}

/* Return true if N is a possible register number of function value.  */
bool
ix86_function_value_regno_p (regno)
     int regno;
{
  if (!TARGET_64BIT)
    {
      return ((regno) == 0
	      || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
	      || ((regno) == FIRST_SSE_REG && TARGET_SSE));
    }
  return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
	  || ((regno) == FIRST_SSE_REG && TARGET_SSE)
	  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
}

/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */
rtx
ix86_function_value (valtype)
     tree valtype;
{
  if (TARGET_64BIT)
    {
      rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
				     REGPARM_MAX, SSE_REGPARM_MAX,
				     x86_64_int_return_registers, 0);
      /* For zero sized structures, construct_container returns NULL, but we
	 need to keep the rest of the compiler happy by returning a
	 meaningful value.  */
      if (!ret)
	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
      return ret;
    }
  else
    return gen_rtx_REG (TYPE_MODE (valtype),
			ix86_value_regno (TYPE_MODE (valtype)));
}

/* Return false iff type is returned in memory.
 */
int
ix86_return_in_memory (type)
     tree type;
{
  int needed_intregs, needed_sseregs;
  if (TARGET_64BIT)
    {
      /* On x86-64 the classification algorithm decides.  */
      return !examine_argument (TYPE_MODE (type), type, 1,
				&needed_intregs, &needed_sseregs);
    }
  else
    {
      /* 32bit: BLKmode values, 8 byte vectors (returned via memory rather
	 than MMX), and large non-vector aggregates go to memory.  */
      if (TYPE_MODE (type) == BLKmode
	  || (VECTOR_MODE_P (TYPE_MODE (type))
	      && int_size_in_bytes (type) == 8)
	  || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
	      && TYPE_MODE (type) != TFmode
	      && !VECTOR_MODE_P (TYPE_MODE (type))))
	return 1;
      return 0;
    }
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */
rtx
ix86_libcall_value (mode)
     enum machine_mode mode;
{
  if (TARGET_64BIT)
    {
      switch (mode)
	{
	case SFmode:
	case SCmode:
	case DFmode:
	case DCmode:
	  return gen_rtx_REG (mode, FIRST_SSE_REG);
	case TFmode:
	case TCmode:
	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
	default:
	  return gen_rtx_REG (mode, 0);
	}
    }
  else
    return gen_rtx_REG (mode, ix86_value_regno (mode));
}

/* Given a mode, return the register to use for a return value.  */

static int
ix86_value_regno (mode)
     enum machine_mode mode;
{
  if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
    return FIRST_FLOAT_REG;
  if (mode == TImode || VECTOR_MODE_P (mode))
    return FIRST_SSE_REG;
  return 0;
}

/* Create the va_list data type.  */

tree
ix86_build_va_list ()
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT)
    return build_pointer_type (char_type_node);

  /* x86-64 va_list is a struct { unsigned gp_offset, fp_offset;
     void *overflow_arg_area, *reg_save_area; }.  */
  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}

/* Perform any needed actions needed for a function that is receiving a
   variable number of arguments.

   CUM is as above.

   MODE and TYPE are the mode and type of the current parameter.

   PRETEND_SIZE is a variable that should be set to the amount of stack
   that must be pushed by the prolog to pretend that our caller pushed
   it.

   Normally, this macro will push all remaining incoming registers on the
   stack and set PRETEND_SIZE to the length of the registers pushed.
 */

void
ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int *pretend_size ATTRIBUTE_UNUSED;
     int no_rtl;

{
  CUMULATIVE_ARGS next_cum;
  rtx save_area = NULL_RTX, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  int set;
  tree fntype;
  int stdarg_p;
  int i;

  /* Only the x86-64 register save area scheme needs work here.  */
  if (!TARGET_64BIT)
    return;

  /* Indicate to allocate space on the stack for varargs save area.  */
  ix86_save_varrargs_registers = 1;

  cfun->stack_alignment_needed = 128;

  fntype = TREE_TYPE (current_function_decl);
  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		  != void_type_node));

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p)
    function_arg_advance (&next_cum, mode, type, 1);

  if (!no_rtl)
    save_area = frame_pointer_rtx;

  set = get_varargs_alias_set ();

  /* Spill the unconsumed integer parameter registers to the save area.  */
  for (i = next_cum.regno; i < ix86_regparm; i++)
    {
      mem = gen_rtx_MEM (Pmode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
					x86_64_int_parameter_registers[i]));
    }

  if (next_cum.sse_nregs)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains
	 number of SSE parameter registers used to call this function.  We use
	 sse_prologue_save insn template that produces computed jump across
	 SSE saves.  We need some preparation work to get this working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute address to jump to :
	 label - 5*eax + nnamed_sse_arguments*5  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      gen_rtx_MULT (Pmode, nsse_reg,
					    GEN_INT (4))));
      if (next_cum.sse_regno)
	emit_move_insn
	  (nsse_reg,
	   gen_rtx_CONST (DImode,
			  gen_rtx_PLUS (DImode,
					label_ref,
					GEN_INT (next_cum.sse_regno * 4))));
      else
	emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute address of memory block we save into.  We always use pointer
	 pointing 127 bytes after first byte to store - this is needed to keep
	 instruction size limited by 4 bytes.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      plus_constant (save_area,
					     8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
					GEN_INT (next_cum.sse_regno), label));
    }

}

/* Implement va_start.  */

void
ix86_va_start (valist, nextarg)
     tree valist;
     rtx nextarg;
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  /* Only 64bit target needs something special.
     */
  if (!TARGET_64BIT)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  /* Fields in the order laid out by ix86_build_va_list.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
	     (int) words, (int) n_gpr, (int) n_fpr);

  /* gp_offset = bytes of integer registers already consumed.  */
  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
	     build_int_2 (n_gpr * 8, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* fp_offset starts past the 8*REGPARM_MAX integer save slots; each SSE
     slot is 16 bytes wide.  */
  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
	     build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the overflow area.  */
  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
  if (words != 0)
    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
	       build_int_2 (words * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the register save area.
     Prologue of the function save it right above stack frame.  */
  t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
  t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}

/* Implement va_arg.  */
rtx
ix86_va_arg (valist, type)
     tree valist, type;
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  rtx lab_false, lab_over = NULL_RTX;
  rtx addr_rtx, r;
  rtx container;
  int indirect_p = 0;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      return std_expand_builtin_va_arg (valist, type);
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  size = int_size_in_bytes (type);
  if (size == -1)
    {
      /* Passed by reference.  */
      indirect_p = 1;
      type = build_pointer_type (type);
      size = int_size_in_bytes (type);
    }
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  container = construct_container (TYPE_MODE (type), type, 0,
				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
  /*
   * Pull the value out of the saved registers ...
   */

  addr_rtx = gen_reg_rtx (Pmode);

  if (container)
    {
      rtx int_addr_rtx, sse_addr_rtx;
      int needed_intregs, needed_sseregs;
      int need_temp;

      lab_over = gen_label_rtx ();
      lab_false = gen_label_rtx ();

      examine_argument (TYPE_MODE (type), type, 0,
			&needed_intregs, &needed_sseregs);


      /* Over-aligned values cannot be read directly from the save area.  */
      need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
		   || TYPE_ALIGN (type) > 128);

      /* In case we are passing structure, verify that it is consecutive
	 block on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr_rtx = addr_rtx;
	  sse_addr_rtx = addr_rtx;
	}
      else
	{
	  int_addr_rtx = gen_reg_rtx (Pmode);
	  sse_addr_rtx = gen_reg_rtx (Pmode);
	}
      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  emit_cmp_and_jump_insns (expand_expr
				   (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
				   GEN_INT ((REGPARM_MAX - needed_intregs +
					     1) * 8), GE, const1_rtx, SImode,
				   1, lab_false);
	}
      if (needed_sseregs)
	{
	  emit_cmp_and_jump_insns (expand_expr
				   (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
				   GEN_INT ((SSE_REGPARM_MAX -
					     needed_sseregs + 1) * 16 +
					    REGPARM_MAX * 8), GE, const1_rtx,
				   SImode, 1, lab_false);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
	  r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
	  if (r != int_addr_rtx)
	    emit_move_insn (int_addr_rtx, r);
	}
      if (needed_sseregs)
	{
	  t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
	  r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
	  if (r != sse_addr_rtx)
	    emit_move_insn (sse_addr_rtx, r);
	}
      if (need_temp)
	{
	  int i;
	  rtx mem;
	  rtx x;

	  /* Never use the memory itself, as it has the alias set.  */
	  x = XEXP (assign_temp (type, 0, 1, 0), 0);
	  mem = gen_rtx_MEM (BLKmode, x);
	  force_operand (x, addr_rtx);
	  set_mem_alias_set (mem, get_varargs_alias_set ());
	  set_mem_align (mem, BITS_PER_UNIT);

	  /* Copy each piece out of the save area into the temporary, at the
	     byte offset recorded in the container's EXPR_LIST.  */
	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      rtx src_addr;
	      rtx src_mem;
	      int src_offset;
	      rtx dest_mem;

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr_rtx;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr_rtx;
		  src_offset = REGNO (reg) * 8;
		}
	      src_mem = gen_rtx_MEM (mode, src_addr);
	      set_mem_alias_set (src_mem, get_varargs_alias_set ());
	      src_mem = adjust_address (src_mem, mode, src_offset);
	      dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
	      emit_move_insn (dest_mem, src_mem);
	    }
	}

      /* Advance the gp_offset / fp_offset fields past the registers just
	 consumed.  */
      if (needed_intregs)
	{
	  t =
	    build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		   build_int_2 (needed_intregs * 8, 0));
	  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
	}
      if (needed_sseregs)
	{
	  t =
	    build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		   build_int_2 (needed_sseregs * 16, 0));
	  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
	}

      emit_jump_insn (gen_jump (lab_over));
      emit_barrier ();
      emit_label (lab_false);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.
*/ 2955 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64) 2956 t = ovf; 2957 else 2958 { 2959 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8; 2960 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0)); 2961 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1)); 2962 } 2963 t = save_expr (t); 2964 2965 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL); 2966 if (r != addr_rtx) 2967 emit_move_insn (addr_rtx, r); 2968 2969 t = 2970 build (PLUS_EXPR, TREE_TYPE (t), t, 2971 build_int_2 (rsize * UNITS_PER_WORD, 0)); 2972 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t); 2973 TREE_SIDE_EFFECTS (t) = 1; 2974 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 2975 2976 if (container) 2977 emit_label (lab_over); 2978 2979 if (indirect_p) 2980 { 2981 r = gen_rtx_MEM (Pmode, addr_rtx); 2982 set_mem_alias_set (r, get_varargs_alias_set ()); 2983 emit_move_insn (addr_rtx, r); 2984 } 2985 2986 return addr_rtx; 2987} 2988 2989/* Return nonzero if OP is either a i387 or SSE fp register. */ 2990int 2991any_fp_register_operand (op, mode) 2992 rtx op; 2993 enum machine_mode mode ATTRIBUTE_UNUSED; 2994{ 2995 return ANY_FP_REG_P (op); 2996} 2997 2998/* Return nonzero if OP is an i387 fp register. */ 2999int 3000fp_register_operand (op, mode) 3001 rtx op; 3002 enum machine_mode mode ATTRIBUTE_UNUSED; 3003{ 3004 return FP_REG_P (op); 3005} 3006 3007/* Return nonzero if OP is a non-fp register_operand. */ 3008int 3009register_and_not_any_fp_reg_operand (op, mode) 3010 rtx op; 3011 enum machine_mode mode; 3012{ 3013 return register_operand (op, mode) && !ANY_FP_REG_P (op); 3014} 3015 3016/* Return nonzero of OP is a register operand other than an 3017 i387 fp register. */ 3018int 3019register_and_not_fp_reg_operand (op, mode) 3020 rtx op; 3021 enum machine_mode mode; 3022{ 3023 return register_operand (op, mode) && !FP_REG_P (op); 3024} 3025 3026/* Return nonzero if OP is general operand representable on x86_64. 
*/ 3027 3028int 3029x86_64_general_operand (op, mode) 3030 rtx op; 3031 enum machine_mode mode; 3032{ 3033 if (!TARGET_64BIT) 3034 return general_operand (op, mode); 3035 if (nonimmediate_operand (op, mode)) 3036 return 1; 3037 return x86_64_sign_extended_value (op); 3038} 3039 3040/* Return nonzero if OP is general operand representable on x86_64 3041 as either sign extended or zero extended constant. */ 3042 3043int 3044x86_64_szext_general_operand (op, mode) 3045 rtx op; 3046 enum machine_mode mode; 3047{ 3048 if (!TARGET_64BIT) 3049 return general_operand (op, mode); 3050 if (nonimmediate_operand (op, mode)) 3051 return 1; 3052 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op); 3053} 3054 3055/* Return nonzero if OP is nonmemory operand representable on x86_64. */ 3056 3057int 3058x86_64_nonmemory_operand (op, mode) 3059 rtx op; 3060 enum machine_mode mode; 3061{ 3062 if (!TARGET_64BIT) 3063 return nonmemory_operand (op, mode); 3064 if (register_operand (op, mode)) 3065 return 1; 3066 return x86_64_sign_extended_value (op); 3067} 3068 3069/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */ 3070 3071int 3072x86_64_movabs_operand (op, mode) 3073 rtx op; 3074 enum machine_mode mode; 3075{ 3076 if (!TARGET_64BIT || !flag_pic) 3077 return nonmemory_operand (op, mode); 3078 if (register_operand (op, mode) || x86_64_sign_extended_value (op)) 3079 return 1; 3080 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op)) 3081 return 1; 3082 return 0; 3083} 3084 3085/* Return nonzero if OPNUM's MEM should be matched 3086 in movabs* patterns. 
*/ 3087 3088int 3089ix86_check_movabs (insn, opnum) 3090 rtx insn; 3091 int opnum; 3092{ 3093 rtx set, mem; 3094 3095 set = PATTERN (insn); 3096 if (GET_CODE (set) == PARALLEL) 3097 set = XVECEXP (set, 0, 0); 3098 if (GET_CODE (set) != SET) 3099 abort (); 3100 mem = XEXP (set, opnum); 3101 while (GET_CODE (mem) == SUBREG) 3102 mem = SUBREG_REG (mem); 3103 if (GET_CODE (mem) != MEM) 3104 abort (); 3105 return (volatile_ok || !MEM_VOLATILE_P (mem)); 3106} 3107 3108/* Return nonzero if OP is nonmemory operand representable on x86_64. */ 3109 3110int 3111x86_64_szext_nonmemory_operand (op, mode) 3112 rtx op; 3113 enum machine_mode mode; 3114{ 3115 if (!TARGET_64BIT) 3116 return nonmemory_operand (op, mode); 3117 if (register_operand (op, mode)) 3118 return 1; 3119 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op); 3120} 3121 3122/* Return nonzero if OP is immediate operand representable on x86_64. */ 3123 3124int 3125x86_64_immediate_operand (op, mode) 3126 rtx op; 3127 enum machine_mode mode; 3128{ 3129 if (!TARGET_64BIT) 3130 return immediate_operand (op, mode); 3131 return x86_64_sign_extended_value (op); 3132} 3133 3134/* Return nonzero if OP is immediate operand representable on x86_64. */ 3135 3136int 3137x86_64_zext_immediate_operand (op, mode) 3138 rtx op; 3139 enum machine_mode mode ATTRIBUTE_UNUSED; 3140{ 3141 return x86_64_zero_extended_value (op); 3142} 3143 3144/* Return nonzero if OP is (const_int 1), else return zero. */ 3145 3146int 3147const_int_1_operand (op, mode) 3148 rtx op; 3149 enum machine_mode mode ATTRIBUTE_UNUSED; 3150{ 3151 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1); 3152} 3153 3154/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand 3155 for shift & compare patterns, as shifting by 0 does not change flags), 3156 else return zero. 
*/ 3157 3158int 3159const_int_1_31_operand (op, mode) 3160 rtx op; 3161 enum machine_mode mode ATTRIBUTE_UNUSED; 3162{ 3163 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31); 3164} 3165 3166/* Returns 1 if OP is either a symbol reference or a sum of a symbol 3167 reference and a constant. */ 3168 3169int 3170symbolic_operand (op, mode) 3171 register rtx op; 3172 enum machine_mode mode ATTRIBUTE_UNUSED; 3173{ 3174 switch (GET_CODE (op)) 3175 { 3176 case SYMBOL_REF: 3177 case LABEL_REF: 3178 return 1; 3179 3180 case CONST: 3181 op = XEXP (op, 0); 3182 if (GET_CODE (op) == SYMBOL_REF 3183 || GET_CODE (op) == LABEL_REF 3184 || (GET_CODE (op) == UNSPEC 3185 && (XINT (op, 1) == UNSPEC_GOT 3186 || XINT (op, 1) == UNSPEC_GOTOFF 3187 || XINT (op, 1) == UNSPEC_GOTPCREL))) 3188 return 1; 3189 if (GET_CODE (op) != PLUS 3190 || GET_CODE (XEXP (op, 1)) != CONST_INT) 3191 return 0; 3192 3193 op = XEXP (op, 0); 3194 if (GET_CODE (op) == SYMBOL_REF 3195 || GET_CODE (op) == LABEL_REF) 3196 return 1; 3197 /* Only @GOTOFF gets offsets. */ 3198 if (GET_CODE (op) != UNSPEC 3199 || XINT (op, 1) != UNSPEC_GOTOFF) 3200 return 0; 3201 3202 op = XVECEXP (op, 0, 0); 3203 if (GET_CODE (op) == SYMBOL_REF 3204 || GET_CODE (op) == LABEL_REF) 3205 return 1; 3206 return 0; 3207 3208 default: 3209 return 0; 3210 } 3211} 3212 3213/* Return true if the operand contains a @GOT or @GOTOFF reference. 
*/ 3214 3215int 3216pic_symbolic_operand (op, mode) 3217 register rtx op; 3218 enum machine_mode mode ATTRIBUTE_UNUSED; 3219{ 3220 if (GET_CODE (op) != CONST) 3221 return 0; 3222 op = XEXP (op, 0); 3223 if (TARGET_64BIT) 3224 { 3225 if (GET_CODE (XEXP (op, 0)) == UNSPEC) 3226 return 1; 3227 } 3228 else 3229 { 3230 if (GET_CODE (op) == UNSPEC) 3231 return 1; 3232 if (GET_CODE (op) != PLUS 3233 || GET_CODE (XEXP (op, 1)) != CONST_INT) 3234 return 0; 3235 op = XEXP (op, 0); 3236 if (GET_CODE (op) == UNSPEC) 3237 return 1; 3238 } 3239 return 0; 3240} 3241 3242/* Return true if OP is a symbolic operand that resolves locally. */ 3243 3244static int 3245local_symbolic_operand (op, mode) 3246 rtx op; 3247 enum machine_mode mode ATTRIBUTE_UNUSED; 3248{ 3249 if (GET_CODE (op) == CONST 3250 && GET_CODE (XEXP (op, 0)) == PLUS 3251 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT) 3252 op = XEXP (XEXP (op, 0), 0); 3253 3254 if (GET_CODE (op) == LABEL_REF) 3255 return 1; 3256 3257 if (GET_CODE (op) != SYMBOL_REF) 3258 return 0; 3259 3260 /* These we've been told are local by varasm and encode_section_info 3261 respectively. */ 3262 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op)) 3263 return 1; 3264 3265 /* There is, however, a not insubstantial body of code in the rest of 3266 the compiler that assumes it can just stick the results of 3267 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */ 3268 /* ??? This is a hack. Should update the body of the compiler to 3269 always create a DECL an invoke targetm.encode_section_info. */ 3270 if (strncmp (XSTR (op, 0), internal_label_prefix, 3271 internal_label_prefix_len) == 0) 3272 return 1; 3273 3274 return 0; 3275} 3276 3277/* Test for various thread-local symbols. See ix86_encode_section_info. 
*/ 3278 3279int 3280tls_symbolic_operand (op, mode) 3281 register rtx op; 3282 enum machine_mode mode ATTRIBUTE_UNUSED; 3283{ 3284 const char *symbol_str; 3285 3286 if (GET_CODE (op) != SYMBOL_REF) 3287 return 0; 3288 symbol_str = XSTR (op, 0); 3289 3290 if (symbol_str[0] != '%') 3291 return 0; 3292 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars; 3293} 3294 3295static int 3296tls_symbolic_operand_1 (op, kind) 3297 rtx op; 3298 enum tls_model kind; 3299{ 3300 const char *symbol_str; 3301 3302 if (GET_CODE (op) != SYMBOL_REF) 3303 return 0; 3304 symbol_str = XSTR (op, 0); 3305 3306 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind]; 3307} 3308 3309int 3310global_dynamic_symbolic_operand (op, mode) 3311 register rtx op; 3312 enum machine_mode mode ATTRIBUTE_UNUSED; 3313{ 3314 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC); 3315} 3316 3317int 3318local_dynamic_symbolic_operand (op, mode) 3319 register rtx op; 3320 enum machine_mode mode ATTRIBUTE_UNUSED; 3321{ 3322 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC); 3323} 3324 3325int 3326initial_exec_symbolic_operand (op, mode) 3327 register rtx op; 3328 enum machine_mode mode ATTRIBUTE_UNUSED; 3329{ 3330 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC); 3331} 3332 3333int 3334local_exec_symbolic_operand (op, mode) 3335 register rtx op; 3336 enum machine_mode mode ATTRIBUTE_UNUSED; 3337{ 3338 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC); 3339} 3340 3341/* Test for a valid operand for a call instruction. Don't allow the 3342 arg pointer register or virtual regs since they may decay into 3343 reg + const, which the patterns can't handle. */ 3344 3345int 3346call_insn_operand (op, mode) 3347 rtx op; 3348 enum machine_mode mode ATTRIBUTE_UNUSED; 3349{ 3350 /* Disallow indirect through a virtual register. This leads to 3351 compiler aborts when trying to eliminate them. 
*/ 3352 if (GET_CODE (op) == REG 3353 && (op == arg_pointer_rtx 3354 || op == frame_pointer_rtx 3355 || (REGNO (op) >= FIRST_PSEUDO_REGISTER 3356 && REGNO (op) <= LAST_VIRTUAL_REGISTER))) 3357 return 0; 3358 3359 /* Disallow `call 1234'. Due to varying assembler lameness this 3360 gets either rejected or translated to `call .+1234'. */ 3361 if (GET_CODE (op) == CONST_INT) 3362 return 0; 3363 3364 /* Explicitly allow SYMBOL_REF even if pic. */ 3365 if (GET_CODE (op) == SYMBOL_REF) 3366 return 1; 3367 3368 /* Otherwise we can allow any general_operand in the address. */ 3369 return general_operand (op, Pmode); 3370} 3371 3372int 3373constant_call_address_operand (op, mode) 3374 rtx op; 3375 enum machine_mode mode ATTRIBUTE_UNUSED; 3376{ 3377 if (GET_CODE (op) == CONST 3378 && GET_CODE (XEXP (op, 0)) == PLUS 3379 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT) 3380 op = XEXP (XEXP (op, 0), 0); 3381 return GET_CODE (op) == SYMBOL_REF; 3382} 3383 3384/* Match exactly zero and one. */ 3385 3386int 3387const0_operand (op, mode) 3388 register rtx op; 3389 enum machine_mode mode; 3390{ 3391 return op == CONST0_RTX (mode); 3392} 3393 3394int 3395const1_operand (op, mode) 3396 register rtx op; 3397 enum machine_mode mode ATTRIBUTE_UNUSED; 3398{ 3399 return op == const1_rtx; 3400} 3401 3402/* Match 2, 4, or 8. Used for leal multiplicands. */ 3403 3404int 3405const248_operand (op, mode) 3406 register rtx op; 3407 enum machine_mode mode ATTRIBUTE_UNUSED; 3408{ 3409 return (GET_CODE (op) == CONST_INT 3410 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8)); 3411} 3412 3413/* True if this is a constant appropriate for an increment or decremenmt. */ 3414 3415int 3416incdec_operand (op, mode) 3417 register rtx op; 3418 enum machine_mode mode ATTRIBUTE_UNUSED; 3419{ 3420 /* On Pentium4, the inc and dec operations causes extra dependency on flag 3421 registers, since carry flag is not set. 
*/ 3422 if (TARGET_PENTIUM4 && !optimize_size) 3423 return 0; 3424 return op == const1_rtx || op == constm1_rtx; 3425} 3426 3427/* Return nonzero if OP is acceptable as operand of DImode shift 3428 expander. */ 3429 3430int 3431shiftdi_operand (op, mode) 3432 rtx op; 3433 enum machine_mode mode ATTRIBUTE_UNUSED; 3434{ 3435 if (TARGET_64BIT) 3436 return nonimmediate_operand (op, mode); 3437 else 3438 return register_operand (op, mode); 3439} 3440 3441/* Return false if this is the stack pointer, or any other fake 3442 register eliminable to the stack pointer. Otherwise, this is 3443 a register operand. 3444 3445 This is used to prevent esp from being used as an index reg. 3446 Which would only happen in pathological cases. */ 3447 3448int 3449reg_no_sp_operand (op, mode) 3450 register rtx op; 3451 enum machine_mode mode; 3452{ 3453 rtx t = op; 3454 if (GET_CODE (t) == SUBREG) 3455 t = SUBREG_REG (t); 3456 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx) 3457 return 0; 3458 3459 return register_operand (op, mode); 3460} 3461 3462int 3463mmx_reg_operand (op, mode) 3464 register rtx op; 3465 enum machine_mode mode ATTRIBUTE_UNUSED; 3466{ 3467 return MMX_REG_P (op); 3468} 3469 3470/* Return false if this is any eliminable register. Otherwise 3471 general_operand. */ 3472 3473int 3474general_no_elim_operand (op, mode) 3475 register rtx op; 3476 enum machine_mode mode; 3477{ 3478 rtx t = op; 3479 if (GET_CODE (t) == SUBREG) 3480 t = SUBREG_REG (t); 3481 if (t == arg_pointer_rtx || t == frame_pointer_rtx 3482 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx 3483 || t == virtual_stack_dynamic_rtx) 3484 return 0; 3485 if (REG_P (t) 3486 && REGNO (t) >= FIRST_VIRTUAL_REGISTER 3487 && REGNO (t) <= LAST_VIRTUAL_REGISTER) 3488 return 0; 3489 3490 return general_operand (op, mode); 3491} 3492 3493/* Return false if this is any eliminable register. Otherwise 3494 register_operand or const_int. 
*/ 3495 3496int 3497nonmemory_no_elim_operand (op, mode) 3498 register rtx op; 3499 enum machine_mode mode; 3500{ 3501 rtx t = op; 3502 if (GET_CODE (t) == SUBREG) 3503 t = SUBREG_REG (t); 3504 if (t == arg_pointer_rtx || t == frame_pointer_rtx 3505 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx 3506 || t == virtual_stack_dynamic_rtx) 3507 return 0; 3508 3509 return GET_CODE (op) == CONST_INT || register_operand (op, mode); 3510} 3511 3512/* Return false if this is any eliminable register or stack register, 3513 otherwise work like register_operand. */ 3514 3515int 3516index_register_operand (op, mode) 3517 register rtx op; 3518 enum machine_mode mode; 3519{ 3520 rtx t = op; 3521 if (GET_CODE (t) == SUBREG) 3522 t = SUBREG_REG (t); 3523 if (!REG_P (t)) 3524 return 0; 3525 if (t == arg_pointer_rtx 3526 || t == frame_pointer_rtx 3527 || t == virtual_incoming_args_rtx 3528 || t == virtual_stack_vars_rtx 3529 || t == virtual_stack_dynamic_rtx 3530 || REGNO (t) == STACK_POINTER_REGNUM) 3531 return 0; 3532 3533 return general_operand (op, mode); 3534} 3535 3536/* Return true if op is a Q_REGS class register. */ 3537 3538int 3539q_regs_operand (op, mode) 3540 register rtx op; 3541 enum machine_mode mode; 3542{ 3543 if (mode != VOIDmode && GET_MODE (op) != mode) 3544 return 0; 3545 if (GET_CODE (op) == SUBREG) 3546 op = SUBREG_REG (op); 3547 return ANY_QI_REG_P (op); 3548} 3549 3550/* Return true if op is a NON_Q_REGS class register. */ 3551 3552int 3553non_q_regs_operand (op, mode) 3554 register rtx op; 3555 enum machine_mode mode; 3556{ 3557 if (mode != VOIDmode && GET_MODE (op) != mode) 3558 return 0; 3559 if (GET_CODE (op) == SUBREG) 3560 op = SUBREG_REG (op); 3561 return NON_QI_REG_P (op); 3562} 3563 3564/* Return 1 when OP is operand acceptable for standard SSE move. 
*/ 3565int 3566vector_move_operand (op, mode) 3567 rtx op; 3568 enum machine_mode mode; 3569{ 3570 if (nonimmediate_operand (op, mode)) 3571 return 1; 3572 if (GET_MODE (op) != mode && mode != VOIDmode) 3573 return 0; 3574 return (op == CONST0_RTX (GET_MODE (op))); 3575} 3576 3577/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS 3578 insns. */ 3579int 3580sse_comparison_operator (op, mode) 3581 rtx op; 3582 enum machine_mode mode ATTRIBUTE_UNUSED; 3583{ 3584 enum rtx_code code = GET_CODE (op); 3585 switch (code) 3586 { 3587 /* Operations supported directly. */ 3588 case EQ: 3589 case LT: 3590 case LE: 3591 case UNORDERED: 3592 case NE: 3593 case UNGE: 3594 case UNGT: 3595 case ORDERED: 3596 return 1; 3597 /* These are equivalent to ones above in non-IEEE comparisons. */ 3598 case UNEQ: 3599 case UNLT: 3600 case UNLE: 3601 case LTGT: 3602 case GE: 3603 case GT: 3604 return !TARGET_IEEE_FP; 3605 default: 3606 return 0; 3607 } 3608} 3609/* Return 1 if OP is a valid comparison operator in valid mode. 
*/ 3610int 3611ix86_comparison_operator (op, mode) 3612 register rtx op; 3613 enum machine_mode mode; 3614{ 3615 enum machine_mode inmode; 3616 enum rtx_code code = GET_CODE (op); 3617 if (mode != VOIDmode && GET_MODE (op) != mode) 3618 return 0; 3619 if (GET_RTX_CLASS (code) != '<') 3620 return 0; 3621 inmode = GET_MODE (XEXP (op, 0)); 3622 3623 if (inmode == CCFPmode || inmode == CCFPUmode) 3624 { 3625 enum rtx_code second_code, bypass_code; 3626 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 3627 return (bypass_code == NIL && second_code == NIL); 3628 } 3629 switch (code) 3630 { 3631 case EQ: case NE: 3632 return 1; 3633 case LT: case GE: 3634 if (inmode == CCmode || inmode == CCGCmode 3635 || inmode == CCGOCmode || inmode == CCNOmode) 3636 return 1; 3637 return 0; 3638 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU: 3639 if (inmode == CCmode) 3640 return 1; 3641 return 0; 3642 case GT: case LE: 3643 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode) 3644 return 1; 3645 return 0; 3646 default: 3647 return 0; 3648 } 3649} 3650 3651/* Return 1 if OP is a comparison operator that can be issued by fcmov. */ 3652 3653int 3654fcmov_comparison_operator (op, mode) 3655 register rtx op; 3656 enum machine_mode mode; 3657{ 3658 enum machine_mode inmode; 3659 enum rtx_code code = GET_CODE (op); 3660 if (mode != VOIDmode && GET_MODE (op) != mode) 3661 return 0; 3662 if (GET_RTX_CLASS (code) != '<') 3663 return 0; 3664 inmode = GET_MODE (XEXP (op, 0)); 3665 if (inmode == CCFPmode || inmode == CCFPUmode) 3666 { 3667 enum rtx_code second_code, bypass_code; 3668 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 3669 if (bypass_code != NIL || second_code != NIL) 3670 return 0; 3671 code = ix86_fp_compare_code_to_integer (code); 3672 } 3673 /* i387 supports just limited amount of conditional codes. 
*/ 3674 switch (code) 3675 { 3676 case LTU: case GTU: case LEU: case GEU: 3677 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode) 3678 return 1; 3679 return 0; 3680 case ORDERED: case UNORDERED: 3681 case EQ: case NE: 3682 return 1; 3683 default: 3684 return 0; 3685 } 3686} 3687 3688/* Return 1 if OP is a binary operator that can be promoted to wider mode. */ 3689 3690int 3691promotable_binary_operator (op, mode) 3692 register rtx op; 3693 enum machine_mode mode ATTRIBUTE_UNUSED; 3694{ 3695 switch (GET_CODE (op)) 3696 { 3697 case MULT: 3698 /* Modern CPUs have same latency for HImode and SImode multiply, 3699 but 386 and 486 do HImode multiply faster. */ 3700 return ix86_cpu > PROCESSOR_I486; 3701 case PLUS: 3702 case AND: 3703 case IOR: 3704 case XOR: 3705 case ASHIFT: 3706 return 1; 3707 default: 3708 return 0; 3709 } 3710} 3711 3712/* Nearly general operand, but accept any const_double, since we wish 3713 to be able to drop them into memory rather than have them get pulled 3714 into registers. */ 3715 3716int 3717cmp_fp_expander_operand (op, mode) 3718 register rtx op; 3719 enum machine_mode mode; 3720{ 3721 if (mode != VOIDmode && mode != GET_MODE (op)) 3722 return 0; 3723 if (GET_CODE (op) == CONST_DOUBLE) 3724 return 1; 3725 return general_operand (op, mode); 3726} 3727 3728/* Match an SI or HImode register for a zero_extract. */ 3729 3730int 3731ext_register_operand (op, mode) 3732 register rtx op; 3733 enum machine_mode mode ATTRIBUTE_UNUSED; 3734{ 3735 int regno; 3736 if ((!TARGET_64BIT || GET_MODE (op) != DImode) 3737 && GET_MODE (op) != SImode && GET_MODE (op) != HImode) 3738 return 0; 3739 3740 if (!register_operand (op, VOIDmode)) 3741 return 0; 3742 3743 /* Be curefull to accept only registers having upper parts. */ 3744 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op)); 3745 return (regno > LAST_VIRTUAL_REGISTER || regno < 4); 3746} 3747 3748/* Return 1 if this is a valid binary floating-point operation. 
3749 OP is the expression matched, and MODE is its mode. */ 3750 3751int 3752binary_fp_operator (op, mode) 3753 register rtx op; 3754 enum machine_mode mode; 3755{ 3756 if (mode != VOIDmode && mode != GET_MODE (op)) 3757 return 0; 3758 3759 switch (GET_CODE (op)) 3760 { 3761 case PLUS: 3762 case MINUS: 3763 case MULT: 3764 case DIV: 3765 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT; 3766 3767 default: 3768 return 0; 3769 } 3770} 3771 3772int 3773mult_operator (op, mode) 3774 register rtx op; 3775 enum machine_mode mode ATTRIBUTE_UNUSED; 3776{ 3777 return GET_CODE (op) == MULT; 3778} 3779 3780int 3781div_operator (op, mode) 3782 register rtx op; 3783 enum machine_mode mode ATTRIBUTE_UNUSED; 3784{ 3785 return GET_CODE (op) == DIV; 3786} 3787 3788int 3789arith_or_logical_operator (op, mode) 3790 rtx op; 3791 enum machine_mode mode; 3792{ 3793 return ((mode == VOIDmode || GET_MODE (op) == mode) 3794 && (GET_RTX_CLASS (GET_CODE (op)) == 'c' 3795 || GET_RTX_CLASS (GET_CODE (op)) == '2')); 3796} 3797 3798/* Returns 1 if OP is memory operand with a displacement. */ 3799 3800int 3801memory_displacement_operand (op, mode) 3802 register rtx op; 3803 enum machine_mode mode; 3804{ 3805 struct ix86_address parts; 3806 3807 if (! memory_operand (op, mode)) 3808 return 0; 3809 3810 if (! ix86_decompose_address (XEXP (op, 0), &parts)) 3811 abort (); 3812 3813 return parts.disp != NULL_RTX; 3814} 3815 3816/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0, 3817 re-recognize the operand to avoid a copy_to_mode_reg that will fail. 3818 3819 ??? It seems likely that this will only work because cmpsi is an 3820 expander, and no actual insns use this. 
*/ 3821 3822int 3823cmpsi_operand (op, mode) 3824 rtx op; 3825 enum machine_mode mode; 3826{ 3827 if (nonimmediate_operand (op, mode)) 3828 return 1; 3829 3830 if (GET_CODE (op) == AND 3831 && GET_MODE (op) == SImode 3832 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT 3833 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT 3834 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT 3835 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8 3836 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8 3837 && GET_CODE (XEXP (op, 1)) == CONST_INT) 3838 return 1; 3839 3840 return 0; 3841} 3842 3843/* Returns 1 if OP is memory operand that can not be represented by the 3844 modRM array. */ 3845 3846int 3847long_memory_operand (op, mode) 3848 register rtx op; 3849 enum machine_mode mode; 3850{ 3851 if (! memory_operand (op, mode)) 3852 return 0; 3853 3854 return memory_address_length (op) != 0; 3855} 3856 3857/* Return nonzero if the rtx is known aligned. */ 3858 3859int 3860aligned_operand (op, mode) 3861 rtx op; 3862 enum machine_mode mode; 3863{ 3864 struct ix86_address parts; 3865 3866 if (!general_operand (op, mode)) 3867 return 0; 3868 3869 /* Registers and immediate operands are always "aligned". */ 3870 if (GET_CODE (op) != MEM) 3871 return 1; 3872 3873 /* Don't even try to do any aligned optimizations with volatiles. */ 3874 if (MEM_VOLATILE_P (op)) 3875 return 0; 3876 3877 op = XEXP (op, 0); 3878 3879 /* Pushes and pops are only valid on the stack pointer. */ 3880 if (GET_CODE (op) == PRE_DEC 3881 || GET_CODE (op) == POST_INC) 3882 return 1; 3883 3884 /* Decode the address. */ 3885 if (! ix86_decompose_address (op, &parts)) 3886 abort (); 3887 3888 if (parts.base && GET_CODE (parts.base) == SUBREG) 3889 parts.base = SUBREG_REG (parts.base); 3890 if (parts.index && GET_CODE (parts.index) == SUBREG) 3891 parts.index = SUBREG_REG (parts.index); 3892 3893 /* Look for some component that isn't known to be aligned. 
*/ 3894 if (parts.index) 3895 { 3896 if (parts.scale < 4 3897 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32) 3898 return 0; 3899 } 3900 if (parts.base) 3901 { 3902 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32) 3903 return 0; 3904 } 3905 if (parts.disp) 3906 { 3907 if (GET_CODE (parts.disp) != CONST_INT 3908 || (INTVAL (parts.disp) & 3) != 0) 3909 return 0; 3910 } 3911 3912 /* Didn't find one -- this must be an aligned address. */ 3913 return 1; 3914} 3915 3916/* Return true if the constant is something that can be loaded with 3917 a special instruction. Only handle 0.0 and 1.0; others are less 3918 worthwhile. */ 3919 3920int 3921standard_80387_constant_p (x) 3922 rtx x; 3923{ 3924 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x))) 3925 return -1; 3926 /* Note that on the 80387, other constants, such as pi, that we should support 3927 too. On some machines, these are much slower to load as standard constant, 3928 than to load from doubles in memory. */ 3929 if (x == CONST0_RTX (GET_MODE (x))) 3930 return 1; 3931 if (x == CONST1_RTX (GET_MODE (x))) 3932 return 2; 3933 return 0; 3934} 3935 3936/* Return 1 if X is FP constant we can load to SSE register w/o using memory. 
3937 */ 3938int 3939standard_sse_constant_p (x) 3940 rtx x; 3941{ 3942 if (x == const0_rtx) 3943 return 1; 3944 return (x == CONST0_RTX (GET_MODE (x))); 3945} 3946 3947/* Returns 1 if OP contains a symbol reference */ 3948 3949int 3950symbolic_reference_mentioned_p (op) 3951 rtx op; 3952{ 3953 register const char *fmt; 3954 register int i; 3955 3956 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) 3957 return 1; 3958 3959 fmt = GET_RTX_FORMAT (GET_CODE (op)); 3960 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) 3961 { 3962 if (fmt[i] == 'E') 3963 { 3964 register int j; 3965 3966 for (j = XVECLEN (op, i) - 1; j >= 0; j--) 3967 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) 3968 return 1; 3969 } 3970 3971 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) 3972 return 1; 3973 } 3974 3975 return 0; 3976} 3977 3978/* Return 1 if it is appropriate to emit `ret' instructions in the 3979 body of a function. Do this only if the epilogue is simple, needing a 3980 couple of insns. Prior to reloading, we can't tell how many registers 3981 must be saved, so return 0 then. Return 0 if there is no frame 3982 marker to de-allocate. 3983 3984 If NON_SAVING_SETJMP is defined and true, then it is not possible 3985 for the epilogue to be simple, so return 0. This is a special case 3986 since NON_SAVING_SETJMP will not cause regs_ever_live to change 3987 until final, but jump_optimize may need to know sooner if a 3988 `return' is OK. */ 3989 3990int 3991ix86_can_use_return_insn_p () 3992{ 3993 struct ix86_frame frame; 3994 3995#ifdef NON_SAVING_SETJMP 3996 if (NON_SAVING_SETJMP && current_function_calls_setjmp) 3997 return 0; 3998#endif 3999 4000 if (! reload_completed || frame_pointer_needed) 4001 return 0; 4002 4003 /* Don't allow more than 32 pop, since that's all we can do 4004 with one instruction. 
*/ 4005 if (current_function_pops_args 4006 && current_function_args_size >= 32768) 4007 return 0; 4008 4009 ix86_compute_frame_layout (&frame); 4010 return frame.to_allocate == 0 && frame.nregs == 0; 4011} 4012 4013/* Return 1 if VALUE can be stored in the sign extended immediate field. */ 4014int 4015x86_64_sign_extended_value (value) 4016 rtx value; 4017{ 4018 switch (GET_CODE (value)) 4019 { 4020 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known 4021 to be at least 32 and this all acceptable constants are 4022 represented as CONST_INT. */ 4023 case CONST_INT: 4024 if (HOST_BITS_PER_WIDE_INT == 32) 4025 return 1; 4026 else 4027 { 4028 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode); 4029 return trunc_int_for_mode (val, SImode) == val; 4030 } 4031 break; 4032 4033 /* For certain code models, the symbolic references are known to fit. 4034 in CM_SMALL_PIC model we know it fits if it is local to the shared 4035 library. Don't count TLS SYMBOL_REFs here, since they should fit 4036 only if inside of UNSPEC handled below. */ 4037 case SYMBOL_REF: 4038 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL); 4039 4040 /* For certain code models, the code is near as well. */ 4041 case LABEL_REF: 4042 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM 4043 || ix86_cmodel == CM_KERNEL); 4044 4045 /* We also may accept the offsetted memory references in certain special 4046 cases. 
*/ 4047 case CONST: 4048 if (GET_CODE (XEXP (value, 0)) == UNSPEC) 4049 switch (XINT (XEXP (value, 0), 1)) 4050 { 4051 case UNSPEC_GOTPCREL: 4052 case UNSPEC_DTPOFF: 4053 case UNSPEC_GOTNTPOFF: 4054 case UNSPEC_NTPOFF: 4055 return 1; 4056 default: 4057 break; 4058 } 4059 if (GET_CODE (XEXP (value, 0)) == PLUS) 4060 { 4061 rtx op1 = XEXP (XEXP (value, 0), 0); 4062 rtx op2 = XEXP (XEXP (value, 0), 1); 4063 HOST_WIDE_INT offset; 4064 4065 if (ix86_cmodel == CM_LARGE) 4066 return 0; 4067 if (GET_CODE (op2) != CONST_INT) 4068 return 0; 4069 offset = trunc_int_for_mode (INTVAL (op2), DImode); 4070 switch (GET_CODE (op1)) 4071 { 4072 case SYMBOL_REF: 4073 /* For CM_SMALL assume that latest object is 16MB before 4074 end of 31bits boundary. We may also accept pretty 4075 large negative constants knowing that all objects are 4076 in the positive half of address space. */ 4077 if (ix86_cmodel == CM_SMALL 4078 && offset < 16*1024*1024 4079 && trunc_int_for_mode (offset, SImode) == offset) 4080 return 1; 4081 /* For CM_KERNEL we know that all object resist in the 4082 negative half of 32bits address space. We may not 4083 accept negative offsets, since they may be just off 4084 and we may accept pretty large positive ones. */ 4085 if (ix86_cmodel == CM_KERNEL 4086 && offset > 0 4087 && trunc_int_for_mode (offset, SImode) == offset) 4088 return 1; 4089 break; 4090 case LABEL_REF: 4091 /* These conditions are similar to SYMBOL_REF ones, just the 4092 constraints for code models differ. 
*/ 4093 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM) 4094 && offset < 16*1024*1024 4095 && trunc_int_for_mode (offset, SImode) == offset) 4096 return 1; 4097 if (ix86_cmodel == CM_KERNEL 4098 && offset > 0 4099 && trunc_int_for_mode (offset, SImode) == offset) 4100 return 1; 4101 break; 4102 case UNSPEC: 4103 switch (XINT (op1, 1)) 4104 { 4105 case UNSPEC_DTPOFF: 4106 case UNSPEC_NTPOFF: 4107 if (offset > 0 4108 && trunc_int_for_mode (offset, SImode) == offset) 4109 return 1; 4110 } 4111 break; 4112 default: 4113 return 0; 4114 } 4115 } 4116 return 0; 4117 default: 4118 return 0; 4119 } 4120} 4121 4122/* Return 1 if VALUE can be stored in the zero extended immediate field. */ 4123int 4124x86_64_zero_extended_value (value) 4125 rtx value; 4126{ 4127 switch (GET_CODE (value)) 4128 { 4129 case CONST_DOUBLE: 4130 if (HOST_BITS_PER_WIDE_INT == 32) 4131 return (GET_MODE (value) == VOIDmode 4132 && !CONST_DOUBLE_HIGH (value)); 4133 else 4134 return 0; 4135 case CONST_INT: 4136 if (HOST_BITS_PER_WIDE_INT == 32) 4137 return INTVAL (value) >= 0; 4138 else 4139 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff); 4140 break; 4141 4142 /* For certain code models, the symbolic references are known to fit. */ 4143 case SYMBOL_REF: 4144 return ix86_cmodel == CM_SMALL; 4145 4146 /* For certain code models, the code is near as well. */ 4147 case LABEL_REF: 4148 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM; 4149 4150 /* We also may accept the offsetted memory references in certain special 4151 cases. */ 4152 case CONST: 4153 if (GET_CODE (XEXP (value, 0)) == PLUS) 4154 { 4155 rtx op1 = XEXP (XEXP (value, 0), 0); 4156 rtx op2 = XEXP (XEXP (value, 0), 1); 4157 4158 if (ix86_cmodel == CM_LARGE) 4159 return 0; 4160 switch (GET_CODE (op1)) 4161 { 4162 case SYMBOL_REF: 4163 return 0; 4164 /* For small code model we may accept pretty large positive 4165 offsets, since one bit is available for free. 
Negative 4166 offsets are limited by the size of NULL pointer area 4167 specified by the ABI. */ 4168 if (ix86_cmodel == CM_SMALL 4169 && GET_CODE (op2) == CONST_INT 4170 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000 4171 && (trunc_int_for_mode (INTVAL (op2), SImode) 4172 == INTVAL (op2))) 4173 return 1; 4174 /* ??? For the kernel, we may accept adjustment of 4175 -0x10000000, since we know that it will just convert 4176 negative address space to positive, but perhaps this 4177 is not worthwhile. */ 4178 break; 4179 case LABEL_REF: 4180 /* These conditions are similar to SYMBOL_REF ones, just the 4181 constraints for code models differ. */ 4182 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM) 4183 && GET_CODE (op2) == CONST_INT 4184 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000 4185 && (trunc_int_for_mode (INTVAL (op2), SImode) 4186 == INTVAL (op2))) 4187 return 1; 4188 break; 4189 default: 4190 return 0; 4191 } 4192 } 4193 return 0; 4194 default: 4195 return 0; 4196 } 4197} 4198 4199/* Value should be nonzero if functions must have frame pointers. 4200 Zero means the frame pointer need not be set up (and parms may 4201 be accessed via the stack pointer) in functions that seem suitable. */ 4202 4203int 4204ix86_frame_pointer_required () 4205{ 4206 /* If we accessed previous frames, then the generated code expects 4207 to be able to access the saved ebp value in our frame. */ 4208 if (cfun->machine->accesses_prev_frame) 4209 return 1; 4210 4211 /* Several x86 os'es need a frame pointer for other reasons, 4212 usually pertaining to setjmp. */ 4213 if (SUBTARGET_FRAME_POINTER_REQUIRED) 4214 return 1; 4215 4216 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off 4217 the frame pointer by default. Turn it back on now if we've not 4218 got a leaf function. 
*/ 4219 if (TARGET_OMIT_LEAF_FRAME_POINTER 4220 && (!current_function_is_leaf)) 4221 return 1; 4222 4223 if (current_function_profile) 4224 return 1; 4225 4226 return 0; 4227} 4228 4229/* Record that the current function accesses previous call frames. */ 4230 4231void 4232ix86_setup_frame_addresses () 4233{ 4234 cfun->machine->accesses_prev_frame = 1; 4235} 4236 4237#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY) 4238# define USE_HIDDEN_LINKONCE 1 4239#else 4240# define USE_HIDDEN_LINKONCE 0 4241#endif 4242 4243static int pic_labels_used; 4244 4245/* Fills in the label name that should be used for a pc thunk for 4246 the given register. */ 4247 4248static void 4249get_pc_thunk_name (name, regno) 4250 char name[32]; 4251 unsigned int regno; 4252{ 4253 if (USE_HIDDEN_LINKONCE) 4254 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]); 4255 else 4256 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno); 4257} 4258 4259 4260/* This function generates code for -fpic that loads %ebx with 4261 the return address of the caller and then returns. */ 4262 4263void 4264ix86_asm_file_end (file) 4265 FILE *file; 4266{ 4267 rtx xops[2]; 4268 int regno; 4269 4270 for (regno = 0; regno < 8; ++regno) 4271 { 4272 char name[32]; 4273 4274 if (! 
((pic_labels_used >> regno) & 1)) 4275 continue; 4276 4277 get_pc_thunk_name (name, regno); 4278 4279 if (USE_HIDDEN_LINKONCE) 4280 { 4281 tree decl; 4282 4283 decl = build_decl (FUNCTION_DECL, get_identifier (name), 4284 error_mark_node); 4285 TREE_PUBLIC (decl) = 1; 4286 TREE_STATIC (decl) = 1; 4287 DECL_ONE_ONLY (decl) = 1; 4288 4289 (*targetm.asm_out.unique_section) (decl, 0); 4290 named_section (decl, NULL, 0); 4291 4292 (*targetm.asm_out.globalize_label) (file, name); 4293 fputs ("\t.hidden\t", file); 4294 assemble_name (file, name); 4295 fputc ('\n', file); 4296 ASM_DECLARE_FUNCTION_NAME (file, name, decl); 4297 } 4298 else 4299 { 4300 text_section (); 4301 ASM_OUTPUT_LABEL (file, name); 4302 } 4303 4304 xops[0] = gen_rtx_REG (SImode, regno); 4305 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx); 4306 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops); 4307 output_asm_insn ("ret", xops); 4308 } 4309} 4310 4311/* Emit code for the SET_GOT patterns. */ 4312 4313const char * 4314output_set_got (dest) 4315 rtx dest; 4316{ 4317 rtx xops[3]; 4318 4319 xops[0] = dest; 4320 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); 4321 4322 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic) 4323 { 4324 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ()); 4325 4326 if (!flag_pic) 4327 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops); 4328 else 4329 output_asm_insn ("call\t%a2", xops); 4330 4331#if TARGET_MACHO 4332 /* Output the "canonical" label name ("Lxx$pb") here too. This 4333 is what will be referred to by the Mach-O PIC subsystem. 
*/ 4334 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ()); 4335#endif 4336 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", 4337 CODE_LABEL_NUMBER (XEXP (xops[2], 0))); 4338 4339 if (flag_pic) 4340 output_asm_insn ("pop{l}\t%0", xops); 4341 } 4342 else 4343 { 4344 char name[32]; 4345 get_pc_thunk_name (name, REGNO (dest)); 4346 pic_labels_used |= 1 << REGNO (dest); 4347 4348 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); 4349 xops[2] = gen_rtx_MEM (QImode, xops[2]); 4350 output_asm_insn ("call\t%X2", xops); 4351 } 4352 4353 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION) 4354 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops); 4355 else if (!TARGET_MACHO) 4356 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops); 4357 4358 return ""; 4359} 4360 4361/* Generate an "push" pattern for input ARG. */ 4362 4363static rtx 4364gen_push (arg) 4365 rtx arg; 4366{ 4367 return gen_rtx_SET (VOIDmode, 4368 gen_rtx_MEM (Pmode, 4369 gen_rtx_PRE_DEC (Pmode, 4370 stack_pointer_rtx)), 4371 arg); 4372} 4373 4374/* Return >= 0 if there is an unused call-clobbered register available 4375 for the entire function. */ 4376 4377static unsigned int 4378ix86_select_alt_pic_regnum () 4379{ 4380 if (current_function_is_leaf && !current_function_profile) 4381 { 4382 int i; 4383 for (i = 2; i >= 0; --i) 4384 if (!regs_ever_live[i]) 4385 return i; 4386 } 4387 4388 return INVALID_REGNUM; 4389} 4390 4391/* Return 1 if we need to save REGNO. 
*/ 4392static int 4393ix86_save_reg (regno, maybe_eh_return) 4394 unsigned int regno; 4395 int maybe_eh_return; 4396{ 4397 if (pic_offset_table_rtx 4398 && regno == REAL_PIC_OFFSET_TABLE_REGNUM 4399 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM] 4400 || current_function_profile 4401 || current_function_calls_eh_return 4402 || current_function_uses_const_pool)) 4403 { 4404 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM) 4405 return 0; 4406 return 1; 4407 } 4408 4409 if (current_function_calls_eh_return && maybe_eh_return) 4410 { 4411 unsigned i; 4412 for (i = 0; ; i++) 4413 { 4414 unsigned test = EH_RETURN_DATA_REGNO (i); 4415 if (test == INVALID_REGNUM) 4416 break; 4417 if (test == regno) 4418 return 1; 4419 } 4420 } 4421 4422 return (regs_ever_live[regno] 4423 && !call_used_regs[regno] 4424 && !fixed_regs[regno] 4425 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)); 4426} 4427 4428/* Return number of registers to be saved on the stack. */ 4429 4430static int 4431ix86_nsaved_regs () 4432{ 4433 int nregs = 0; 4434 int regno; 4435 4436 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--) 4437 if (ix86_save_reg (regno, true)) 4438 nregs++; 4439 return nregs; 4440} 4441 4442/* Return the offset between two registers, one to be eliminated, and the other 4443 its replacement, at the start of a routine. 
*/ 4444 4445HOST_WIDE_INT 4446ix86_initial_elimination_offset (from, to) 4447 int from; 4448 int to; 4449{ 4450 struct ix86_frame frame; 4451 ix86_compute_frame_layout (&frame); 4452 4453 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 4454 return frame.hard_frame_pointer_offset; 4455 else if (from == FRAME_POINTER_REGNUM 4456 && to == HARD_FRAME_POINTER_REGNUM) 4457 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; 4458 else 4459 { 4460 if (to != STACK_POINTER_REGNUM) 4461 abort (); 4462 else if (from == ARG_POINTER_REGNUM) 4463 return frame.stack_pointer_offset; 4464 else if (from != FRAME_POINTER_REGNUM) 4465 abort (); 4466 else 4467 return frame.stack_pointer_offset - frame.frame_pointer_offset; 4468 } 4469} 4470 4471/* Fill structure ix86_frame about frame of currently computed function. */ 4472 4473static void 4474ix86_compute_frame_layout (frame) 4475 struct ix86_frame *frame; 4476{ 4477 HOST_WIDE_INT total_size; 4478 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT; 4479 int offset; 4480 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT; 4481 HOST_WIDE_INT size = get_frame_size (); 4482 4483 frame->nregs = ix86_nsaved_regs (); 4484 total_size = size; 4485 4486 /* Skip return address and saved base pointer. */ 4487 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD; 4488 4489 frame->hard_frame_pointer_offset = offset; 4490 4491 /* Do some sanity checking of stack_alignment_needed and 4492 preferred_alignment, since i386 port is the only using those features 4493 that may break easily. 
*/ 4494 4495 if (size && !stack_alignment_needed) 4496 abort (); 4497 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT) 4498 abort (); 4499 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT) 4500 abort (); 4501 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT) 4502 abort (); 4503 4504 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT) 4505 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT; 4506 4507 /* Register save area */ 4508 offset += frame->nregs * UNITS_PER_WORD; 4509 4510 /* Va-arg area */ 4511 if (ix86_save_varrargs_registers) 4512 { 4513 offset += X86_64_VARARGS_SIZE; 4514 frame->va_arg_size = X86_64_VARARGS_SIZE; 4515 } 4516 else 4517 frame->va_arg_size = 0; 4518 4519 /* Align start of frame for local function. */ 4520 frame->padding1 = ((offset + stack_alignment_needed - 1) 4521 & -stack_alignment_needed) - offset; 4522 4523 offset += frame->padding1; 4524 4525 /* Frame pointer points here. */ 4526 frame->frame_pointer_offset = offset; 4527 4528 offset += size; 4529 4530 /* Add outgoing arguments area. Can be skipped if we eliminated 4531 all the function calls as dead code. */ 4532 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf) 4533 { 4534 offset += current_function_outgoing_args_size; 4535 frame->outgoing_arguments_size = current_function_outgoing_args_size; 4536 } 4537 else 4538 frame->outgoing_arguments_size = 0; 4539 4540 /* Align stack boundary. Only needed if we're calling another function 4541 or using alloca. */ 4542 if (!current_function_is_leaf || current_function_calls_alloca) 4543 frame->padding2 = ((offset + preferred_alignment - 1) 4544 & -preferred_alignment) - offset; 4545 else 4546 frame->padding2 = 0; 4547 4548 offset += frame->padding2; 4549 4550 /* We've reached end of stack frame. */ 4551 frame->stack_pointer_offset = offset; 4552 4553 /* Size prologue needs to allocate. 
*/ 4554 frame->to_allocate = 4555 (size + frame->padding1 + frame->padding2 4556 + frame->outgoing_arguments_size + frame->va_arg_size); 4557 4558 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging 4559 && current_function_is_leaf) 4560 { 4561 frame->red_zone_size = frame->to_allocate; 4562 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) 4563 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; 4564 } 4565 else 4566 frame->red_zone_size = 0; 4567 frame->to_allocate -= frame->red_zone_size; 4568 frame->stack_pointer_offset -= frame->red_zone_size; 4569#if 0 4570 fprintf (stderr, "nregs: %i\n", frame->nregs); 4571 fprintf (stderr, "size: %i\n", size); 4572 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed); 4573 fprintf (stderr, "padding1: %i\n", frame->padding1); 4574 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size); 4575 fprintf (stderr, "padding2: %i\n", frame->padding2); 4576 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate); 4577 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size); 4578 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset); 4579 fprintf (stderr, "hard_frame_pointer_offset: %i\n", 4580 frame->hard_frame_pointer_offset); 4581 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset); 4582#endif 4583} 4584 4585/* Emit code to save registers in the prologue. */ 4586 4587static void 4588ix86_emit_save_regs () 4589{ 4590 register int regno; 4591 rtx insn; 4592 4593 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--) 4594 if (ix86_save_reg (regno, true)) 4595 { 4596 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno))); 4597 RTX_FRAME_RELATED_P (insn) = 1; 4598 } 4599} 4600 4601/* Emit code to save registers using MOV insns. First register 4602 is restored from POINTER + OFFSET. 
*/ 4603static void 4604ix86_emit_save_regs_using_mov (pointer, offset) 4605 rtx pointer; 4606 HOST_WIDE_INT offset; 4607{ 4608 int regno; 4609 rtx insn; 4610 4611 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 4612 if (ix86_save_reg (regno, true)) 4613 { 4614 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer), 4615 Pmode, offset), 4616 gen_rtx_REG (Pmode, regno)); 4617 RTX_FRAME_RELATED_P (insn) = 1; 4618 offset += UNITS_PER_WORD; 4619 } 4620} 4621 4622/* Expand the prologue into a bunch of separate insns. */ 4623 4624void 4625ix86_expand_prologue () 4626{ 4627 rtx insn; 4628 bool pic_reg_used; 4629 struct ix86_frame frame; 4630 int use_mov = 0; 4631 HOST_WIDE_INT allocate; 4632 4633 if (!optimize_size) 4634 { 4635 use_fast_prologue_epilogue 4636 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT); 4637 if (TARGET_PROLOGUE_USING_MOVE) 4638 use_mov = use_fast_prologue_epilogue; 4639 } 4640 ix86_compute_frame_layout (&frame); 4641 4642 /* Note: AT&T enter does NOT have reversed args. Enter is probably 4643 slower on all targets. Also sdb doesn't like it. */ 4644 4645 if (frame_pointer_needed) 4646 { 4647 insn = emit_insn (gen_push (hard_frame_pointer_rtx)); 4648 RTX_FRAME_RELATED_P (insn) = 1; 4649 4650 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); 4651 RTX_FRAME_RELATED_P (insn) = 1; 4652 } 4653 4654 allocate = frame.to_allocate; 4655 /* In case we are dealing only with single register and empty frame, 4656 push is equivalent of the mov+add sequence. */ 4657 if (allocate == 0 && frame.nregs <= 1) 4658 use_mov = 0; 4659 4660 if (!use_mov) 4661 ix86_emit_save_regs (); 4662 else 4663 allocate += frame.nregs * UNITS_PER_WORD; 4664 4665 if (allocate == 0) 4666 ; 4667 else if (! 
TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT) 4668 { 4669 insn = emit_insn (gen_pro_epilogue_adjust_stack 4670 (stack_pointer_rtx, stack_pointer_rtx, 4671 GEN_INT (-allocate))); 4672 RTX_FRAME_RELATED_P (insn) = 1; 4673 } 4674 else 4675 { 4676 /* ??? Is this only valid for Win32? */ 4677 4678 rtx arg0, sym; 4679 4680 if (TARGET_64BIT) 4681 abort (); 4682 4683 arg0 = gen_rtx_REG (SImode, 0); 4684 emit_move_insn (arg0, GEN_INT (allocate)); 4685 4686 sym = gen_rtx_MEM (FUNCTION_MODE, 4687 gen_rtx_SYMBOL_REF (Pmode, "_alloca")); 4688 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx)); 4689 4690 CALL_INSN_FUNCTION_USAGE (insn) 4691 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0), 4692 CALL_INSN_FUNCTION_USAGE (insn)); 4693 4694 /* Don't allow scheduling pass to move insns across __alloca 4695 call. */ 4696 emit_insn (gen_blockage (const0_rtx)); 4697 } 4698 if (use_mov) 4699 { 4700 if (!frame_pointer_needed || !frame.to_allocate) 4701 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate); 4702 else 4703 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx, 4704 -frame.nregs * UNITS_PER_WORD); 4705 } 4706 4707#ifdef SUBTARGET_PROLOGUE 4708 SUBTARGET_PROLOGUE; 4709#endif 4710 4711 pic_reg_used = false; 4712 if (pic_offset_table_rtx 4713 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM] 4714 || current_function_profile)) 4715 { 4716 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum (); 4717 4718 if (alt_pic_reg_used != INVALID_REGNUM) 4719 REGNO (pic_offset_table_rtx) = alt_pic_reg_used; 4720 4721 pic_reg_used = true; 4722 } 4723 4724 if (pic_reg_used) 4725 { 4726 insn = emit_insn (gen_set_got (pic_offset_table_rtx)); 4727 4728 /* Even with accurate pre-reload life analysis, we can wind up 4729 deleting all references to the pic register after reload. 4730 Consider if cross-jumping unifies two sides of a branch 4731 controled by a comparison vs the only read from a global. 
4732 In which case, allow the set_got to be deleted, though we're 4733 too late to do anything about the ebx save in the prologue. */ 4734 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL); 4735 } 4736 4737 /* Prevent function calls from be scheduled before the call to mcount. 4738 In the pic_reg_used case, make sure that the got load isn't deleted. */ 4739 if (current_function_profile) 4740 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx)); 4741} 4742 4743/* Emit code to restore saved registers using MOV insns. First register 4744 is restored from POINTER + OFFSET. */ 4745static void 4746ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return) 4747 rtx pointer; 4748 int offset; 4749 int maybe_eh_return; 4750{ 4751 int regno; 4752 4753 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 4754 if (ix86_save_reg (regno, maybe_eh_return)) 4755 { 4756 emit_move_insn (gen_rtx_REG (Pmode, regno), 4757 adjust_address (gen_rtx_MEM (Pmode, pointer), 4758 Pmode, offset)); 4759 offset += UNITS_PER_WORD; 4760 } 4761} 4762 4763/* Restore function stack, frame, and registers. */ 4764 4765void 4766ix86_expand_epilogue (style) 4767 int style; 4768{ 4769 int regno; 4770 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging; 4771 struct ix86_frame frame; 4772 HOST_WIDE_INT offset; 4773 4774 ix86_compute_frame_layout (&frame); 4775 4776 /* Calculate start of saved registers relative to ebp. Special care 4777 must be taken for the normal return case of a function using 4778 eh_return: the eax and edx registers are marked as saved, but not 4779 restored along this path. */ 4780 offset = frame.nregs; 4781 if (current_function_calls_eh_return && style != 2) 4782 offset -= 2; 4783 offset *= -UNITS_PER_WORD; 4784 4785 /* If we're only restoring one register and sp is not valid then 4786 using a move instruction to restore the register since it's 4787 less work than reloading sp and popping the register. 
4788 4789 The default code result in stack adjustment using add/lea instruction, 4790 while this code results in LEAVE instruction (or discrete equivalent), 4791 so it is profitable in some other cases as well. Especially when there 4792 are no registers to restore. We also use this code when TARGET_USE_LEAVE 4793 and there is exactly one register to pop. This heruistic may need some 4794 tuning in future. */ 4795 if ((!sp_valid && frame.nregs <= 1) 4796 || (TARGET_EPILOGUE_USING_MOVE 4797 && use_fast_prologue_epilogue 4798 && (frame.nregs > 1 || frame.to_allocate)) 4799 || (frame_pointer_needed && !frame.nregs && frame.to_allocate) 4800 || (frame_pointer_needed && TARGET_USE_LEAVE 4801 && use_fast_prologue_epilogue && frame.nregs == 1) 4802 || current_function_calls_eh_return) 4803 { 4804 /* Restore registers. We can use ebp or esp to address the memory 4805 locations. If both are available, default to ebp, since offsets 4806 are known to be small. Only exception is esp pointing directly to the 4807 end of block of saved registers, where we may simplify addressing 4808 mode. */ 4809 4810 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate)) 4811 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, 4812 frame.to_allocate, style == 2); 4813 else 4814 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, 4815 offset, style == 2); 4816 4817 /* eh_return epilogues need %ecx added to the stack pointer. 
*/ 4818 if (style == 2) 4819 { 4820 rtx tmp, sa = EH_RETURN_STACKADJ_RTX; 4821 4822 if (frame_pointer_needed) 4823 { 4824 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa); 4825 tmp = plus_constant (tmp, UNITS_PER_WORD); 4826 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp)); 4827 4828 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx); 4829 emit_move_insn (hard_frame_pointer_rtx, tmp); 4830 4831 emit_insn (gen_pro_epilogue_adjust_stack 4832 (stack_pointer_rtx, sa, const0_rtx)); 4833 } 4834 else 4835 { 4836 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); 4837 tmp = plus_constant (tmp, (frame.to_allocate 4838 + frame.nregs * UNITS_PER_WORD)); 4839 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp)); 4840 } 4841 } 4842 else if (!frame_pointer_needed) 4843 emit_insn (gen_pro_epilogue_adjust_stack 4844 (stack_pointer_rtx, stack_pointer_rtx, 4845 GEN_INT (frame.to_allocate 4846 + frame.nregs * UNITS_PER_WORD))); 4847 /* If not an i386, mov & pop is faster than "leave". */ 4848 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue) 4849 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ()); 4850 else 4851 { 4852 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx, 4853 hard_frame_pointer_rtx, 4854 const0_rtx)); 4855 if (TARGET_64BIT) 4856 emit_insn (gen_popdi1 (hard_frame_pointer_rtx)); 4857 else 4858 emit_insn (gen_popsi1 (hard_frame_pointer_rtx)); 4859 } 4860 } 4861 else 4862 { 4863 /* First step is to deallocate the stack frame so that we can 4864 pop the registers. 
*/ 4865 if (!sp_valid) 4866 { 4867 if (!frame_pointer_needed) 4868 abort (); 4869 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx, 4870 hard_frame_pointer_rtx, 4871 GEN_INT (offset))); 4872 } 4873 else if (frame.to_allocate) 4874 emit_insn (gen_pro_epilogue_adjust_stack 4875 (stack_pointer_rtx, stack_pointer_rtx, 4876 GEN_INT (frame.to_allocate))); 4877 4878 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 4879 if (ix86_save_reg (regno, false)) 4880 { 4881 if (TARGET_64BIT) 4882 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno))); 4883 else 4884 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno))); 4885 } 4886 if (frame_pointer_needed) 4887 { 4888 /* Leave results in shorter dependency chains on CPUs that are 4889 able to grok it fast. */ 4890 if (TARGET_USE_LEAVE) 4891 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ()); 4892 else if (TARGET_64BIT) 4893 emit_insn (gen_popdi1 (hard_frame_pointer_rtx)); 4894 else 4895 emit_insn (gen_popsi1 (hard_frame_pointer_rtx)); 4896 } 4897 } 4898 4899 /* Sibcall epilogues don't want a return instruction. */ 4900 if (style == 0) 4901 return; 4902 4903 if (current_function_pops_args && current_function_args_size) 4904 { 4905 rtx popc = GEN_INT (current_function_pops_args); 4906 4907 /* i386 can only pop 64K bytes. If asked to pop more, pop 4908 return address, do explicit add, and jump indirectly to the 4909 caller. */ 4910 4911 if (current_function_pops_args >= 65536) 4912 { 4913 rtx ecx = gen_rtx_REG (SImode, 2); 4914 4915 /* There are is no "pascal" calling convention in 64bit ABI. */ 4916 if (TARGET_64BIT) 4917 abort (); 4918 4919 emit_insn (gen_popsi1 (ecx)); 4920 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc)); 4921 emit_jump_insn (gen_return_indirect_internal (ecx)); 4922 } 4923 else 4924 emit_jump_insn (gen_return_pop_internal (popc)); 4925 } 4926 else 4927 emit_jump_insn (gen_return_internal ()); 4928} 4929 4930/* Reset from the function's potential modifications. 
*/ 4931 4932static void 4933ix86_output_function_epilogue (file, size) 4934 FILE *file ATTRIBUTE_UNUSED; 4935 HOST_WIDE_INT size ATTRIBUTE_UNUSED; 4936{ 4937 if (pic_offset_table_rtx) 4938 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM; 4939} 4940 4941/* Extract the parts of an RTL expression that is a valid memory address 4942 for an instruction. Return 0 if the structure of the address is 4943 grossly off. Return -1 if the address contains ASHIFT, so it is not 4944 strictly valid, but still used for computing length of lea instruction. 4945 */ 4946 4947static int 4948ix86_decompose_address (addr, out) 4949 register rtx addr; 4950 struct ix86_address *out; 4951{ 4952 rtx base = NULL_RTX; 4953 rtx index = NULL_RTX; 4954 rtx disp = NULL_RTX; 4955 HOST_WIDE_INT scale = 1; 4956 rtx scale_rtx = NULL_RTX; 4957 int retval = 1; 4958 4959 if (REG_P (addr) || GET_CODE (addr) == SUBREG) 4960 base = addr; 4961 else if (GET_CODE (addr) == PLUS) 4962 { 4963 rtx op0 = XEXP (addr, 0); 4964 rtx op1 = XEXP (addr, 1); 4965 enum rtx_code code0 = GET_CODE (op0); 4966 enum rtx_code code1 = GET_CODE (op1); 4967 4968 if (code0 == REG || code0 == SUBREG) 4969 { 4970 if (code1 == REG || code1 == SUBREG) 4971 index = op0, base = op1; /* index + base */ 4972 else 4973 base = op0, disp = op1; /* base + displacement */ 4974 } 4975 else if (code0 == MULT) 4976 { 4977 index = XEXP (op0, 0); 4978 scale_rtx = XEXP (op0, 1); 4979 if (code1 == REG || code1 == SUBREG) 4980 base = op1; /* index*scale + base */ 4981 else 4982 disp = op1; /* index*scale + disp */ 4983 } 4984 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT) 4985 { 4986 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */ 4987 scale_rtx = XEXP (XEXP (op0, 0), 1); 4988 base = XEXP (op0, 1); 4989 disp = op1; 4990 } 4991 else if (code0 == PLUS) 4992 { 4993 index = XEXP (op0, 0); /* index + base + disp */ 4994 base = XEXP (op0, 1); 4995 disp = op1; 4996 } 4997 else 4998 return 0; 4999 } 5000 else if 
(GET_CODE (addr) == MULT) 5001 { 5002 index = XEXP (addr, 0); /* index*scale */ 5003 scale_rtx = XEXP (addr, 1); 5004 } 5005 else if (GET_CODE (addr) == ASHIFT) 5006 { 5007 rtx tmp; 5008 5009 /* We're called for lea too, which implements ashift on occasion. */ 5010 index = XEXP (addr, 0); 5011 tmp = XEXP (addr, 1); 5012 if (GET_CODE (tmp) != CONST_INT) 5013 return 0; 5014 scale = INTVAL (tmp); 5015 if ((unsigned HOST_WIDE_INT) scale > 3) 5016 return 0; 5017 scale = 1 << scale; 5018 retval = -1; 5019 } 5020 else 5021 disp = addr; /* displacement */ 5022 5023 /* Extract the integral value of scale. */ 5024 if (scale_rtx) 5025 { 5026 if (GET_CODE (scale_rtx) != CONST_INT) 5027 return 0; 5028 scale = INTVAL (scale_rtx); 5029 } 5030 5031 /* Allow arg pointer and stack pointer as index if there is not scaling */ 5032 if (base && index && scale == 1 5033 && (index == arg_pointer_rtx || index == frame_pointer_rtx 5034 || index == stack_pointer_rtx)) 5035 { 5036 rtx tmp = base; 5037 base = index; 5038 index = tmp; 5039 } 5040 5041 /* Special case: %ebp cannot be encoded as a base without a displacement. */ 5042 if ((base == hard_frame_pointer_rtx 5043 || base == frame_pointer_rtx 5044 || base == arg_pointer_rtx) && !disp) 5045 disp = const0_rtx; 5046 5047 /* Special case: on K6, [%esi] makes the instruction vector decoded. 5048 Avoid this by transforming to [%esi+0]. */ 5049 if (ix86_cpu == PROCESSOR_K6 && !optimize_size 5050 && base && !index && !disp 5051 && REG_P (base) 5052 && REGNO_REG_CLASS (REGNO (base)) == SIREG) 5053 disp = const0_rtx; 5054 5055 /* Special case: encode reg+reg instead of reg*2. */ 5056 if (!base && index && scale && scale == 2) 5057 base = index, scale = 1; 5058 5059 /* Special case: scaling cannot be encoded without base or displacement. 
*/ 5060 if (!base && !disp && index && scale != 1) 5061 disp = const0_rtx; 5062 5063 out->base = base; 5064 out->index = index; 5065 out->disp = disp; 5066 out->scale = scale; 5067 5068 return retval; 5069} 5070 5071/* Return cost of the memory address x. 5072 For i386, it is better to use a complex address than let gcc copy 5073 the address into a reg and make a new pseudo. But not if the address 5074 requires to two regs - that would mean more pseudos with longer 5075 lifetimes. */ 5076int 5077ix86_address_cost (x) 5078 rtx x; 5079{ 5080 struct ix86_address parts; 5081 int cost = 1; 5082 5083 if (!ix86_decompose_address (x, &parts)) 5084 abort (); 5085 5086 if (parts.base && GET_CODE (parts.base) == SUBREG) 5087 parts.base = SUBREG_REG (parts.base); 5088 if (parts.index && GET_CODE (parts.index) == SUBREG) 5089 parts.index = SUBREG_REG (parts.index); 5090 5091 /* More complex memory references are better. */ 5092 if (parts.disp && parts.disp != const0_rtx) 5093 cost--; 5094 5095 /* Attempt to minimize number of registers in the address. */ 5096 if ((parts.base 5097 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)) 5098 || (parts.index 5099 && (!REG_P (parts.index) 5100 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER))) 5101 cost++; 5102 5103 if (parts.base 5104 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER) 5105 && parts.index 5106 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER) 5107 && parts.base != parts.index) 5108 cost++; 5109 5110 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b, 5111 since it's predecode logic can't detect the length of instructions 5112 and it degenerates to vector decoded. Increase cost of such 5113 addresses here. The penalty is minimally 2 cycles. It may be worthwhile 5114 to split such addresses or even refuse such addresses at all. 
5115 5116 Following addressing modes are affected: 5117 [base+scale*index] 5118 [scale*index+disp] 5119 [base+index] 5120 5121 The first and last case may be avoidable by explicitly coding the zero in 5122 memory address, but I don't have AMD-K6 machine handy to check this 5123 theory. */ 5124 5125 if (TARGET_K6 5126 && ((!parts.disp && parts.base && parts.index && parts.scale != 1) 5127 || (parts.disp && !parts.base && parts.index && parts.scale != 1) 5128 || (!parts.disp && parts.base && parts.index && parts.scale == 1))) 5129 cost += 10; 5130 5131 return cost; 5132} 5133 5134/* If X is a machine specific address (i.e. a symbol or label being 5135 referenced as a displacement from the GOT implemented using an 5136 UNSPEC), then return the base term. Otherwise return X. */ 5137 5138rtx 5139ix86_find_base_term (x) 5140 rtx x; 5141{ 5142 rtx term; 5143 5144 if (TARGET_64BIT) 5145 { 5146 if (GET_CODE (x) != CONST) 5147 return x; 5148 term = XEXP (x, 0); 5149 if (GET_CODE (term) == PLUS 5150 && (GET_CODE (XEXP (term, 1)) == CONST_INT 5151 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE)) 5152 term = XEXP (term, 0); 5153 if (GET_CODE (term) != UNSPEC 5154 || XINT (term, 1) != UNSPEC_GOTPCREL) 5155 return x; 5156 5157 term = XVECEXP (term, 0, 0); 5158 5159 if (GET_CODE (term) != SYMBOL_REF 5160 && GET_CODE (term) != LABEL_REF) 5161 return x; 5162 5163 return term; 5164 } 5165 5166 if (GET_CODE (x) != PLUS 5167 || XEXP (x, 0) != pic_offset_table_rtx 5168 || GET_CODE (XEXP (x, 1)) != CONST) 5169 return x; 5170 5171 term = XEXP (XEXP (x, 1), 0); 5172 5173 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT) 5174 term = XEXP (term, 0); 5175 5176 if (GET_CODE (term) != UNSPEC 5177 || XINT (term, 1) != UNSPEC_GOTOFF) 5178 return x; 5179 5180 term = XVECEXP (term, 0, 0); 5181 5182 if (GET_CODE (term) != SYMBOL_REF 5183 && GET_CODE (term) != LABEL_REF) 5184 return x; 5185 5186 return term; 5187} 5188 5189/* Determine if a given RTX is a valid constant. 
We already know this 5190 satisfies CONSTANT_P. */ 5191 5192bool 5193legitimate_constant_p (x) 5194 rtx x; 5195{ 5196 rtx inner; 5197 5198 switch (GET_CODE (x)) 5199 { 5200 case SYMBOL_REF: 5201 /* TLS symbols are not constant. */ 5202 if (tls_symbolic_operand (x, Pmode)) 5203 return false; 5204 break; 5205 5206 case CONST: 5207 inner = XEXP (x, 0); 5208 5209 /* Offsets of TLS symbols are never valid. 5210 Discourage CSE from creating them. */ 5211 if (GET_CODE (inner) == PLUS 5212 && tls_symbolic_operand (XEXP (inner, 0), Pmode)) 5213 return false; 5214 5215 /* Only some unspecs are valid as "constants". */ 5216 if (GET_CODE (inner) == UNSPEC) 5217 switch (XINT (inner, 1)) 5218 { 5219 case UNSPEC_TPOFF: 5220 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode); 5221 default: 5222 return false; 5223 } 5224 break; 5225 5226 default: 5227 break; 5228 } 5229 5230 /* Otherwise we handle everything else in the move patterns. */ 5231 return true; 5232} 5233 5234/* Determine if it's legal to put X into the constant pool. This 5235 is not possible for the address of thread-local symbols, which 5236 is checked above. */ 5237 5238static bool 5239ix86_cannot_force_const_mem (x) 5240 rtx x; 5241{ 5242 return !legitimate_constant_p (x); 5243} 5244 5245/* Determine if a given RTX is a valid constant address. */ 5246 5247bool 5248constant_address_p (x) 5249 rtx x; 5250{ 5251 switch (GET_CODE (x)) 5252 { 5253 case LABEL_REF: 5254 case CONST_INT: 5255 return true; 5256 5257 case CONST_DOUBLE: 5258 return TARGET_64BIT; 5259 5260 case CONST: 5261 /* For Mach-O, really believe the CONST. */ 5262 if (TARGET_MACHO) 5263 return true; 5264 /* Otherwise fall through. */ 5265 case SYMBOL_REF: 5266 return !flag_pic && legitimate_constant_p (x); 5267 5268 default: 5269 return false; 5270 } 5271} 5272 5273/* Nonzero if the constant value X is a legitimate general operand 5274 when generating PIC code. 
   It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (x)
     rtx x;
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}

/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (disp)
     register rtx disp;
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      /* TLS references should always be enclosed in UNSPEC.  */
      if (tls_symbolic_operand (disp, GET_MODE (disp)))
	return 0;
      if (GET_CODE (disp) == SYMBOL_REF
	  && ix86_cmodel == CM_SMALL_PIC
	  && (CONSTANT_POOL_ADDRESS_P (disp)
	      || SYMBOL_REF_FLAG (disp)))
	return 1;
      if (GET_CODE (disp) == LABEL_REF)
	return 1;
      /* sym+offset is allowed for local symbols in the small PIC model
	 as long as the offset stays within +-16MB.  */
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS
	  && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
	       && ix86_cmodel == CM_SMALL_PIC
	       && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
		   || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
	      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
	  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
	  && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
	  && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
	return 1;
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* We are unsafe to allow PLUS expressions.  This limit allowed distance
	 of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  /* 32-bit: peel off an integer addend, remembering that we saw one so
     the cases below that forbid an addend can reject it.  */
  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
  if (TARGET_MACHO && GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
          || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
	  {
	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
	    if (strstr (sym_name, "$pb") != 0)
	      return 1;
	  }
    }

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      /* @GOT references may not carry an addend.  */
      if (saw_plus)
	return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_NTPOFF:
      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_DTPOFF:
      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}

/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.
*/ 5416 5417int 5418legitimate_address_p (mode, addr, strict) 5419 enum machine_mode mode; 5420 register rtx addr; 5421 int strict; 5422{ 5423 struct ix86_address parts; 5424 rtx base, index, disp; 5425 HOST_WIDE_INT scale; 5426 const char *reason = NULL; 5427 rtx reason_rtx = NULL_RTX; 5428 5429 if (TARGET_DEBUG_ADDR) 5430 { 5431 fprintf (stderr, 5432 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n", 5433 GET_MODE_NAME (mode), strict); 5434 debug_rtx (addr); 5435 } 5436 5437 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP) 5438 { 5439 if (TARGET_DEBUG_ADDR) 5440 fprintf (stderr, "Success.\n"); 5441 return TRUE; 5442 } 5443 5444 if (ix86_decompose_address (addr, &parts) <= 0) 5445 { 5446 reason = "decomposition failed"; 5447 goto report_error; 5448 } 5449 5450 base = parts.base; 5451 index = parts.index; 5452 disp = parts.disp; 5453 scale = parts.scale; 5454 5455 /* Validate base register. 5456 5457 Don't allow SUBREG's here, it can lead to spill failures when the base 5458 is one word out of a two word structure, which is represented internally 5459 as a DImode int. */ 5460 5461 if (base) 5462 { 5463 rtx reg; 5464 reason_rtx = base; 5465 5466 if (GET_CODE (base) == SUBREG) 5467 reg = SUBREG_REG (base); 5468 else 5469 reg = base; 5470 5471 if (GET_CODE (reg) != REG) 5472 { 5473 reason = "base is not a register"; 5474 goto report_error; 5475 } 5476 5477 if (GET_MODE (base) != Pmode) 5478 { 5479 reason = "base is not in Pmode"; 5480 goto report_error; 5481 } 5482 5483 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg)) 5484 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg))) 5485 { 5486 reason = "base is not valid"; 5487 goto report_error; 5488 } 5489 } 5490 5491 /* Validate index register. 5492 5493 Don't allow SUBREG's here, it can lead to spill failures when the index 5494 is one word out of a two word structure, which is represented internally 5495 as a DImode int. 
*/ 5496 5497 if (index) 5498 { 5499 rtx reg; 5500 reason_rtx = index; 5501 5502 if (GET_CODE (index) == SUBREG) 5503 reg = SUBREG_REG (index); 5504 else 5505 reg = index; 5506 5507 if (GET_CODE (reg) != REG) 5508 { 5509 reason = "index is not a register"; 5510 goto report_error; 5511 } 5512 5513 if (GET_MODE (index) != Pmode) 5514 { 5515 reason = "index is not in Pmode"; 5516 goto report_error; 5517 } 5518 5519 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg)) 5520 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg))) 5521 { 5522 reason = "index is not valid"; 5523 goto report_error; 5524 } 5525 } 5526 5527 /* Validate scale factor. */ 5528 if (scale != 1) 5529 { 5530 reason_rtx = GEN_INT (scale); 5531 if (!index) 5532 { 5533 reason = "scale without index"; 5534 goto report_error; 5535 } 5536 5537 if (scale != 2 && scale != 4 && scale != 8) 5538 { 5539 reason = "scale is not a valid multiplier"; 5540 goto report_error; 5541 } 5542 } 5543 5544 /* Validate displacement. */ 5545 if (disp) 5546 { 5547 reason_rtx = disp; 5548 5549 if (GET_CODE (disp) == CONST 5550 && GET_CODE (XEXP (disp, 0)) == UNSPEC) 5551 switch (XINT (XEXP (disp, 0), 1)) 5552 { 5553 case UNSPEC_GOT: 5554 case UNSPEC_GOTOFF: 5555 case UNSPEC_GOTPCREL: 5556 if (!flag_pic) 5557 abort (); 5558 goto is_legitimate_pic; 5559 5560 case UNSPEC_GOTTPOFF: 5561 case UNSPEC_GOTNTPOFF: 5562 case UNSPEC_INDNTPOFF: 5563 case UNSPEC_NTPOFF: 5564 case UNSPEC_DTPOFF: 5565 break; 5566 5567 default: 5568 reason = "invalid address unspec"; 5569 goto report_error; 5570 } 5571 5572 else if (flag_pic && (SYMBOLIC_CONST (disp) 5573#if TARGET_MACHO 5574 && !machopic_operand_p (disp) 5575#endif 5576 )) 5577 { 5578 is_legitimate_pic: 5579 if (TARGET_64BIT && (index || base)) 5580 { 5581 /* foo@dtpoff(%rX) is ok. 
*/ 5582 if (GET_CODE (disp) != CONST 5583 || GET_CODE (XEXP (disp, 0)) != PLUS 5584 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC 5585 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT 5586 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF 5587 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF)) 5588 { 5589 reason = "non-constant pic memory reference"; 5590 goto report_error; 5591 } 5592 } 5593 else if (! legitimate_pic_address_disp_p (disp)) 5594 { 5595 reason = "displacement is an invalid pic construct"; 5596 goto report_error; 5597 } 5598 5599 /* This code used to verify that a symbolic pic displacement 5600 includes the pic_offset_table_rtx register. 5601 5602 While this is good idea, unfortunately these constructs may 5603 be created by "adds using lea" optimization for incorrect 5604 code like: 5605 5606 int a; 5607 int foo(int i) 5608 { 5609 return *(&a+i); 5610 } 5611 5612 This code is nonsensical, but results in addressing 5613 GOT table with pic_offset_table_rtx base. We can't 5614 just refuse it easily, since it gets matched by 5615 "addsi3" pattern, that later gets split to lea in the 5616 case output register differs from input. While this 5617 can be handled by separate addsi pattern for this case 5618 that never results in lea, this seems to be easier and 5619 correct fix for crash to disable this test. */ 5620 } 5621 else if (!CONSTANT_ADDRESS_P (disp)) 5622 { 5623 reason = "displacement is not constant"; 5624 goto report_error; 5625 } 5626 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp)) 5627 { 5628 reason = "displacement is out of range"; 5629 goto report_error; 5630 } 5631 else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE) 5632 { 5633 reason = "displacement is a const_double"; 5634 goto report_error; 5635 } 5636 } 5637 5638 /* Everything looks valid. 
*/ 5639 if (TARGET_DEBUG_ADDR) 5640 fprintf (stderr, "Success.\n"); 5641 return TRUE; 5642 5643 report_error: 5644 if (TARGET_DEBUG_ADDR) 5645 { 5646 fprintf (stderr, "Error: %s\n", reason); 5647 debug_rtx (reason_rtx); 5648 } 5649 return FALSE; 5650} 5651 5652/* Return an unique alias set for the GOT. */ 5653 5654static HOST_WIDE_INT 5655ix86_GOT_alias_set () 5656{ 5657 static HOST_WIDE_INT set = -1; 5658 if (set == -1) 5659 set = new_alias_set (); 5660 return set; 5661} 5662 5663/* Return a legitimate reference for ORIG (an address) using the 5664 register REG. If REG is 0, a new pseudo is generated. 5665 5666 There are two types of references that must be handled: 5667 5668 1. Global data references must load the address from the GOT, via 5669 the PIC reg. An insn is emitted to do this load, and the reg is 5670 returned. 5671 5672 2. Static data references, constant pool addresses, and code labels 5673 compute the address as an offset from the GOT, whose base is in 5674 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to 5675 differentiate them from global data objects. The returned 5676 address is the PIC reg + an unspec constant. 5677 5678 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC 5679 reg also appears in the address. */ 5680 5681rtx 5682legitimize_pic_address (orig, reg) 5683 rtx orig; 5684 rtx reg; 5685{ 5686 rtx addr = orig; 5687 rtx new = orig; 5688 rtx base; 5689 5690#if TARGET_MACHO 5691 if (reg == 0) 5692 reg = gen_reg_rtx (Pmode); 5693 /* Use the generic Mach-O PIC machinery. */ 5694 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg); 5695#endif 5696 5697 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr)) 5698 new = addr; 5699 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode)) 5700 { 5701 /* This symbol may be referenced via a displacement from the PIC 5702 base address (@GOTOFF). 
*/ 5703 5704 if (reload_in_progress) 5705 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; 5706 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); 5707 new = gen_rtx_CONST (Pmode, new); 5708 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); 5709 5710 if (reg != 0) 5711 { 5712 emit_move_insn (reg, new); 5713 new = reg; 5714 } 5715 } 5716 else if (GET_CODE (addr) == SYMBOL_REF) 5717 { 5718 if (TARGET_64BIT) 5719 { 5720 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL); 5721 new = gen_rtx_CONST (Pmode, new); 5722 new = gen_rtx_MEM (Pmode, new); 5723 RTX_UNCHANGING_P (new) = 1; 5724 set_mem_alias_set (new, ix86_GOT_alias_set ()); 5725 5726 if (reg == 0) 5727 reg = gen_reg_rtx (Pmode); 5728 /* Use directly gen_movsi, otherwise the address is loaded 5729 into register for CSE. We don't want to CSE this addresses, 5730 instead we CSE addresses from the GOT table, so skip this. */ 5731 emit_insn (gen_movsi (reg, new)); 5732 new = reg; 5733 } 5734 else 5735 { 5736 /* This symbol must be referenced via a load from the 5737 Global Offset Table (@GOT). */ 5738 5739 if (reload_in_progress) 5740 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; 5741 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); 5742 new = gen_rtx_CONST (Pmode, new); 5743 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); 5744 new = gen_rtx_MEM (Pmode, new); 5745 RTX_UNCHANGING_P (new) = 1; 5746 set_mem_alias_set (new, ix86_GOT_alias_set ()); 5747 5748 if (reg == 0) 5749 reg = gen_reg_rtx (Pmode); 5750 emit_move_insn (reg, new); 5751 new = reg; 5752 } 5753 } 5754 else 5755 { 5756 if (GET_CODE (addr) == CONST) 5757 { 5758 addr = XEXP (addr, 0); 5759 5760 /* We must match stuff we generate before. Assume the only 5761 unspecs that can get here are ours. Not that we could do 5762 anything with them anyway... 
*/ 5763 if (GET_CODE (addr) == UNSPEC 5764 || (GET_CODE (addr) == PLUS 5765 && GET_CODE (XEXP (addr, 0)) == UNSPEC)) 5766 return orig; 5767 if (GET_CODE (addr) != PLUS) 5768 abort (); 5769 } 5770 if (GET_CODE (addr) == PLUS) 5771 { 5772 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1); 5773 5774 /* Check first to see if this is a constant offset from a @GOTOFF 5775 symbol reference. */ 5776 if (local_symbolic_operand (op0, Pmode) 5777 && GET_CODE (op1) == CONST_INT) 5778 { 5779 if (!TARGET_64BIT) 5780 { 5781 if (reload_in_progress) 5782 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; 5783 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 5784 UNSPEC_GOTOFF); 5785 new = gen_rtx_PLUS (Pmode, new, op1); 5786 new = gen_rtx_CONST (Pmode, new); 5787 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); 5788 5789 if (reg != 0) 5790 { 5791 emit_move_insn (reg, new); 5792 new = reg; 5793 } 5794 } 5795 else 5796 { 5797 if (INTVAL (op1) < -16*1024*1024 5798 || INTVAL (op1) >= 16*1024*1024) 5799 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1)); 5800 } 5801 } 5802 else 5803 { 5804 base = legitimize_pic_address (XEXP (addr, 0), reg); 5805 new = legitimize_pic_address (XEXP (addr, 1), 5806 base == reg ? NULL_RTX : reg); 5807 5808 if (GET_CODE (new) == CONST_INT) 5809 new = plus_constant (base, INTVAL (new)); 5810 else 5811 { 5812 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1))) 5813 { 5814 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0)); 5815 new = XEXP (new, 1); 5816 } 5817 new = gen_rtx_PLUS (Pmode, base, new); 5818 } 5819 } 5820 } 5821 } 5822 return new; 5823} 5824 5825static void 5826ix86_encode_section_info (decl, first) 5827 tree decl; 5828 int first ATTRIBUTE_UNUSED; 5829{ 5830 bool local_p = (*targetm.binds_local_p) (decl); 5831 rtx rtl, symbol; 5832 5833 rtl = DECL_P (decl) ? 
	DECL_RTL (decl) : TREE_CST_RTL (decl);
  if (GET_CODE (rtl) != MEM)
    return;
  symbol = XEXP (rtl, 0);
  if (GET_CODE (symbol) != SYMBOL_REF)
    return;

  /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
     symbol so that we may access it directly in the GOT.  */

  if (flag_pic)
    SYMBOL_REF_FLAG (symbol) = local_p;

  /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
     "local dynamic", "initial exec" or "local exec" TLS models
     respectively.  */

  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
    {
      const char *symbol_str;
      char *newstr;
      size_t len;
      enum tls_model kind = decl_tls_model (decl);

      if (TARGET_64BIT && ! flag_pic)
	{
	  /* x86-64 doesn't allow non-pic code for shared libraries,
	     so don't generate GD/LD TLS models for non-pic code.  */
	  switch (kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      kind = TLS_MODEL_INITIAL_EXEC; break;
	    case TLS_MODEL_LOCAL_DYNAMIC:
	      kind = TLS_MODEL_LOCAL_EXEC; break;
	    default:
	      break;
	    }
	}

      symbol_str = XSTR (symbol, 0);

      /* If the name is already encoded, either it already carries the
	 right model character and we are done, or we re-encode the bare
	 name with the new model.  */
      if (symbol_str[0] == '%')
	{
	  if (symbol_str[1] == tls_model_chars[kind])
	    return;
	  symbol_str += 2;
	}
      len = strlen (symbol_str) + 1;
      newstr = alloca (len + 2);

      /* Build "%<model-char><name>" in a stack temporary, then commit it
	 to GC-allocated storage.  */
      newstr[0] = '%';
      newstr[1] = tls_model_chars[kind];
      memcpy (newstr + 2, symbol_str, len);

      XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
    }
}

/* Undo the above when printing symbol names: skip the two-character
   "%<model>" TLS prefix added by ix86_encode_section_info, and any
   leading '*' marker, if present.  */

static const char *
ix86_strip_name_encoding (str)
     const char *str;
{
  if (str[0] == '%')
    str += 2;
  if (str [0] == '*')
    str += 1;
  return str;
}

/* Load the thread pointer into a register.  */

static rtx
get_thread_pointer ()
{
  rtx tp;

  /* The thread pointer is represented as a load from a (unspec UNSPEC_TP)
     address, marked unchanging and given the GOT alias set, then forced
     into a register.  */
  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  tp = gen_rtx_MEM (Pmode, tp);
  RTX_UNCHANGING_P (tp) = 1;
  set_mem_alias_set (tp, ix86_GOT_alias_set ());
  tp = force_reg (Pmode, tp);

  return tp;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.
*/ 5940 5941rtx 5942legitimize_address (x, oldx, mode) 5943 register rtx x; 5944 register rtx oldx ATTRIBUTE_UNUSED; 5945 enum machine_mode mode; 5946{ 5947 int changed = 0; 5948 unsigned log; 5949 5950 if (TARGET_DEBUG_ADDR) 5951 { 5952 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n", 5953 GET_MODE_NAME (mode)); 5954 debug_rtx (x); 5955 } 5956 5957 log = tls_symbolic_operand (x, mode); 5958 if (log) 5959 { 5960 rtx dest, base, off, pic; 5961 int type; 5962 5963 switch (log) 5964 { 5965 case TLS_MODEL_GLOBAL_DYNAMIC: 5966 dest = gen_reg_rtx (Pmode); 5967 if (TARGET_64BIT) 5968 { 5969 rtx rax = gen_rtx_REG (Pmode, 0), insns; 5970 5971 start_sequence (); 5972 emit_call_insn (gen_tls_global_dynamic_64 (rax, x)); 5973 insns = get_insns (); 5974 end_sequence (); 5975 5976 emit_libcall_block (insns, dest, rax, x); 5977 } 5978 else 5979 emit_insn (gen_tls_global_dynamic_32 (dest, x)); 5980 break; 5981 5982 case TLS_MODEL_LOCAL_DYNAMIC: 5983 base = gen_reg_rtx (Pmode); 5984 if (TARGET_64BIT) 5985 { 5986 rtx rax = gen_rtx_REG (Pmode, 0), insns, note; 5987 5988 start_sequence (); 5989 emit_call_insn (gen_tls_local_dynamic_base_64 (rax)); 5990 insns = get_insns (); 5991 end_sequence (); 5992 5993 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL); 5994 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note); 5995 emit_libcall_block (insns, base, rax, note); 5996 } 5997 else 5998 emit_insn (gen_tls_local_dynamic_base_32 (base)); 5999 6000 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF); 6001 off = gen_rtx_CONST (Pmode, off); 6002 6003 return gen_rtx_PLUS (Pmode, base, off); 6004 6005 case TLS_MODEL_INITIAL_EXEC: 6006 if (TARGET_64BIT) 6007 { 6008 pic = NULL; 6009 type = UNSPEC_GOTNTPOFF; 6010 } 6011 else if (flag_pic) 6012 { 6013 if (reload_in_progress) 6014 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; 6015 pic = pic_offset_table_rtx; 6016 type = TARGET_GNU_TLS ? 
UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF; 6017 } 6018 else if (!TARGET_GNU_TLS) 6019 { 6020 pic = gen_reg_rtx (Pmode); 6021 emit_insn (gen_set_got (pic)); 6022 type = UNSPEC_GOTTPOFF; 6023 } 6024 else 6025 { 6026 pic = NULL; 6027 type = UNSPEC_INDNTPOFF; 6028 } 6029 6030 base = get_thread_pointer (); 6031 6032 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type); 6033 off = gen_rtx_CONST (Pmode, off); 6034 if (pic) 6035 off = gen_rtx_PLUS (Pmode, pic, off); 6036 off = gen_rtx_MEM (Pmode, off); 6037 RTX_UNCHANGING_P (off) = 1; 6038 set_mem_alias_set (off, ix86_GOT_alias_set ()); 6039 dest = gen_reg_rtx (Pmode); 6040 6041 if (TARGET_64BIT || TARGET_GNU_TLS) 6042 { 6043 emit_move_insn (dest, off); 6044 return gen_rtx_PLUS (Pmode, base, dest); 6045 } 6046 else 6047 emit_insn (gen_subsi3 (dest, base, off)); 6048 break; 6049 6050 case TLS_MODEL_LOCAL_EXEC: 6051 base = get_thread_pointer (); 6052 6053 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), 6054 (TARGET_64BIT || TARGET_GNU_TLS) 6055 ? UNSPEC_NTPOFF : UNSPEC_TPOFF); 6056 off = gen_rtx_CONST (Pmode, off); 6057 6058 if (TARGET_64BIT || TARGET_GNU_TLS) 6059 return gen_rtx_PLUS (Pmode, base, off); 6060 else 6061 { 6062 dest = gen_reg_rtx (Pmode); 6063 emit_insn (gen_subsi3 (dest, base, off)); 6064 } 6065 break; 6066 6067 default: 6068 abort (); 6069 } 6070 6071 return dest; 6072 } 6073 6074 if (flag_pic && SYMBOLIC_CONST (x)) 6075 return legitimize_pic_address (x, 0); 6076 6077 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */ 6078 if (GET_CODE (x) == ASHIFT 6079 && GET_CODE (XEXP (x, 1)) == CONST_INT 6080 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4) 6081 { 6082 changed = 1; 6083 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)), 6084 GEN_INT (1 << log)); 6085 } 6086 6087 if (GET_CODE (x) == PLUS) 6088 { 6089 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. 
*/ 6090 6091 if (GET_CODE (XEXP (x, 0)) == ASHIFT 6092 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT 6093 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4) 6094 { 6095 changed = 1; 6096 XEXP (x, 0) = gen_rtx_MULT (Pmode, 6097 force_reg (Pmode, XEXP (XEXP (x, 0), 0)), 6098 GEN_INT (1 << log)); 6099 } 6100 6101 if (GET_CODE (XEXP (x, 1)) == ASHIFT 6102 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT 6103 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4) 6104 { 6105 changed = 1; 6106 XEXP (x, 1) = gen_rtx_MULT (Pmode, 6107 force_reg (Pmode, XEXP (XEXP (x, 1), 0)), 6108 GEN_INT (1 << log)); 6109 } 6110 6111 /* Put multiply first if it isn't already. */ 6112 if (GET_CODE (XEXP (x, 1)) == MULT) 6113 { 6114 rtx tmp = XEXP (x, 0); 6115 XEXP (x, 0) = XEXP (x, 1); 6116 XEXP (x, 1) = tmp; 6117 changed = 1; 6118 } 6119 6120 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const))) 6121 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be 6122 created by virtual register instantiation, register elimination, and 6123 similar optimizations. */ 6124 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS) 6125 { 6126 changed = 1; 6127 x = gen_rtx_PLUS (Pmode, 6128 gen_rtx_PLUS (Pmode, XEXP (x, 0), 6129 XEXP (XEXP (x, 1), 0)), 6130 XEXP (XEXP (x, 1), 1)); 6131 } 6132 6133 /* Canonicalize 6134 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const) 6135 into (plus (plus (mult (reg) (const)) (reg)) (const)). 
*/ 6136 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS 6137 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT 6138 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS 6139 && CONSTANT_P (XEXP (x, 1))) 6140 { 6141 rtx constant; 6142 rtx other = NULL_RTX; 6143 6144 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 6145 { 6146 constant = XEXP (x, 1); 6147 other = XEXP (XEXP (XEXP (x, 0), 1), 1); 6148 } 6149 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT) 6150 { 6151 constant = XEXP (XEXP (XEXP (x, 0), 1), 1); 6152 other = XEXP (x, 1); 6153 } 6154 else 6155 constant = 0; 6156 6157 if (constant) 6158 { 6159 changed = 1; 6160 x = gen_rtx_PLUS (Pmode, 6161 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0), 6162 XEXP (XEXP (XEXP (x, 0), 1), 0)), 6163 plus_constant (other, INTVAL (constant))); 6164 } 6165 } 6166 6167 if (changed && legitimate_address_p (mode, x, FALSE)) 6168 return x; 6169 6170 if (GET_CODE (XEXP (x, 0)) == MULT) 6171 { 6172 changed = 1; 6173 XEXP (x, 0) = force_operand (XEXP (x, 0), 0); 6174 } 6175 6176 if (GET_CODE (XEXP (x, 1)) == MULT) 6177 { 6178 changed = 1; 6179 XEXP (x, 1) = force_operand (XEXP (x, 1), 0); 6180 } 6181 6182 if (changed 6183 && GET_CODE (XEXP (x, 1)) == REG 6184 && GET_CODE (XEXP (x, 0)) == REG) 6185 return x; 6186 6187 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1))) 6188 { 6189 changed = 1; 6190 x = legitimize_pic_address (x, 0); 6191 } 6192 6193 if (changed && legitimate_address_p (mode, x, FALSE)) 6194 return x; 6195 6196 if (GET_CODE (XEXP (x, 0)) == REG) 6197 { 6198 register rtx temp = gen_reg_rtx (Pmode); 6199 register rtx val = force_operand (XEXP (x, 1), temp); 6200 if (val != temp) 6201 emit_move_insn (temp, val); 6202 6203 XEXP (x, 1) = temp; 6204 return x; 6205 } 6206 6207 else if (GET_CODE (XEXP (x, 1)) == REG) 6208 { 6209 register rtx temp = gen_reg_rtx (Pmode); 6210 register rtx val = force_operand (XEXP (x, 0), temp); 6211 if (val != temp) 6212 emit_move_insn (temp, val); 6213 6214 XEXP (x, 0) = temp; 6215 return x; 
6216 } 6217 } 6218 6219 return x; 6220} 6221 6222/* Print an integer constant expression in assembler syntax. Addition 6223 and subtraction are the only arithmetic that may appear in these 6224 expressions. FILE is the stdio stream to write to, X is the rtx, and 6225 CODE is the operand print code from the output string. */ 6226 6227static void 6228output_pic_addr_const (file, x, code) 6229 FILE *file; 6230 rtx x; 6231 int code; 6232{ 6233 char buf[256]; 6234 6235 switch (GET_CODE (x)) 6236 { 6237 case PC: 6238 if (flag_pic) 6239 putc ('.', file); 6240 else 6241 abort (); 6242 break; 6243 6244 case SYMBOL_REF: 6245 assemble_name (file, XSTR (x, 0)); 6246 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x)) 6247 fputs ("@PLT", file); 6248 break; 6249 6250 case LABEL_REF: 6251 x = XEXP (x, 0); 6252 /* FALLTHRU */ 6253 case CODE_LABEL: 6254 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); 6255 assemble_name (asm_out_file, buf); 6256 break; 6257 6258 case CONST_INT: 6259 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); 6260 break; 6261 6262 case CONST: 6263 /* This used to output parentheses around the expression, 6264 but that does not work on the 386 (either ATT or BSD assembler). */ 6265 output_pic_addr_const (file, XEXP (x, 0), code); 6266 break; 6267 6268 case CONST_DOUBLE: 6269 if (GET_MODE (x) == VOIDmode) 6270 { 6271 /* We can use %d if the number is <32 bits and positive. */ 6272 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0) 6273 fprintf (file, "0x%lx%08lx", 6274 (unsigned long) CONST_DOUBLE_HIGH (x), 6275 (unsigned long) CONST_DOUBLE_LOW (x)); 6276 else 6277 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x)); 6278 } 6279 else 6280 /* We can't handle floating point constants; 6281 PRINT_OPERAND must handle them. */ 6282 output_operand_lossage ("floating constant misused"); 6283 break; 6284 6285 case PLUS: 6286 /* Some assemblers need integer constants to appear first. 
*/ 6287 if (GET_CODE (XEXP (x, 0)) == CONST_INT) 6288 { 6289 output_pic_addr_const (file, XEXP (x, 0), code); 6290 putc ('+', file); 6291 output_pic_addr_const (file, XEXP (x, 1), code); 6292 } 6293 else if (GET_CODE (XEXP (x, 1)) == CONST_INT) 6294 { 6295 output_pic_addr_const (file, XEXP (x, 1), code); 6296 putc ('+', file); 6297 output_pic_addr_const (file, XEXP (x, 0), code); 6298 } 6299 else 6300 abort (); 6301 break; 6302 6303 case MINUS: 6304 if (!TARGET_MACHO) 6305 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file); 6306 output_pic_addr_const (file, XEXP (x, 0), code); 6307 putc ('-', file); 6308 output_pic_addr_const (file, XEXP (x, 1), code); 6309 if (!TARGET_MACHO) 6310 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file); 6311 break; 6312 6313 case UNSPEC: 6314 if (XVECLEN (x, 0) != 1) 6315 abort (); 6316 output_pic_addr_const (file, XVECEXP (x, 0, 0), code); 6317 switch (XINT (x, 1)) 6318 { 6319 case UNSPEC_GOT: 6320 fputs ("@GOT", file); 6321 break; 6322 case UNSPEC_GOTOFF: 6323 fputs ("@GOTOFF", file); 6324 break; 6325 case UNSPEC_GOTPCREL: 6326 fputs ("@GOTPCREL(%rip)", file); 6327 break; 6328 case UNSPEC_GOTTPOFF: 6329 /* FIXME: This might be @TPOFF in Sun ld too. 
	     */
	  fputs ("@GOTTPOFF", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@TPOFF", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@TPOFF", file);
	  else
	    fputs ("@NTPOFF", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@DTPOFF", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@GOTTPOFF(%rip)", file);
	  else
	    fputs ("@GOTNTPOFF", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@INDNTPOFF", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}

/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
   We need to handle our special PIC relocations.  */

void
i386_dwarf_output_addr_const (file, x)
     FILE *file;
     rtx x;
{
#ifdef ASM_QUAD
  fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
#else
  if (TARGET_64BIT)
    abort ();
  fprintf (file, "%s", ASM_LONG);
#endif
  if (flag_pic)
    output_pic_addr_const (file, x, '\0');
  else
    output_addr_const (file, x);
  fputc ('\n', file);
}

/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  SIZE is the byte size of
   the relocation: 4 emits just the @DTPOFF word, 8 pads with a zero
   upper word; anything else aborts.  */

void
i386_output_dwarf_dtprel (file, size, x)
     FILE *file;
     int size;
     rtx x;
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@DTPOFF", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      abort ();
    }
}

/* In the name of slightly smaller debug output, and to cater to
   general assembler losage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.
   */

rtx
i386_simplify_dwarf_addr (orig_x)
     rtx orig_x;
{
  rtx x = orig_x, y;

  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      /* 64-bit: a memory load of (const (unspec [sym] UNSPEC_GOTPCREL))
	 simplifies to the symbol itself.  */
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	  || GET_CODE (orig_x) != MEM)
	return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    y = NULL;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      y = XEXP (x, 0);
      if (GET_CODE (XEXP (y, 0)) == REG
	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 1);
      else if (GET_CODE (XEXP (y, 1)) == REG
	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 0);
      else
	return orig_x;
      if (GET_CODE (y) != REG
	  && GET_CODE (y) != MULT
	  && GET_CODE (y) != ASHIFT)
	return orig_x;
    }
  else
    return orig_x;

  /* @GOT is only meaningful behind a memory load, @GOTOFF only as a
     plain address; keep any non-PIC-base term Y we found above.  */
  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    {
      if (y)
	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
      return XVECEXP (x, 0, 0);
    }

  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
	      && GET_CODE (orig_x) != MEM)))
    {
      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
      if (y)
	return gen_rtx_PLUS (Pmode, y, x);
      return x;
    }

  return orig_x;
}

/* Output the condition-code suffix ("e", "ne", "g", "a", ...) selected
   by CODE and MODE to FILE.  If REVERSE, emit the opposite condition;
   FP selects the fcmov-friendly spellings of the unsigned suffixes.  */

static void
put_condition_code (code, mode, reverse, fp, file)
     enum rtx_code code;
     enum machine_mode mode;
     int reverse, fp;
     FILE *file;
{
  /* Print to FILE the condition-code suffix ("e", "ne", "g", ...) for
     comparison CODE evaluated in flags mode MODE.  REVERSE inverts the
     condition first; FP selects the alternate spellings ("nbe", "nb",
     "u", "nu") used for fcmov-style instructions.  FP compares are
     first mapped onto integer conditions.  */
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      /* Only single-instruction FP comparisons may reach here.  */
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
	abort ();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
	 Those same assemblers have the same but opposite losage on cmov.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case LTU:
      if (mode != CCmode)
	abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
	abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}

/* Print the name of register X to FILE, at the width selected by CODE:
   'b' = byte, 'w' = word, 'k' = SImode, 'q' = DImode, 'h' = high byte,
   'y' = "st(0)" form; any other CODE takes the width from X's mode.
   MMX registers always print at their natural (word-table) name.  */

void
print_reg (x, code, file)
     rtx x;
     int code;
     FILE *file;
{
  /* These pseudo-registers never appear in assembler output.  */
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  /* Map CODE onto an operand byte size (0 = high byte, 3 = st(0)).  */
  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      if (!TARGET_64BIT)
	abort ();
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      /* Integer registers get an 'e' (or 'r' for 64-bit) prefix.  */
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}

/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  Aborts if none exists; the result is cached in
   cfun->machine->some_ld_name.  */

static const char *
get_some_local_dynamic_name ()
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  abort ();
}

/* for_each_rtx callback for get_some_local_dynamic_name: record the
   first local-dynamic SYMBOL_REF found and stop the walk.  */

static int
get_some_local_dynamic_name_1 (px, data)
     rtx *px;
     void *data ATTRIBUTE_UNUSED;
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && local_dynamic_symbolic_operand (x, Pmode))
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}

/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
        nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w --  likewise, print the HImode name of the register.
   k --  likewise, print the SImode name of the register.
   q --  likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
 */

/* Print operand X to FILE, modified by the operand-code CODE described
   above (0 means no modifier).  Dispatches to PRINT_REG for registers,
   prints Intel size prefixes and addresses for MEMs, and hex/decimal
   images for constants.  */

void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  if (code)
    {
      switch (code)
	{
	case '*':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  return;

	case '&':
	  assemble_name (file, get_some_local_dynamic_name ());
	  return;

	case 'A':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  else if (ASSEMBLER_DIALECT == ASM_INTEL)
	    {
	      /* Intel syntax. For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (GET_CODE (x) != REG)
		{
		  putc ('[', file);
		  PRINT_OPERAND (file, x, 0);
		  putc (']', file);
		  return;
		}
	    }
	  else
	    abort ();

	  PRINT_OPERAND (file, x, 0);
	  return;


	case 'L':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('t', file);
	  return;

	case 'z':
	  /* 387 opcodes don't get size suffixes if the operands are
	     registers. */
	  if (STACK_REG_P (x))
	    return;

	  /* Likewise if using Intel opcodes. */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    return;

	  /* This is the size of op from size of operand. */
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 2:
#ifdef HAVE_GAS_FILDS_FISTS
	      putc ('s', file);
#endif
	      return;

	    case 4:
	      if (GET_MODE (x) == SFmode)
		{
		  putc ('s', file);
		  return;
		}
	      else
		putc ('l', file);
	      return;

	    case 12:
	    case 16:
	      putc ('t', file);
	      return;

	    case 8:
	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
		{
#ifdef GAS_MNEMONICS
		  putc ('q', file);
#else
		  /* Assemblers without 'q' spell it as a double 'l'.  */
		  putc ('l', file);
		  putc ('l', file);
#endif
		}
	      else
		putc ('l', file);
	      return;

	    default:
	      abort ();
	    }

	case 'b':
	case 'w':
	case 'k':
	case 'q':
	case 'h':
	case 'y':
	case 'X':
	case 'P':
	  /* Width/PIC modifiers are handled by the non-modifier code
	     below (PRINT_REG / MEM / constant cases).  */
	  break;

	case 's':
	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      PRINT_OPERAND (file, x, 0);
	      putc (',', file);
	    }
	  return;

	case 'D':
	  /* Little bit of braindamage here.  The SSE compare instructions
	     does use completely different names for the comparisons that the
	     fp conditional moves.  */
	  switch (GET_CODE (x))
	    {
	    case EQ:
	    case UNEQ:
	      fputs ("eq", file);
	      break;
	    case LT:
	    case UNLT:
	      fputs ("lt", file);
	      break;
	    case LE:
	    case UNLE:
	      fputs ("le", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case NE:
	    case LTGT:
	      fputs ("neq", file);
	      break;
	    case UNGE:
	    case GE:
	      fputs ("nlt", file);
	      break;
	    case UNGT:
	    case GT:
	      fputs ("nle", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    default:
	      abort ();
	      break;
	    }
	  return;
	case 'O':
#ifdef CMOV_SUN_AS_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    {
	      switch (GET_MODE (x))
		{
		case HImode: putc ('w', file); break;
		case SImode:
		case SFmode: putc ('l', file); break;
		case DImode:
		case DFmode: putc ('q', file); break;
		default: abort ();
		}
	      putc ('.', file);
	    }
#endif
	  return;
	case 'C':
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
	  return;
	case 'F':
#ifdef CMOV_SUN_AS_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
	  return;

	  /* Like above, but reverse condition */
	case 'c':
	  /* Check to see if argument to %c is really a constant
	     and not a condition code which needs to be reversed.  */
	  if (GET_RTX_CLASS (GET_CODE (x)) != '<')
	    {
	      output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
	      return;
	    }
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
	  return;
	case 'f':
#ifdef CMOV_SUN_AS_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
	  return;
	case '+':
	  {
	    /* NOTE: this local deliberately shadows parameter X; it holds
	       the REG_BR_PROB note, not the operand.  */
	    rtx x;

	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
	      return;

	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	    if (x)
	      {
		int pred_val = INTVAL (XEXP (x, 0));

		/* Only hint when the prediction is well away from 50/50.  */
		if (pred_val < REG_BR_PROB_BASE * 45 / 100
		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  {
		    int taken = pred_val > REG_BR_PROB_BASE / 2;
		    int cputaken = final_forward_branch_p (current_output_insn) == 0;

		    /* Emit hints only in the case default branch prediction
		       heruistics would fail.  */
		    if (taken != cputaken)
		      {
			/* We use 3e (DS) prefix for taken branches and
			   2e (CS) prefix for not taken branches.  */
			if (taken)
			  fputs ("ds ; ", file);
			else
			  fputs ("cs ; ", file);
		      }
		  }
	      }
	    return;
	  }
	default:
	  output_operand_lossage ("invalid operand code `%c'", code);
	}
    }

  if (GET_CODE (x) == REG)
    {
      PRINT_REG (x, code, file);
    }

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
	{
	  const char * size;
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "XWORD"; break;
	    case 16: size = "XMMWORD"; break;
	    default:
	      abort ();
	    }

	  /* Check for explicit size override (codes 'b', 'w' and 'k') */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";

	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      if (flag_pic && CONSTANT_ADDRESS_P (x))
	output_pic_addr_const (file, x, code);
      /* Avoid (%rip) for call operands.  */
      else if (CONSTANT_ADDRESS_P (x) && code == 'P'
	       && GET_CODE (x) != CONST_INT)
	output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
	output_operand_lossage ("invalid constraints for operand");
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      /* SFmode immediates are printed as their 32-bit image in hex.  */
      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      fprintf (file, "0x%lx", l);
    }

  /* These float cases don't actually occur as immediate operands.
 */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
	   && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else
    {
      /* Remaining constants: immediates get '$' (AT&T) or
	 "OFFSET FLAT:" (Intel, for symbolic operands); code 'P'
	 suppresses the prefix entirely.  */
      if (code != 'P')
	{
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}

/* Print a memory operand whose address is ADDR.  Decomposes ADDR into
   base/index/displacement/scale and prints it in the current assembler
   dialect; thread-pointer UNSPEC_TP addresses print as %gs:0 / %fs:0.  */

void
print_operand_address (file, addr)
     FILE *file;
     register rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
    {
      if (ASSEMBLER_DIALECT == ASM_INTEL)
	fputs ("DWORD PTR ", file);
      if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
	putc ('%', file);
      if (TARGET_64BIT)
	fputs ("fs:0", file);
      else
	fputs ("gs:0", file);
      return;
    }

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    {
	      if (USER_LABEL_PREFIX[0] == 0)
		putc ('%', file);
	      fputs ("ds:", file);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
	}
      else if (flag_pic)
	output_pic_addr_const (file, addr, 0);
      else
	output_addr_const (file, addr);

      /* Use one byte shorter RIP relative addressing for 64bit mode.  */
      if (TARGET_64BIT
	  && ((GET_CODE (addr) == SYMBOL_REF
	       && ! tls_symbolic_operand (addr, GET_MODE (addr)))
	      || GET_CODE (addr) == LABEL_REF
	      || (GET_CODE (addr) == CONST
		  && GET_CODE (XEXP (addr, 0)) == PLUS
		  && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
		      || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
		  && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
	fputs ("(%rip)", file);
    }
  else
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  /* AT&T: disp(base,index,scale).  */
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    PRINT_REG (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  /* Intel: sym[base+index*scale+offset].  */
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      PRINT_REG (base, 0, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      PRINT_REG (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}

/* Print the TLS relocation suffix for UNSPEC X; returns false when X
   is not one of ours so generic code can handle it.  */

bool
output_addr_const_extra (file, x)
     FILE *file;
     rtx x;
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.
 */
      fputs ("@GOTTPOFF", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@TPOFF", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      /* 64-bit mode has no negated-TPOFF relocation.  */
      if (TARGET_64BIT)
	fputs ("@TPOFF", file);
      else
	fputs ("@NTPOFF", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@DTPOFF", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@GOTTPOFF(%rip)", file);
      else
	fputs ("@GOTNTPOFF", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@INDNTPOFF", file);
      break;

    default:
      return false;
    }

  return true;
}

/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_di (operands, num, lo_half, hi_half)
     rtx operands[];
     int num;
     rtx lo_half[], hi_half[];
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuse to split volatile memory addresses,
         but we still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  lo_half[num] = adjust_address (op, SImode, 0);
	  hi_half[num] = adjust_address (op, SImode, 4);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 4);
	}
    }
}
/* Split one or more TImode RTL references into pairs of DImode
"operands" is a pointer to an array of DImode RTL to 7334 split and "num" is its length. lo_half and hi_half are output arrays 7335 that parallel "operands". */ 7336 7337void 7338split_ti (operands, num, lo_half, hi_half) 7339 rtx operands[]; 7340 int num; 7341 rtx lo_half[], hi_half[]; 7342{ 7343 while (num--) 7344 { 7345 rtx op = operands[num]; 7346 7347 /* simplify_subreg refuse to split volatile memory addresses, but we 7348 still have to handle it. */ 7349 if (GET_CODE (op) == MEM) 7350 { 7351 lo_half[num] = adjust_address (op, DImode, 0); 7352 hi_half[num] = adjust_address (op, DImode, 8); 7353 } 7354 else 7355 { 7356 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0); 7357 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8); 7358 } 7359 } 7360} 7361 7362/* Output code to perform a 387 binary operation in INSN, one of PLUS, 7363 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] 7364 is the expression of the binary operation. The output may either be 7365 emitted here, or returned to the caller, like all output_* functions. 7366 7367 There is no guarantee that the operands are the same mode, as they 7368 might be within FLOAT or FLOAT_EXTEND expressions. */ 7369 7370#ifndef SYSV386_COMPAT 7371/* Set to 1 for compatibility with brain-damaged assemblers. No-one 7372 wants to fix the assemblers because that causes incompatibility 7373 with gcc. No-one wants to fix gcc because that causes 7374 incompatibility with assemblers... You can use the option of 7375 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. 
 */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (insn, operands)
     rtx insn;
     rtx *operands;
{
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else if (!is_sse)
    abort ();
#endif

  /* Pick the x87 mnemonic stem (fi* for integer memory operands)
     and the SSE stem.  */
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "div";
      break;

    default:
      abort ();
    }

  if (is_sse)
    {
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
	strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
	strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative ops: canonicalize so operands[0] == operands[1].  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative ops need the reversed 'r' forms when the
	 memory/stack-top operand is on the left.  */
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}

/* Output code to initialize control word copies used by
   trunc?f?i patterns.  NORMAL is set to current control word, while ROUND_DOWN
   is set to control word rounding downwards.
 */
void
emit_i387_cw_initialization (normal, round_down)
     rtx normal, round_down;
{
  rtx reg = gen_reg_rtx (HImode);

  /* Store the live control word, then derive the round-down copy by
     setting the rounding-control bits (mask 0xc00).  */
  emit_insn (gen_x86_fnstcw_1 (normal));
  emit_move_insn (reg, normal);
  if (!TARGET_PARTIAL_REG_STALL && !optimize_size
      && !TARGET_64BIT)
    /* Insert 0xc into bits 10-11 via insv to dodge partial-reg stalls.  */
    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
  else
    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
  emit_move_insn (round_down, reg);
}

/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  Operands[2]/[3] hold the saved and
   round-down control words set up by emit_i387_cw_initialization.  */

const char *
output_fix_trunc (insn, operands)
     rtx insn;
     rtx *operands;
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (!STACK_TOP_P (operands[1]))
    abort ();

  if (GET_CODE (operands[0]) != MEM)
    abort ();

  /* Switch to truncating rounding, store, then restore the old mode.  */
  output_asm_insn ("fldcw\t%3", operands);
  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z0\t%0", operands);
  else
    output_asm_insn ("fist%z0\t%0", operands);
  output_asm_insn ("fldcw\t%2", operands);

  return "";
}

/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.
 */

const char *
output_fp_compare (insn, operands, eflags_p, unordered_p)
     rtx insn;
     rtx *operands;
     int eflags_p, unordered_p;
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);

  /* In the fnstsw form operand 0 is the status-word destination, so
     the values being compared shift to operands 1 and 2.  */
  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }
  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "ucomiss\t{%1, %0|%0, %1}";
	else
	  return "comiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "ucomisd\t{%1, %0|%0, %1}";
	else
	  return "comisd\t{%1, %0|%0, %1}";
    }

  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p == 1)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return "fstp\t%y0";
	}
      else
	{
	  if (eflags_p == 2)
	    {
	      if (unordered_p)
		return "fucompp\n\tfnstsw\t%0";
	      else
		return "fcompp\n\tfnstsw\t%0";
	    }
	  else
	    {
	      if (unordered_p)
		return "fucompp";
	      else
		return "fcompp";
	    }
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[24] =
      {
	"fcom%z1\t%y1",
	"fcomp%z1\t%y1",
	"fucom%z1\t%y1",
	"fucomp%z1\t%y1",

	"ficom%z1\t%y1",
	"ficomp%z1\t%y1",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL,

	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      /* NULL table slots are combinations that must not be generated.  */
      if (mask >= 24)
	abort ();
      ret = alt[mask];
      if (ret == NULL)
	abort ();

      return ret;
    }
}

/* Output one element of a jump-table address vector: the absolute
   address of local label number VALUE, as a long (or quad in 64-bit
   mode, which requires ASM_QUAD).  */

void
ix86_output_addr_vec_elt (file, value)
     FILE *file;
     int value;
{
  const char *directive = ASM_LONG;

  if (TARGET_64BIT)
    {
#ifdef ASM_QUAD
      directive = ASM_QUAD;
#else
      abort ();
#endif
    }

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

/* Output one element of an address-difference jump table: label VALUE
   relative to label REL (64-bit), @GOTOFF when the assembler allows it
   in data, or relative to the GOT symbol otherwise.  */

void
ix86_output_addr_diff_elt (file, value, rel)
     FILE *file;
     int value, rel;
{
  if (TARGET_64BIT)
    fprintf (file, "%s%s%d-%s%d\n",
	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
	     machopic_function_base_name () + 1);
#endif
  else
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}

/* Generate either "mov $0, reg" or "xor reg, reg", as
appropriate 7810 for the target. */ 7811 7812void 7813ix86_expand_clear (dest) 7814 rtx dest; 7815{ 7816 rtx tmp; 7817 7818 /* We play register width games, which are only valid after reload. */ 7819 if (!reload_completed) 7820 abort (); 7821 7822 /* Avoid HImode and its attendant prefix byte. */ 7823 if (GET_MODE_SIZE (GET_MODE (dest)) < 4) 7824 dest = gen_rtx_REG (SImode, REGNO (dest)); 7825 7826 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx); 7827 7828 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */ 7829 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size)) 7830 { 7831 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17)); 7832 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob)); 7833 } 7834 7835 emit_insn (tmp); 7836} 7837 7838/* X is an unchanging MEM. If it is a constant pool reference, return 7839 the constant pool rtx, else NULL. */ 7840 7841static rtx 7842maybe_get_pool_constant (x) 7843 rtx x; 7844{ 7845 x = XEXP (x, 0); 7846 7847 if (flag_pic && ! 
TARGET_64BIT) 7848 { 7849 if (GET_CODE (x) != PLUS) 7850 return NULL_RTX; 7851 if (XEXP (x, 0) != pic_offset_table_rtx) 7852 return NULL_RTX; 7853 x = XEXP (x, 1); 7854 if (GET_CODE (x) != CONST) 7855 return NULL_RTX; 7856 x = XEXP (x, 0); 7857 if (GET_CODE (x) != UNSPEC) 7858 return NULL_RTX; 7859 if (XINT (x, 1) != UNSPEC_GOTOFF) 7860 return NULL_RTX; 7861 x = XVECEXP (x, 0, 0); 7862 } 7863 7864 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) 7865 return get_pool_constant (x); 7866 7867 return NULL_RTX; 7868} 7869 7870void 7871ix86_expand_move (mode, operands) 7872 enum machine_mode mode; 7873 rtx operands[]; 7874{ 7875 int strict = (reload_in_progress || reload_completed); 7876 rtx insn, op0, op1, tmp; 7877 7878 op0 = operands[0]; 7879 op1 = operands[1]; 7880 7881 if (tls_symbolic_operand (op1, Pmode)) 7882 { 7883 op1 = legitimize_address (op1, op1, VOIDmode); 7884 if (GET_CODE (op0) == MEM) 7885 { 7886 tmp = gen_reg_rtx (mode); 7887 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1)); 7888 op1 = tmp; 7889 } 7890 } 7891 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode)) 7892 { 7893#if TARGET_MACHO 7894 if (MACHOPIC_PURE) 7895 { 7896 rtx temp = ((reload_in_progress 7897 || ((op0 && GET_CODE (op0) == REG) 7898 && mode == Pmode)) 7899 ? op0 : gen_reg_rtx (Pmode)); 7900 op1 = machopic_indirect_data_reference (op1, temp); 7901 op1 = machopic_legitimize_pic_address (op1, mode, 7902 temp == op1 ? 
0 : temp); 7903 } 7904 else 7905 { 7906 if (MACHOPIC_INDIRECT) 7907 op1 = machopic_indirect_data_reference (op1, 0); 7908 } 7909 if (op0 != op1) 7910 { 7911 insn = gen_rtx_SET (VOIDmode, op0, op1); 7912 emit_insn (insn); 7913 } 7914 return; 7915#endif /* TARGET_MACHO */ 7916 if (GET_CODE (op0) == MEM) 7917 op1 = force_reg (Pmode, op1); 7918 else 7919 { 7920 rtx temp = op0; 7921 if (GET_CODE (temp) != REG) 7922 temp = gen_reg_rtx (Pmode); 7923 temp = legitimize_pic_address (op1, temp); 7924 if (temp == op0) 7925 return; 7926 op1 = temp; 7927 } 7928 } 7929 else 7930 { 7931 if (GET_CODE (op0) == MEM 7932 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode) 7933 || !push_operand (op0, mode)) 7934 && GET_CODE (op1) == MEM) 7935 op1 = force_reg (mode, op1); 7936 7937 if (push_operand (op0, mode) 7938 && ! general_no_elim_operand (op1, mode)) 7939 op1 = copy_to_mode_reg (mode, op1); 7940 7941 /* Force large constants in 64bit compilation into register 7942 to get them CSEed. */ 7943 if (TARGET_64BIT && mode == DImode 7944 && immediate_operand (op1, mode) 7945 && !x86_64_zero_extended_value (op1) 7946 && !register_operand (op0, mode) 7947 && optimize && !reload_completed && !reload_in_progress) 7948 op1 = copy_to_mode_reg (mode, op1); 7949 7950 if (FLOAT_MODE_P (mode)) 7951 { 7952 /* If we are loading a floating point constant to a register, 7953 force the value to memory now, since we'll get better code 7954 out the back end. 
*/ 7955 7956 if (strict) 7957 ; 7958 else if (GET_CODE (op1) == CONST_DOUBLE) 7959 { 7960 op1 = validize_mem (force_const_mem (mode, op1)); 7961 if (!register_operand (op0, mode)) 7962 { 7963 rtx temp = gen_reg_rtx (mode); 7964 emit_insn (gen_rtx_SET (VOIDmode, temp, op1)); 7965 emit_move_insn (op0, temp); 7966 return; 7967 } 7968 } 7969 } 7970 } 7971 7972 insn = gen_rtx_SET (VOIDmode, op0, op1); 7973 7974 emit_insn (insn); 7975} 7976 7977void 7978ix86_expand_vector_move (mode, operands) 7979 enum machine_mode mode; 7980 rtx operands[]; 7981{ 7982 /* Force constants other than zero into memory. We do not know how 7983 the instructions used to build constants modify the upper 64 bits 7984 of the register, once we have that information we may be able 7985 to handle some of them more efficiently. */ 7986 if ((reload_in_progress | reload_completed) == 0 7987 && register_operand (operands[0], mode) 7988 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode)) 7989 { 7990 operands[1] = force_const_mem (mode, operands[1]); 7991 emit_move_insn (operands[0], operands[1]); 7992 return; 7993 } 7994 7995 /* Make operand1 a register if it isn't already. */ 7996 if (!no_new_pseudos 7997 && !register_operand (operands[0], mode) 7998 && !register_operand (operands[1], mode)) 7999 { 8000 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]); 8001 emit_move_insn (operands[0], temp); 8002 return; 8003 } 8004 8005 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); 8006} 8007 8008/* Attempt to expand a binary operator. Make the expansion closer to the 8009 actual machine, then just general_operand, which will allow 3 separate 8010 memory references (one output, two input) in a single insn. 
*/ 8011 8012void 8013ix86_expand_binary_operator (code, mode, operands) 8014 enum rtx_code code; 8015 enum machine_mode mode; 8016 rtx operands[]; 8017{ 8018 int matching_memory; 8019 rtx src1, src2, dst, op, clob; 8020 8021 dst = operands[0]; 8022 src1 = operands[1]; 8023 src2 = operands[2]; 8024 8025 /* Recognize <var1> = <value> <op> <var1> for commutative operators */ 8026 if (GET_RTX_CLASS (code) == 'c' 8027 && (rtx_equal_p (dst, src2) 8028 || immediate_operand (src1, mode))) 8029 { 8030 rtx temp = src1; 8031 src1 = src2; 8032 src2 = temp; 8033 } 8034 8035 /* If the destination is memory, and we do not have matching source 8036 operands, do things in registers. */ 8037 matching_memory = 0; 8038 if (GET_CODE (dst) == MEM) 8039 { 8040 if (rtx_equal_p (dst, src1)) 8041 matching_memory = 1; 8042 else if (GET_RTX_CLASS (code) == 'c' 8043 && rtx_equal_p (dst, src2)) 8044 matching_memory = 2; 8045 else 8046 dst = gen_reg_rtx (mode); 8047 } 8048 8049 /* Both source operands cannot be in memory. */ 8050 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM) 8051 { 8052 if (matching_memory != 2) 8053 src2 = force_reg (mode, src2); 8054 else 8055 src1 = force_reg (mode, src1); 8056 } 8057 8058 /* If the operation is not commutable, source 1 cannot be a constant 8059 or non-matching memory. */ 8060 if ((CONSTANT_P (src1) 8061 || (!matching_memory && GET_CODE (src1) == MEM)) 8062 && GET_RTX_CLASS (code) != 'c') 8063 src1 = force_reg (mode, src1); 8064 8065 /* If optimizing, copy to regs to improve CSE */ 8066 if (optimize && ! no_new_pseudos) 8067 { 8068 if (GET_CODE (dst) == MEM) 8069 dst = gen_reg_rtx (mode); 8070 if (GET_CODE (src1) == MEM) 8071 src1 = force_reg (mode, src1); 8072 if (GET_CODE (src2) == MEM) 8073 src2 = force_reg (mode, src2); 8074 } 8075 8076 /* Emit the instruction. 
*/ 8077 8078 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2)); 8079 if (reload_in_progress) 8080 { 8081 /* Reload doesn't know about the flags register, and doesn't know that 8082 it doesn't want to clobber it. We can only do this with PLUS. */ 8083 if (code != PLUS) 8084 abort (); 8085 emit_insn (op); 8086 } 8087 else 8088 { 8089 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); 8090 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); 8091 } 8092 8093 /* Fix up the destination if needed. */ 8094 if (dst != operands[0]) 8095 emit_move_insn (operands[0], dst); 8096} 8097 8098/* Return TRUE or FALSE depending on whether the binary operator meets the 8099 appropriate constraints. */ 8100 8101int 8102ix86_binary_operator_ok (code, mode, operands) 8103 enum rtx_code code; 8104 enum machine_mode mode ATTRIBUTE_UNUSED; 8105 rtx operands[3]; 8106{ 8107 /* Both source operands cannot be in memory. */ 8108 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM) 8109 return 0; 8110 /* If the operation is not commutable, source 1 cannot be a constant. */ 8111 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c') 8112 return 0; 8113 /* If the destination is memory, we must have a matching source operand. */ 8114 if (GET_CODE (operands[0]) == MEM 8115 && ! (rtx_equal_p (operands[0], operands[1]) 8116 || (GET_RTX_CLASS (code) == 'c' 8117 && rtx_equal_p (operands[0], operands[2])))) 8118 return 0; 8119 /* If the operation is not commutable and the source 1 is memory, we must 8120 have a matching destination. */ 8121 if (GET_CODE (operands[1]) == MEM 8122 && GET_RTX_CLASS (code) != 'c' 8123 && ! rtx_equal_p (operands[0], operands[1])) 8124 return 0; 8125 return 1; 8126} 8127 8128/* Attempt to expand a unary operator. Make the expansion closer to the 8129 actual machine, then just general_operand, which will allow 2 separate 8130 memory references (one output, one input) in a single insn. 
*/ 8131 8132void 8133ix86_expand_unary_operator (code, mode, operands) 8134 enum rtx_code code; 8135 enum machine_mode mode; 8136 rtx operands[]; 8137{ 8138 int matching_memory; 8139 rtx src, dst, op, clob; 8140 8141 dst = operands[0]; 8142 src = operands[1]; 8143 8144 /* If the destination is memory, and we do not have matching source 8145 operands, do things in registers. */ 8146 matching_memory = 0; 8147 if (GET_CODE (dst) == MEM) 8148 { 8149 if (rtx_equal_p (dst, src)) 8150 matching_memory = 1; 8151 else 8152 dst = gen_reg_rtx (mode); 8153 } 8154 8155 /* When source operand is memory, destination must match. */ 8156 if (!matching_memory && GET_CODE (src) == MEM) 8157 src = force_reg (mode, src); 8158 8159 /* If optimizing, copy to regs to improve CSE */ 8160 if (optimize && ! no_new_pseudos) 8161 { 8162 if (GET_CODE (dst) == MEM) 8163 dst = gen_reg_rtx (mode); 8164 if (GET_CODE (src) == MEM) 8165 src = force_reg (mode, src); 8166 } 8167 8168 /* Emit the instruction. */ 8169 8170 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src)); 8171 if (reload_in_progress || code == NOT) 8172 { 8173 /* Reload doesn't know about the flags register, and doesn't know that 8174 it doesn't want to clobber it. */ 8175 if (code != NOT) 8176 abort (); 8177 emit_insn (op); 8178 } 8179 else 8180 { 8181 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); 8182 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); 8183 } 8184 8185 /* Fix up the destination if needed. */ 8186 if (dst != operands[0]) 8187 emit_move_insn (operands[0], dst); 8188} 8189 8190/* Return TRUE or FALSE depending on whether the unary operator meets the 8191 appropriate constraints. */ 8192 8193int 8194ix86_unary_operator_ok (code, mode, operands) 8195 enum rtx_code code ATTRIBUTE_UNUSED; 8196 enum machine_mode mode ATTRIBUTE_UNUSED; 8197 rtx operands[2] ATTRIBUTE_UNUSED; 8198{ 8199 /* If one of operands is memory, source and destination must match. 
*/ 8200 if ((GET_CODE (operands[0]) == MEM 8201 || GET_CODE (operands[1]) == MEM) 8202 && ! rtx_equal_p (operands[0], operands[1])) 8203 return FALSE; 8204 return TRUE; 8205} 8206 8207/* Return TRUE or FALSE depending on whether the first SET in INSN 8208 has source and destination with matching CC modes, and that the 8209 CC mode is at least as constrained as REQ_MODE. */ 8210 8211int 8212ix86_match_ccmode (insn, req_mode) 8213 rtx insn; 8214 enum machine_mode req_mode; 8215{ 8216 rtx set; 8217 enum machine_mode set_mode; 8218 8219 set = PATTERN (insn); 8220 if (GET_CODE (set) == PARALLEL) 8221 set = XVECEXP (set, 0, 0); 8222 if (GET_CODE (set) != SET) 8223 abort (); 8224 if (GET_CODE (SET_SRC (set)) != COMPARE) 8225 abort (); 8226 8227 set_mode = GET_MODE (SET_DEST (set)); 8228 switch (set_mode) 8229 { 8230 case CCNOmode: 8231 if (req_mode != CCNOmode 8232 && (req_mode != CCmode 8233 || XEXP (SET_SRC (set), 1) != const0_rtx)) 8234 return 0; 8235 break; 8236 case CCmode: 8237 if (req_mode == CCGCmode) 8238 return 0; 8239 /* FALLTHRU */ 8240 case CCGCmode: 8241 if (req_mode == CCGOCmode || req_mode == CCNOmode) 8242 return 0; 8243 /* FALLTHRU */ 8244 case CCGOCmode: 8245 if (req_mode == CCZmode) 8246 return 0; 8247 /* FALLTHRU */ 8248 case CCZmode: 8249 break; 8250 8251 default: 8252 abort (); 8253 } 8254 8255 return (GET_MODE (SET_SRC (set)) == set_mode); 8256} 8257 8258/* Generate insn patterns to do an integer compare of OPERANDS. */ 8259 8260static rtx 8261ix86_expand_int_compare (code, op0, op1) 8262 enum rtx_code code; 8263 rtx op0, op1; 8264{ 8265 enum machine_mode cmpmode; 8266 rtx tmp, flags; 8267 8268 cmpmode = SELECT_CC_MODE (code, op0, op1); 8269 flags = gen_rtx_REG (cmpmode, FLAGS_REG); 8270 8271 /* This is very simple, but making the interface the same as in the 8272 FP case makes the rest of the code easier. 
*/ 8273 tmp = gen_rtx_COMPARE (cmpmode, op0, op1); 8274 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp)); 8275 8276 /* Return the test that should be put into the flags user, i.e. 8277 the bcc, scc, or cmov instruction. */ 8278 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx); 8279} 8280 8281/* Figure out whether to use ordered or unordered fp comparisons. 8282 Return the appropriate mode to use. */ 8283 8284enum machine_mode 8285ix86_fp_compare_mode (code) 8286 enum rtx_code code ATTRIBUTE_UNUSED; 8287{ 8288 /* ??? In order to make all comparisons reversible, we do all comparisons 8289 non-trapping when compiling for IEEE. Once gcc is able to distinguish 8290 all forms trapping and nontrapping comparisons, we can make inequality 8291 comparisons trapping again, since it results in better code when using 8292 FCOM based compares. */ 8293 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode; 8294} 8295 8296enum machine_mode 8297ix86_cc_mode (code, op0, op1) 8298 enum rtx_code code; 8299 rtx op0, op1; 8300{ 8301 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT) 8302 return ix86_fp_compare_mode (code); 8303 switch (code) 8304 { 8305 /* Only zero flag is needed. */ 8306 case EQ: /* ZF=0 */ 8307 case NE: /* ZF!=0 */ 8308 return CCZmode; 8309 /* Codes needing carry flag. */ 8310 case GEU: /* CF=0 */ 8311 case GTU: /* CF=0 & ZF=0 */ 8312 case LTU: /* CF=1 */ 8313 case LEU: /* CF=1 | ZF=1 */ 8314 return CCmode; 8315 /* Codes possibly doable only with sign flag when 8316 comparing against zero. */ 8317 case GE: /* SF=OF or SF=0 */ 8318 case LT: /* SF<>OF or SF=1 */ 8319 if (op1 == const0_rtx) 8320 return CCGOCmode; 8321 else 8322 /* For other cases Carry flag is not required. */ 8323 return CCGCmode; 8324 /* Codes doable only with sign flag when comparing 8325 against zero, but we miss jump instruction for it 8326 so we need to use relational tests agains overflow 8327 that thus needs to be zero. 
*/ 8328 case GT: /* ZF=0 & SF=OF */ 8329 case LE: /* ZF=1 | SF<>OF */ 8330 if (op1 == const0_rtx) 8331 return CCNOmode; 8332 else 8333 return CCGCmode; 8334 /* strcmp pattern do (use flags) and combine may ask us for proper 8335 mode. */ 8336 case USE: 8337 return CCmode; 8338 default: 8339 abort (); 8340 } 8341} 8342 8343/* Return true if we should use an FCOMI instruction for this fp comparison. */ 8344 8345int 8346ix86_use_fcomi_compare (code) 8347 enum rtx_code code ATTRIBUTE_UNUSED; 8348{ 8349 enum rtx_code swapped_code = swap_condition (code); 8350 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code)) 8351 || (ix86_fp_comparison_cost (swapped_code) 8352 == ix86_fp_comparison_fcomi_cost (swapped_code))); 8353} 8354 8355/* Swap, force into registers, or otherwise massage the two operands 8356 to a fp comparison. The operands are updated in place; the new 8357 comparsion code is returned. */ 8358 8359static enum rtx_code 8360ix86_prepare_fp_compare_args (code, pop0, pop1) 8361 enum rtx_code code; 8362 rtx *pop0, *pop1; 8363{ 8364 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code); 8365 rtx op0 = *pop0, op1 = *pop1; 8366 enum machine_mode op_mode = GET_MODE (op0); 8367 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1); 8368 8369 /* All of the unordered compare instructions only work on registers. 8370 The same is true of the XFmode compare instructions. The same is 8371 true of the fcomi compare instructions. */ 8372 8373 if (!is_sse 8374 && (fpcmp_mode == CCFPUmode 8375 || op_mode == XFmode 8376 || op_mode == TFmode 8377 || ix86_use_fcomi_compare (code))) 8378 { 8379 op0 = force_reg (op_mode, op0); 8380 op1 = force_reg (op_mode, op1); 8381 } 8382 else 8383 { 8384 /* %%% We only allow op1 in memory; op0 must be st(0). So swap 8385 things around if they appear profitable, otherwise force op0 8386 into a register. */ 8387 8388 if (standard_80387_constant_p (op0) == 0 8389 || (GET_CODE (op0) == MEM 8390 && ! 
(standard_80387_constant_p (op1) == 0 8391 || GET_CODE (op1) == MEM))) 8392 { 8393 rtx tmp; 8394 tmp = op0, op0 = op1, op1 = tmp; 8395 code = swap_condition (code); 8396 } 8397 8398 if (GET_CODE (op0) != REG) 8399 op0 = force_reg (op_mode, op0); 8400 8401 if (CONSTANT_P (op1)) 8402 { 8403 if (standard_80387_constant_p (op1)) 8404 op1 = force_reg (op_mode, op1); 8405 else 8406 op1 = validize_mem (force_const_mem (op_mode, op1)); 8407 } 8408 } 8409 8410 /* Try to rearrange the comparison to make it cheaper. */ 8411 if (ix86_fp_comparison_cost (code) 8412 > ix86_fp_comparison_cost (swap_condition (code)) 8413 && (GET_CODE (op1) == REG || !no_new_pseudos)) 8414 { 8415 rtx tmp; 8416 tmp = op0, op0 = op1, op1 = tmp; 8417 code = swap_condition (code); 8418 if (GET_CODE (op0) != REG) 8419 op0 = force_reg (op_mode, op0); 8420 } 8421 8422 *pop0 = op0; 8423 *pop1 = op1; 8424 return code; 8425} 8426 8427/* Convert comparison codes we use to represent FP comparison to integer 8428 code that will result in proper branch. Return UNKNOWN if no such code 8429 is available. */ 8430static enum rtx_code 8431ix86_fp_compare_code_to_integer (code) 8432 enum rtx_code code; 8433{ 8434 switch (code) 8435 { 8436 case GT: 8437 return GTU; 8438 case GE: 8439 return GEU; 8440 case ORDERED: 8441 case UNORDERED: 8442 return code; 8443 break; 8444 case UNEQ: 8445 return EQ; 8446 break; 8447 case UNLT: 8448 return LTU; 8449 break; 8450 case UNLE: 8451 return LEU; 8452 break; 8453 case LTGT: 8454 return NE; 8455 break; 8456 default: 8457 return UNKNOWN; 8458 } 8459} 8460 8461/* Split comparison code CODE into comparisons we can do using branch 8462 instructions. BYPASS_CODE is comparison code for branch that will 8463 branch around FIRST_CODE and SECOND_CODE. If some of branches 8464 is not required, set value to NIL. 8465 We never require more than two branches. 
*/ 8466static void 8467ix86_fp_comparison_codes (code, bypass_code, first_code, second_code) 8468 enum rtx_code code, *bypass_code, *first_code, *second_code; 8469{ 8470 *first_code = code; 8471 *bypass_code = NIL; 8472 *second_code = NIL; 8473 8474 /* The fcomi comparison sets flags as follows: 8475 8476 cmp ZF PF CF 8477 > 0 0 0 8478 < 0 0 1 8479 = 1 0 0 8480 un 1 1 1 */ 8481 8482 switch (code) 8483 { 8484 case GT: /* GTU - CF=0 & ZF=0 */ 8485 case GE: /* GEU - CF=0 */ 8486 case ORDERED: /* PF=0 */ 8487 case UNORDERED: /* PF=1 */ 8488 case UNEQ: /* EQ - ZF=1 */ 8489 case UNLT: /* LTU - CF=1 */ 8490 case UNLE: /* LEU - CF=1 | ZF=1 */ 8491 case LTGT: /* EQ - ZF=0 */ 8492 break; 8493 case LT: /* LTU - CF=1 - fails on unordered */ 8494 *first_code = UNLT; 8495 *bypass_code = UNORDERED; 8496 break; 8497 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */ 8498 *first_code = UNLE; 8499 *bypass_code = UNORDERED; 8500 break; 8501 case EQ: /* EQ - ZF=1 - fails on unordered */ 8502 *first_code = UNEQ; 8503 *bypass_code = UNORDERED; 8504 break; 8505 case NE: /* NE - ZF=0 - fails on unordered */ 8506 *first_code = LTGT; 8507 *second_code = UNORDERED; 8508 break; 8509 case UNGE: /* GEU - CF=0 - fails on unordered */ 8510 *first_code = GE; 8511 *second_code = UNORDERED; 8512 break; 8513 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */ 8514 *first_code = GT; 8515 *second_code = UNORDERED; 8516 break; 8517 default: 8518 abort (); 8519 } 8520 if (!TARGET_IEEE_FP) 8521 { 8522 *second_code = NIL; 8523 *bypass_code = NIL; 8524 } 8525} 8526 8527/* Return cost of comparison done fcom + arithmetics operations on AX. 8528 All following functions do use number of instructions as an cost metrics. 8529 In future this should be tweaked to compute bytes for optimize_size and 8530 take into account performance of various instructions on various CPUs. 
*/ 8531static int 8532ix86_fp_comparison_arithmetics_cost (code) 8533 enum rtx_code code; 8534{ 8535 if (!TARGET_IEEE_FP) 8536 return 4; 8537 /* The cost of code output by ix86_expand_fp_compare. */ 8538 switch (code) 8539 { 8540 case UNLE: 8541 case UNLT: 8542 case LTGT: 8543 case GT: 8544 case GE: 8545 case UNORDERED: 8546 case ORDERED: 8547 case UNEQ: 8548 return 4; 8549 break; 8550 case LT: 8551 case NE: 8552 case EQ: 8553 case UNGE: 8554 return 5; 8555 break; 8556 case LE: 8557 case UNGT: 8558 return 6; 8559 break; 8560 default: 8561 abort (); 8562 } 8563} 8564 8565/* Return cost of comparison done using fcomi operation. 8566 See ix86_fp_comparison_arithmetics_cost for the metrics. */ 8567static int 8568ix86_fp_comparison_fcomi_cost (code) 8569 enum rtx_code code; 8570{ 8571 enum rtx_code bypass_code, first_code, second_code; 8572 /* Return arbitarily high cost when instruction is not supported - this 8573 prevents gcc from using it. */ 8574 if (!TARGET_CMOVE) 8575 return 1024; 8576 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 8577 return (bypass_code != NIL || second_code != NIL) + 2; 8578} 8579 8580/* Return cost of comparison done using sahf operation. 8581 See ix86_fp_comparison_arithmetics_cost for the metrics. */ 8582static int 8583ix86_fp_comparison_sahf_cost (code) 8584 enum rtx_code code; 8585{ 8586 enum rtx_code bypass_code, first_code, second_code; 8587 /* Return arbitarily high cost when instruction is not preferred - this 8588 avoids gcc from using it. */ 8589 if (!TARGET_USE_SAHF && !optimize_size) 8590 return 1024; 8591 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 8592 return (bypass_code != NIL || second_code != NIL) + 3; 8593} 8594 8595/* Compute cost of the comparison done using any method. 8596 See ix86_fp_comparison_arithmetics_cost for the metrics. 
*/ 8597static int 8598ix86_fp_comparison_cost (code) 8599 enum rtx_code code; 8600{ 8601 int fcomi_cost, sahf_cost, arithmetics_cost = 1024; 8602 int min; 8603 8604 fcomi_cost = ix86_fp_comparison_fcomi_cost (code); 8605 sahf_cost = ix86_fp_comparison_sahf_cost (code); 8606 8607 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code); 8608 if (min > sahf_cost) 8609 min = sahf_cost; 8610 if (min > fcomi_cost) 8611 min = fcomi_cost; 8612 return min; 8613} 8614 8615/* Generate insn patterns to do a floating point compare of OPERANDS. */ 8616 8617static rtx 8618ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test) 8619 enum rtx_code code; 8620 rtx op0, op1, scratch; 8621 rtx *second_test; 8622 rtx *bypass_test; 8623{ 8624 enum machine_mode fpcmp_mode, intcmp_mode; 8625 rtx tmp, tmp2; 8626 int cost = ix86_fp_comparison_cost (code); 8627 enum rtx_code bypass_code, first_code, second_code; 8628 8629 fpcmp_mode = ix86_fp_compare_mode (code); 8630 code = ix86_prepare_fp_compare_args (code, &op0, &op1); 8631 8632 if (second_test) 8633 *second_test = NULL_RTX; 8634 if (bypass_test) 8635 *bypass_test = NULL_RTX; 8636 8637 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 8638 8639 /* Do fcomi/sahf based test when profitable. */ 8640 if ((bypass_code == NIL || bypass_test) 8641 && (second_code == NIL || second_test) 8642 && ix86_fp_comparison_arithmetics_cost (code) > cost) 8643 { 8644 if (TARGET_CMOVE) 8645 { 8646 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); 8647 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), 8648 tmp); 8649 emit_insn (tmp); 8650 } 8651 else 8652 { 8653 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); 8654 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); 8655 if (!scratch) 8656 scratch = gen_reg_rtx (HImode); 8657 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2)); 8658 emit_insn (gen_x86_sahf_1 (scratch)); 8659 } 8660 8661 /* The FP codes work out to act like unsigned. 
*/ 8662 intcmp_mode = fpcmp_mode; 8663 code = first_code; 8664 if (bypass_code != NIL) 8665 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode, 8666 gen_rtx_REG (intcmp_mode, FLAGS_REG), 8667 const0_rtx); 8668 if (second_code != NIL) 8669 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode, 8670 gen_rtx_REG (intcmp_mode, FLAGS_REG), 8671 const0_rtx); 8672 } 8673 else 8674 { 8675 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */ 8676 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); 8677 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); 8678 if (!scratch) 8679 scratch = gen_reg_rtx (HImode); 8680 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2)); 8681 8682 /* In the unordered case, we have to check C2 for NaN's, which 8683 doesn't happen to work out to anything nice combination-wise. 8684 So do some bit twiddling on the value we've got in AH to come 8685 up with an appropriate set of condition codes. */ 8686 8687 intcmp_mode = CCNOmode; 8688 switch (code) 8689 { 8690 case GT: 8691 case UNGT: 8692 if (code == GT || !TARGET_IEEE_FP) 8693 { 8694 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); 8695 code = EQ; 8696 } 8697 else 8698 { 8699 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 8700 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); 8701 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44))); 8702 intcmp_mode = CCmode; 8703 code = GEU; 8704 } 8705 break; 8706 case LT: 8707 case UNLT: 8708 if (code == LT && TARGET_IEEE_FP) 8709 { 8710 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 8711 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01))); 8712 intcmp_mode = CCmode; 8713 code = EQ; 8714 } 8715 else 8716 { 8717 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01))); 8718 code = NE; 8719 } 8720 break; 8721 case GE: 8722 case UNGE: 8723 if (code == GE || !TARGET_IEEE_FP) 8724 { 8725 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05))); 8726 code = EQ; 8727 } 8728 
else 8729 { 8730 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 8731 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, 8732 GEN_INT (0x01))); 8733 code = NE; 8734 } 8735 break; 8736 case LE: 8737 case UNLE: 8738 if (code == LE && TARGET_IEEE_FP) 8739 { 8740 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 8741 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); 8742 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); 8743 intcmp_mode = CCmode; 8744 code = LTU; 8745 } 8746 else 8747 { 8748 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); 8749 code = NE; 8750 } 8751 break; 8752 case EQ: 8753 case UNEQ: 8754 if (code == EQ && TARGET_IEEE_FP) 8755 { 8756 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 8757 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); 8758 intcmp_mode = CCmode; 8759 code = EQ; 8760 } 8761 else 8762 { 8763 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); 8764 code = NE; 8765 break; 8766 } 8767 break; 8768 case NE: 8769 case LTGT: 8770 if (code == NE && TARGET_IEEE_FP) 8771 { 8772 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 8773 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, 8774 GEN_INT (0x40))); 8775 code = NE; 8776 } 8777 else 8778 { 8779 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); 8780 code = EQ; 8781 } 8782 break; 8783 8784 case UNORDERED: 8785 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); 8786 code = NE; 8787 break; 8788 case ORDERED: 8789 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); 8790 code = EQ; 8791 break; 8792 8793 default: 8794 abort (); 8795 } 8796 } 8797 8798 /* Return the test that should be put into the flags user, i.e. 8799 the bcc, scc, or cmov instruction. 
*/
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}

/* Expand a comparison of ix86_compare_op0 against ix86_compare_op1
   with comparison code CODE, returning an rtx usable as the condition
   of a conditional jump/move.  For floating point operands extra tests
   may be needed; they are returned through *SECOND_TEST and
   *BYPASS_TEST (both cleared to NULL_RTX on entry when non-NULL).  */

rtx
ix86_expand_compare (code, second_test, bypass_test)
     enum rtx_code code;
     rtx *second_test, *bypass_test;
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
                                  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

/* Return true if the CODE will result in nontrivial jump sequence.  */
bool
ix86_fp_jump_nontrivial_p (code)
     enum rtx_code code;
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Without conditional moves the FP branch always needs the long
     multi-jump expansion.  */
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != NIL || second_code != NIL;
}

/* Emit a conditional jump to LABEL for comparison CODE of
   ix86_compare_op0 and ix86_compare_op1.  DImode comparisons on
   32-bit targets are decomposed into word-sized compare+branch
   sequences.  */

void
ix86_expand_branch (code, label)
     enum rtx_code code;
     rtx label;
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
    case TFmode:
      {
        rtvec vec;
        int use_fcomi;
        enum rtx_code bypass_code, first_code, second_code;

        code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
                                             &ix86_compare_op1);

        ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

        /* Check whether we will use the natural sequence with one jump.  If
           so, we can expand jump early.  Otherwise delay expansion by
           creating compound insn to not confuse optimizers.  */
        if (bypass_code == NIL && second_code == NIL
            && TARGET_CMOVE)
          {
            ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx, NULL_RTX);
          }
        else
          {
            tmp = gen_rtx_fmt_ee (code, VOIDmode,
                                  ix86_compare_op0, ix86_compare_op1);
            tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                        gen_rtx_LABEL_REF (VOIDmode, label),
                                        pc_rtx);
            tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

            use_fcomi = ix86_use_fcomi_compare (code);
            vec = rtvec_alloc (3 + !use_fcomi);
            RTVEC_ELT (vec, 0) = tmp;
            /* Clobber the FP status registers; without fcomi an extra
               HImode scratch is clobbered as well.  */
            RTVEC_ELT (vec, 1)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
            RTVEC_ELT (vec, 2)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
            if (! use_fcomi)
              RTVEC_ELT (vec, 3)
                = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

            emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
          }
        return;
      }

    case DImode:
      if (TARGET_64BIT)
        goto simple;
      /* Expand DImode branch into multiple compare+branch.  */
      {
        rtx lo[2], hi[2], label2;
        enum rtx_code code1, code2, code3;

        /* Canonicalize: keep any constant operand as operand 1.  */
        if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
          {
            tmp = ix86_compare_op0;
            ix86_compare_op0 = ix86_compare_op1;
            ix86_compare_op1 = tmp;
            code = swap_condition (code);
          }
        split_di (&ix86_compare_op0, 1, lo+0, hi+0);
        split_di (&ix86_compare_op1, 1, lo+1, hi+1);

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            && (!optimize_size
                || hi[1] == const0_rtx || lo[1] == const0_rtx))
          {
            rtx xor0, xor1;

            xor1 = hi[0];
            if (hi[1] != const0_rtx)
              xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            xor0 = lo[0];
            if (lo[1] != const0_rtx)
              xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (SImode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            /* Recurse as a simple SImode compare against zero.  */
            ix86_compare_op0 = tmp;
            ix86_compare_op1 = const0_rtx;
            ix86_expand_branch (code, label);
            return;
          }

        /* Otherwise, if we are doing less-than or greater-or-equal-than,
           op1 is a constant and the low word is zero, then we can just
           examine the high word.  */

        if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
          switch (code)
            {
            case LT: case LTU: case GE: case GEU:
              ix86_compare_op0 = hi[0];
              ix86_compare_op1 = hi[1];
              ix86_expand_branch (code, label);
              return;
            default:
              break;
            }

        /* Otherwise, we need two or three jumps.
*/

        label2 = gen_label_rtx ();

        code1 = code;
        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

        /* Decompose the doubleword condition into the codes used for
           the high-word tests (code1 jumps to LABEL, code2 to LABEL2)
           and the unsigned low-word test (code3).  NIL means "no jump
           needed for that leg".  */
        switch (code)
          {
          case LT: case GT: case LTU: case GTU:
            break;

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = NIL; code2 = NE;  break;
          case NE:   code2 = NIL; break;

          default:
            abort ();
          }

        /*
         * a < b =>
         *    if (hi(a) < hi(b)) goto true;
         *    if (hi(a) > hi(b)) goto false;
         *    if (lo(a) < lo(b)) goto true;
         *  false:
         */

        ix86_compare_op0 = hi[0];
        ix86_compare_op1 = hi[1];

        if (code1 != NIL)
          ix86_expand_branch (code1, label);
        if (code2 != NIL)
          ix86_expand_branch (code2, label2);

        ix86_compare_op0 = lo[0];
        ix86_compare_op1 = lo[1];
        ix86_expand_branch (code3, label);

        if (code2 != NIL)
          emit_label (label2);
        return;
      }

    default:
      abort ();
    }
}

/* Split branch based on floating point condition.  */
void
ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
     enum rtx_code code;
     rtx op1, op2, target1, target2, tmp;
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  /* Swap the branch targets and reverse the condition when TARGET2 is
     not the fall-through (pc_rtx).  */
  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
                                      tmp, &second, &bypass);

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
         Assume the BYPASS and SECOND to be always test
         for UNORDERED.  */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
         to be updated.  Later we may run some experiments and see
         if unordered values are more frequent in practice.  */
      if (bypass)
        bypass_probability = 1;
      if (second)
        second_probability = 1;
    }
  if (bypass != NULL_RTX)
    {
      /* The bypass test jumps around the main conditional jump.  */
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode,
                                                 bypass,
                                                 gen_rtx_LABEL_REF (VOIDmode,
                                                                    label),
                                                 pc_rtx)));
      if (bypass_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (bypass_probability),
                               REG_NOTES (i));
    }
  i = emit_jump_insn (gen_rtx_SET
                      (VOIDmode, pc_rtx,
                       gen_rtx_IF_THEN_ELSE (VOIDmode,
                                             condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
                           GEN_INT (probability),
                           REG_NOTES (i));
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
                                                 target2)));
      if (second_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (second_probability),
                               REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}

/* Store the result of comparison CODE of ix86_compare_op0 and
   ix86_compare_op1 into the QImode register DEST.  Return 1 on
   success, 0 when the expansion must be left to generic code.  */

int
ix86_expand_setcc (code, dest)
     enum rtx_code code;
     rtx dest;
{
  rtx ret, tmp, tmpreg;
  rtx second_test, bypass_test;

  /* DImode comparisons need a doubleword sequence; punt on 32-bit.  */
  if (GET_MODE (ix86_compare_op0) == DImode
      && !TARGET_64BIT)
    return 0; /* FAIL */

  if (GET_MODE (dest) != QImode)
    abort ();

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx
          tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
        {
          if (second_test)
            abort ();
          test = bypass_test;
          bypass = 1;
          PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
        }
      /* Materialize the extra test into TMP2 and fold it into the main
         setcc result: AND for the (reversed) bypass test, IOR for the
         second test.  */
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
        emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
        emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  return 1; /* DONE */
}

/* Expand an integer conditional move described by OPERANDS
   (operands[0] = destination, operands[1] = the comparison rtx,
   operands[2]/operands[3] = the "then"/"else" values), using the
   comparison operands stashed in ix86_compare_op0/ix86_compare_op1.
   Return 1 when insns have been emitted, 0 when generic code must
   handle it.  */

int
ix86_expand_int_movcc (operands)
     rtx operands[];
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* When the compare code is not LTU or GEU, we can not use sbbl case.
     In case comparison is done with immediate, we can convert it to LTU or
     GEU by altering the integer.  */

  if ((code == LEU || code == GTU)
      && GET_CODE (ix86_compare_op1) == CONST_INT
      && mode != HImode
      && INTVAL (ix86_compare_op1) != -1
      /* For x86-64, the immediate field in the instruction is 32-bit
         signed, so we can't increment a DImode value above 0x7fffffff.  */
      && (!TARGET_64BIT
          || GET_MODE (ix86_compare_op0) != DImode
          || INTVAL (ix86_compare_op1) != 0x7fffffff)
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      if (code == LEU)
        code = LTU;
      else
        code = GEU;
      ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
                                       GET_MODE (ix86_compare_op0));
    }

  /* Expand the comparison into a detached sequence so it can be emitted
     at the point chosen by the individual strategies below.  */
  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if (mode != HImode
      && (mode != DImode || TARGET_64BIT)
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      if ((compare_code == LTU || compare_code == GEU)
          && !second_test && !bypass_test)
        {
          /* Detect overlap between destination and compare sources.  */
          rtx tmp = out;

          /* To simplify rest of code, restrict to the GEU case.  */
          if (compare_code == LTU)
            {
              HOST_WIDE_INT tmp = ct;
              ct = cf;
              cf = tmp;
              compare_code = reverse_condition (compare_code);
              code = reverse_condition (code);
            }
          diff = ct - cf;

          if (reg_overlap_mentioned_p (out, ix86_compare_op0)
              || reg_overlap_mentioned_p (out, ix86_compare_op1))
            tmp = gen_reg_rtx (mode);

          /* Emit the compare and materialize the carry as 0/-1 via sbb.  */
          emit_insn (compare_seq);
          if (mode == DImode)
            emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
          else
            emit_insn (gen_x86_movsicc_0_m1 (tmp));

          if (diff == 1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [addl dest, ct]
               *
               * Size 5 - 8.
               */
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           tmp, GEN_INT (ct),
                                           tmp, 1, OPTAB_DIRECT);
            }
          else if (cf == -1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * orl $ct, dest
               *
               * Size 8.
               */
              tmp = expand_simple_binop (mode, IOR,
                                         tmp, GEN_INT (ct),
                                         tmp, 1, OPTAB_DIRECT);
            }
          else if (diff == -1 && ct)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * notl dest
               * [addl dest, cf]
               *
               * Size 8 - 11.
               */
              tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
              if (cf)
                tmp = expand_simple_binop (mode, PLUS,
                                           tmp, GEN_INT (cf),
                                           tmp, 1, OPTAB_DIRECT);
            }
          else
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [notl dest]
               * andl cf - ct, dest
               * [addl dest, ct]
               *
               * Size 8 - 11.
               */

              if (cf == 0)
                {
                  cf = ct;
                  ct = 0;
                  tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
                }

              tmp = expand_simple_binop (mode, AND,
                                         tmp,
                                         gen_int_mode (cf - ct, mode),
                                         tmp, 1, OPTAB_DIRECT);
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           tmp, GEN_INT (ct),
                                           tmp, 1, OPTAB_DIRECT);
            }

          if (tmp != out)
            emit_move_insn (out, tmp);

          return 1; /* DONE */
        }

      /* Normalize so that DIFF = ct - cf is positive, reversing the
         condition code accordingly.  */
      diff = ct - cf;
      if (diff < 0)
        {
          HOST_WIDE_INT tmp;
          tmp = ct, ct = cf, cf = tmp;
          diff = -diff;
          if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
            {
              /* We may be reversing unordered compare to normal compare, that
                 is not valid in general (we may convert non-trapping condition
                 to trapping one), however on i386 we currently emit all
                 comparisons unordered.  */
              compare_code = reverse_condition_maybe_unordered (compare_code);
              code = reverse_condition_maybe_unordered (code);
            }
          else
            {
              compare_code = reverse_condition (compare_code);
              code = reverse_condition (code);
            }
        }

      /* From here on compare_code is reused as a flag: non-NIL only for
         the sign-bit comparisons (x < 0, x >= 0, x <= -1, x > -1) that
         can be implemented with a sar-based sequence below.  */
      compare_code = NIL;
      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
          && GET_CODE (ix86_compare_op1) == CONST_INT)
        {
          if (ix86_compare_op1 == const0_rtx
              && (code == LT || code == GE))
            compare_code = code;
          else if (ix86_compare_op1 == constm1_rtx)
            {
              if (code == LE)
                compare_code = LT;
              else if (code == GT)
                compare_code = GE;
            }
        }

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != NIL
          && GET_MODE (ix86_compare_op0) == GET_MODE (out)
          && (cf == -1 || ct == -1))
        {
          /* If lea code below could be used, only optimize
             if it results in a 2 insn sequence.  */

          if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
                 || diff == 3 || diff == 5 || diff == 9)
              || (compare_code == LT && ct == -1)
              || (compare_code == GE && cf == -1))
            {
              /*
               * notl op1      (if necessary)
               * sarl $31, op1
               * orl cf, op1
               */
              if (ct != -1)
                {
                  cf = ct;
                  ct = -1;
                  code = reverse_condition (code);
                }

              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, -1);

              out = expand_simple_binop (mode, IOR,
                                         out, GEN_INT (cf),
                                         out, 1, OPTAB_DIRECT);
              if (out != operands[0])
                emit_move_insn (operands[0], out);

              return 1; /* DONE */
            }
        }

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
           || diff == 3 || diff == 5 || diff == 9)
          && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
        {
          /*
           * xorl dest,dest
           * cmpl op1,op2
           * setcc dest
           * lea cf(dest*(ct-cf)),dest
           *
           * Size 14.
           *
           * This also catches the degenerate setcc-only case.
           */

          rtx tmp;
          int nops;

          out = emit_store_flag (out, code, ix86_compare_op0,
                                 ix86_compare_op1, VOIDmode, 0, 1);

          nops = 0;
          /* On x86_64 the lea instruction operates on Pmode, so we need
             to get arithmetics done in proper mode to match.  */
          if (diff == 1)
            tmp = copy_rtx (out);
          else
            {
              rtx out1;
              out1 = copy_rtx (out);
              tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
              nops++;
              if (diff & 1)
                {
                  tmp = gen_rtx_PLUS (mode, tmp, out1);
                  nops++;
                }
            }
          if (cf != 0)
            {
              tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
              nops++;
            }
          if (tmp != out
              && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
            {
              if (nops == 1)
                out = force_operand (tmp, copy_rtx (out));
              else
                emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
            }
          if (out != operands[0])
            emit_move_insn (operands[0], copy_rtx (out));

          return 1; /* DONE */
        }

      /*
       * General case:                  Jumpful:
       *   xorl dest,dest               cmpl op1, op2
       *   cmpl op1, op2                movl ct, dest
       *   setcc dest                   jcc 1f
       *   decl dest                    movl cf, dest
       *   andl (cf-ct),dest            1:
       *   addl ct,dest
       *
       * Size 20.                       Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       *
       * %%% Parameterize branch_cost on the tuning architecture, then
       * use that.  The 80386 couldn't care less about mispredicts.
       */

      if (!optimize_size && !TARGET_CMOVE)
        {
          if (cf == 0)
            {
              cf = ct;
              ct = 0;
              if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
                /* We may be reversing unordered compare to normal compare,
                   that is not valid in general (we may convert non-trapping
                   condition to trapping one), however on i386 we currently
                   emit all comparisons unordered.  */
                code = reverse_condition_maybe_unordered (code);
              else
                {
                  code = reverse_condition (code);
                  if (compare_code != NIL)
                    compare_code = reverse_condition (compare_code);
                }
            }

          if (compare_code != NIL)
            {
              /* notl op1      (if needed)
                 sarl $31, op1
                 andl (cf-ct), op1
                 addl ct, op1

                 For x < 0 (resp. x <= -1) there will be no notl,
                 so if possible swap the constants to get rid of the
                 complement.
                 True/false will be -1/0 while code below (store flag
                 followed by decrement) is 0/-1, so the constants need
                 to be exchanged once more.  */

              if (compare_code == GE || !cf)
                {
                  code = reverse_condition (code);
                  compare_code = LT;
                }
              else
                {
                  HOST_WIDE_INT tmp = cf;
                  cf = ct;
                  ct = tmp;
                }

              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, -1);
            }
          else
            {
              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, 1);

              /* Turn the 0/1 store-flag result into 0/-1 by decrementing.  */
              out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
                                         out, 1, OPTAB_DIRECT);
            }

          out = expand_simple_binop (mode, AND, out,
                                     gen_int_mode (cf - ct, mode),
                                     out, 1, OPTAB_DIRECT);
          if (ct)
            out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
                                       out, 1, OPTAB_DIRECT);
          if (out != operands[0])
            emit_move_insn (operands[0], out);

          return 1; /* DONE */
        }
    }

  if (!TARGET_CMOVE)
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (optimize_size)
        return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load a
         constant with the above and mask it in with a logical operation.  */

      if (GET_CODE (operands[2]) == CONST_INT)
        {
          var = operands[3];
          if (INTVAL (operands[2]) == 0)
            operands[3] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[2]) == -1)
            operands[3] = const0_rtx, op = ior_optab;
          else
            return 0; /* FAIL */
        }
      else if (GET_CODE (operands[3]) == CONST_INT)
        {
          var = operands[2];
          if (INTVAL (operands[3]) == 0)
            operands[2] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[3]) == -1)
            operands[2] = const0_rtx, op = ior_optab;
          else
            return 0; /* FAIL */
        }
      else
        return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
        return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
                          OPTAB_WIDEN);
      if (out != orig_out)
        emit_move_insn (orig_out, out);

      return 1; /* DONE */
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  /* Copy an arm into a fresh register when the destination overlaps it
     and a later fixup cmov would clobber it.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }
  if (! register_operand (operands[2], VOIDmode)
      && !
         register_operand (operands[3], VOIDmode))
    operands[2] = force_reg (mode, operands[2]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode,
                                                compare_op, operands[2],
                                                operands[3])));
  /* Fixup moves for the extra FP tests, if any.  */
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (mode,
                                                  bypass_test,
                                                  operands[3],
                                                  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (mode,
                                                  second_test,
                                                  operands[2],
                                                  operands[0])));

  return 1; /* DONE */
}

/* Expand a floating point conditional move described by OPERANDS
   (same layout as for ix86_expand_int_movcc).  Uses SSE min/max or
   masked moves when profitable, otherwise fcmov.  Return 1 when insns
   have been emitted.  */

int
ix86_expand_fp_movcc (operands)
     rtx operands[];
{
  enum rtx_code code;
  rtx tmp;
  rtx compare_op, second_test, bypass_test;

  /* For SF/DFmode conditional moves based on comparisons
     in same mode, we may want to use SSE min/max instructions.  */
  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
      && (!TARGET_IEEE_FP
          || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
      /* We may be called from the post-reload splitter.  */
      && (!REG_P (operands[0])
          || SSE_REG_P (operands[0])
          || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
    {
      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
      code = GET_CODE (operands[1]);

      /* See if we have (cross) match between comparison operands and
         conditional move operands.  */
      if (rtx_equal_p (operands[2], op1))
        {
          rtx tmp = op0;
          op0 = op1;
          op1 = tmp;
          code = reverse_condition_maybe_unordered (code);
        }
      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
        {
          /* Check for min operation.  */
          if (code == LT)
            {
              operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
              if (memory_operand (op0, VOIDmode))
                op0 = force_reg (GET_MODE (operands[0]), op0);
              if (GET_MODE (operands[0]) == SFmode)
                emit_insn (gen_minsf3 (operands[0], op0, op1));
              else
                emit_insn (gen_mindf3 (operands[0], op0, op1));
              return 1;
            }
          /* Check for max operation.  */
          if (code == GT)
            {
              operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
              if (memory_operand (op0, VOIDmode))
                op0 = force_reg (GET_MODE (operands[0]), op0);
              if (GET_MODE (operands[0]) == SFmode)
                emit_insn (gen_maxsf3 (operands[0], op0, op1));
              else
                emit_insn (gen_maxdf3 (operands[0], op0, op1));
              return 1;
            }
        }
      /* Manage condition to be sse_comparison_operator.  In case we are
         in non-ieee mode, try to canonicalize the destination operand
         to be first in the comparison - this helps reload to avoid extra
         moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
          || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
        {
          rtx tmp = ix86_compare_op0;
          ix86_compare_op0 = ix86_compare_op1;
          ix86_compare_op1 = tmp;
          operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
                                        VOIDmode, ix86_compare_op0,
                                        ix86_compare_op1);
        }
      /* Similarly try to manage result to be first operand of conditional
         move.  We also don't support the NE comparison on SSE, so try to
         avoid it.  */
      if ((rtx_equal_p (operands[0], operands[3])
           && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
          || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
        {
          rtx tmp = operands[2];
          operands[2] = operands[3];
          operands[3] = tmp;
          operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
                                          (GET_CODE (operands[1])),
                                        VOIDmode, ix86_compare_op0,
                                        ix86_compare_op1);
        }
      if (GET_MODE (operands[0]) == SFmode)
        emit_insn (gen_sse_movsfcc (operands[0], operands[1],
                                    operands[2], operands[3],
                                    ix86_compare_op0, ix86_compare_op1));
      else
        emit_insn (gen_sse_movdfcc (operands[0], operands[1],
                                    operands[2], operands[3],
                                    ix86_compare_op0, ix86_compare_op1));
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* The floating point conditional move instructions don't directly
     support signed integer comparisons.
     */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      /* Fall back: compute the comparison into a QImode register with
         setcc, then retest that register against zero with NE, which
         fcmov can handle.  */
      if (second_test != NULL || bypass_test != NULL)
        abort ();
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  /* Copy an arm into a fresh register when the destination overlaps it
     and a later fixup cmov would clobber it.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                                compare_op,
                                                operands[2],
                                                operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                                  bypass_test,
                                                  operands[3],
                                                  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                                  second_test,
                                                  operands[2],
                                                  operands[0])));

  return 1;
}

/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  int size;

  /* Number of word-sized parts: words are 4 bytes on 32-bit targets,
     8 bytes on 64-bit ones.  */
  if (!TARGET_64BIT)
    size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
        operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
        abort ();

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
        split_di (&operand, 1, &parts[0], &parts[1]);
      else
        {
          if (REG_P (operand))
            {
              /* Hard-register splitting is only meaningful after reload.  */
              if (!reload_completed)
                abort ();
              parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
              if (size == 3)
                parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, SImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, SImode, 4);
              if (size == 3)
                parts[2] = adjust_address (operand, SImode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              switch (mode)
                {
                case XFmode:
                case TFmode:
                  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case DFmode:
                  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
                  break;
                default:
                  abort ();
                }
              parts[1] = gen_int_mode (l[1],
                                       SImode);
              parts[0] = gen_int_mode (l[0], SImode);
            }
          else
            abort ();
        }
    }
  else
    {
      if (mode == TImode)
        split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
        {
          if (REG_P (operand))
            {
              if (!reload_completed)
                abort ();
              parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, DImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, SImode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[3];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
              /* Do not use shift by 32 to avoid warning on 32bit systems.  */
              if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[0]
                  = gen_int_mode
                      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
                       DImode);
              else
                parts[0] = immed_double_const (l[0], l[1], DImode);
              parts[1] = gen_int_mode (l[2], SImode);
            }
          else
            abort ();
        }
    }

  return size;
}

/* Emit insns to perform a move or push of DI, DF, and XF values.
   Operands 2-4 contain the input values in the correct order;
   operands 5-7 contain the output values.  */

void
ix86_split_long_move (operands)
     rtx operands[];
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
         fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
          && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
        operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
        {
          operands[0] = copy_rtx (operands[0]);
          PUT_MODE (operands[0], Pmode);
        }
      else
        operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
           && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
        part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
                                     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
                                   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
        collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
        collisions++;
      if (nparts == 3
          && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
        collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
          && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
        {
          rtx tmp;
          tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
          tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
        }

      /* If there are more collisions, we can't handle it by reordering.
         Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
        {
          rtx base;

          collisions = 1;

          base = part[0][nparts - 1];

          /* Handle the case when the last part isn't valid for lea.
             Happens in 64-bit mode storing the 12-byte XFmode.  */
          if (GET_MODE (base) != Pmode)
            base = gen_rtx_REG (Pmode, REGNO (base));

          emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
          part[1][0] = replace_equiv_address (part[1][0], base);
          part[1][1] = replace_equiv_address (part[1][1],
                                              plus_constant (base, UNITS_PER_WORD));
          if (nparts == 3)
            part[1][2] = replace_equiv_address (part[1][2],
                                                plus_constant (base, 8));
        }
    }

  if (push)
    {
      if (!TARGET_64BIT)
        {
          if (nparts == 3)
            {
              /* We use only first 12 bytes of TFmode value, but for pushing we
                 are required to adjust stack as if we were pushing real 16byte
                 value.
*/ 10075 if ((REG_P (part[0][0]) 10076 && REG_P (part[1][1]) 10077 && (REGNO (part[0][0]) == REGNO (part[1][1]) 10078 || (nparts == 3 10079 && REGNO (part[0][0]) == REGNO (part[1][2])))) 10080 || (collisions > 0 10081 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))) 10082 { 10083 if (nparts == 3) 10084 { 10085 operands[2] = part[0][2]; 10086 operands[3] = part[0][1]; 10087 operands[4] = part[0][0]; 10088 operands[5] = part[1][2]; 10089 operands[6] = part[1][1]; 10090 operands[7] = part[1][0]; 10091 } 10092 else 10093 { 10094 operands[2] = part[0][1]; 10095 operands[3] = part[0][0]; 10096 operands[5] = part[1][1]; 10097 operands[6] = part[1][0]; 10098 } 10099 } 10100 else 10101 { 10102 if (nparts == 3) 10103 { 10104 operands[2] = part[0][0]; 10105 operands[3] = part[0][1]; 10106 operands[4] = part[0][2]; 10107 operands[5] = part[1][0]; 10108 operands[6] = part[1][1]; 10109 operands[7] = part[1][2]; 10110 } 10111 else 10112 { 10113 operands[2] = part[0][0]; 10114 operands[3] = part[0][1]; 10115 operands[5] = part[1][0]; 10116 operands[6] = part[1][1]; 10117 } 10118 } 10119 emit_move_insn (operands[2], operands[5]); 10120 emit_move_insn (operands[3], operands[6]); 10121 if (nparts == 3) 10122 emit_move_insn (operands[4], operands[7]); 10123 10124 return; 10125} 10126 10127void 10128ix86_split_ashldi (operands, scratch) 10129 rtx *operands, scratch; 10130{ 10131 rtx low[2], high[2]; 10132 int count; 10133 10134 if (GET_CODE (operands[2]) == CONST_INT) 10135 { 10136 split_di (operands, 2, low, high); 10137 count = INTVAL (operands[2]) & 63; 10138 10139 if (count >= 32) 10140 { 10141 emit_move_insn (high[0], low[1]); 10142 emit_move_insn (low[0], const0_rtx); 10143 10144 if (count > 32) 10145 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32))); 10146 } 10147 else 10148 { 10149 if (!rtx_equal_p (operands[0], operands[1])) 10150 emit_move_insn (operands[0], operands[1]); 10151 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count))); 10152 
emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
	}
    }
  else
    {
      /* Variable count: SHLD + SHL cover counts 0..31; the adjustment
	 pattern fixes up counts with bit 5 set.  */
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  /* Need a zero in SCRATCH for the cmove-based adjustment.  */
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}

/* Split a DImode arithmetic right shift into SImode operations.
   OPERANDS[0] is the destination, OPERANDS[1] the source and OPERANDS[2]
   the shift count; SCRATCH is an optional SImode scratch register.  */
void
ix86_split_ashrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);

	  /* High word becomes a sign mask.  After reload we may not
	     create the extra move through LOW, so shift in place.  */
	  if (! reload_completed)
	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
	  else
	    {
	      emit_move_insn (high[0], low[0]);
	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
	    }

	  if (count > 32)
	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  /* SCRATCH holds the sign extension of the high word, used as
	     the fill value by the adjustment pattern.  */
	  if (! no_new_pseudos)
	    scratch = gen_reg_rtx (SImode);
	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}

/* Split a DImode logical right shift into SImode operations.
   OPERANDS[0] is the destination, OPERANDS[1] the source and OPERANDS[2]
   the shift count; SCRATCH is an optional SImode scratch register.  */
void
ix86_split_lshrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* Whole high word shifts into the low word; high becomes zero.  */
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}

/* Helper function for the string operations below.  Test VARIABLE whether
   it is aligned to VALUE bytes.  If the (VARIABLE & VALUE) bits are zero,
   jump to the returned label; VALUE is expected to be a power-of-two mask
   bit.  The caller emits the unaligned fixup between here and the label.  */
static rtx
ix86_expand_aligntest (variable, value)
     rtx variable;
     int value;
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  return label;
}

/* Adjust COUNTER (an SImode or DImode register) down by VALUE bytes.  */
static void
ix86_adjust_counter (countreg, value)
     rtx countreg;
     HOST_WIDE_INT value;
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend possibly SImode EXP to Pmode register.
 */
rtx
ix86_zero_extend_to_Pmode (exp)
     rtx exp;
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  /* EXP is SImode and Pmode is DImode here; emit an explicit zero
     extension into a fresh Pmode register.  */
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}

/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrstr contains similar code.  Returns 1 when the
   copy was expanded inline, 0 to fall back to the library call.  */
int
ix86_expand_movstr (dst, src, count_exp, align_exp)
     rtx dst, src, count_exp, align_exp;
{
  rtx srcreg, destreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;
  rtx insns;

  /* Collect everything in a sequence so memory attributes can be set on
     the emitted insns at the end.  */
  start_sequence ();

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    count = INTVAL (count_exp);

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      if (TARGET_64BIT)
	emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
					destreg, srcreg, countreg));
      else
	emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
				  destreg, srcreg, countreg));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      /* SIZE is the word size of the rep move: movsq on 64-bit (unless
	 optimizing for size), else movsl.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  if (size == 4)
	    {
	      if (TARGET_64BIT)
		emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
						destreg, srcreg, countreg));
	      else
		emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
					  destreg, srcreg, countreg));
	    }
	  else
	    emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
					    destreg, srcreg, countreg));
	}
      /* Copy the remaining 1..SIZE-1 bytes with single string moves.  */
      if (size == 8 && (count & 0x04))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if (count & 0x02)
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (count & 0x01)
	emit_insn (gen_strmovqi (destreg, srcreg));
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
       allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
	{
	  end_sequence ();
	  return 0;
	}

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
	 than 4 bytes, because gcc is able to optimize such code better (in
	 the case the destination or the count really is aligned, gcc is often
	 able to predict the branches) and also it is friendlier to the
	 hardware branch prediction.

	 Using loops is beneficial for generic case, because we can
	 handle small counts using the loops.  Many CPUs (such as Athlon)
	 have large REP prefix setup costs.

	 This is quite costly.  Maybe we can revisit this decision later or
	 add some customizability to this code.  */

      /* With an unknown count, skip the alignment prologue for small
	 counts; the tail code below handles them byte by byte.  */
      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}
      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
					  destreg, srcreg, countreg2));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
	  emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
				    destreg, srcreg, countreg2));
	}

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      /* Copy the remaining tail, testing COUNTREG bits at run time when
	 the count (or alignment) is not known at compile time.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strmovsi (destreg, srcreg));
      if ((align <= 4 || count == 0) && TARGET_64BIT)
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strmovsi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strmovhi (destreg, srcreg));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strmovhi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strmovqi (destreg, srcreg));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strmovqi (destreg, srcreg));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }

  insns = get_insns ();
  end_sequence ();

  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
  emit_insn (insns);
  return 1;
}

/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  expand_movstr contains similar code.  Returns 1 when the
   clear was expanded inline, 0 to fall back to the library call.  */
int
ix86_expand_clrstr (src, count_exp, align_exp)
     rtx src, count_exp, align_exp;
{
  rtx destreg, zeroreg, countreg;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    count = INTVAL (count_exp);
  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.
 */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      if (TARGET_64BIT)
	emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
					 destreg, countreg));
      else
	emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
				   destreg, countreg));
    }
  /* For constant aligned (or small unaligned) clears use rep stosl/stosq
     followed by code clearing the rest.  */
  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      /* SIZE is the word size of the rep store: stosq on 64-bit (unless
	 optimizing for size), else stosl.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  if (size == 4)
	    {
	      if (TARGET_64BIT)
		emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
						 destreg, countreg));
	      else
		emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
					   destreg, countreg));
	    }
	  else
	    emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
					     destreg, countreg));
	}
      /* Clear the remaining 1..SIZE-1 bytes with single string stores,
	 reusing narrower subregs of the zero register.  */
      if (size == 8 && (count & 0x04))
	emit_insn (gen_strsetsi (destreg,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (count & 0x02)
	emit_insn (gen_strsethi (destreg,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (count & 0x01)
	emit_insn (gen_strsetqi (destreg,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
    }
  else
    {
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.  */
      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);

      /* With an unknown count, skip the alignment prologue for small
	 counts; the tail code below handles them byte by byte.  */
      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strsetqi (destreg,
				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strsethi (destreg,
				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
					     ? gen_rtx_SUBREG (SImode, zeroreg, 0)
					     : zeroreg)));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}

      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
					   destreg, countreg2));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
	  emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
				     destreg, countreg2));
	}
      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      /* Clear the remaining tail, testing COUNTREG bits at run time when
	 the count (or alignment) is not known at compile time.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strsetsi (destreg,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strsetsi (destreg,
				   gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strsethi (destreg,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strsethi (destreg,
				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strsetqi (destreg,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strsetqi (destreg,
				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  return 1;
}
/* Expand strlen.  OUT receives the length, SRC is the string MEM,
   EOSCHAR the terminator and ALIGN the known source alignment.
   Returns 1 when expanded inline, 0 to use the library call.  */
int
ix86_expand_strlen (out, src, eoschar, align)
     rtx out, src, eoschar, align;
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid it's
     expanding unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.
 */
      if (TARGET_64BIT)
	emit_insn (gen_subdi3 (out, out, addr));
      else
	emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      /* repnz; scasb path: scan with count -1, then the length is
	 ~remaining_count - 1.  */
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
					 align, scratch4, scratch3));
	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
	}
      else
	{
	  emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
				     align, scratch4, scratch3));
	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
	}
    }
  return 1;
}

/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body. It needs the initialisations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (out, align_rtx)
     rtx out, align_rtx;
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (out, out, const1_rtx));
	  else
	    emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.
     (word - 0x01010101) & ~word & 0x80808080 sets the high bit of each
     byte that was zero.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      /* NOTE(review): gen_rtx_SET is elsewhere in this file always called
	 with VOIDmode; the SImode here looks suspicious — confirm against
	 the gen_rtx_SET definition before changing.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
			      gen_rtx_PLUS (Pmode, out, GEN_INT (2))));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						    reg2,
						    out)));

    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
      else
	emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  The add sets the carry from the
     low byte's high bit; subtract-with-borrow then backs OUT up by 3 or
     4 to point at the zero byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));

  emit_label (end_0_label);
}

/* Emit a call insn.  RETVAL is the value register or NULL, FNADDR the
   callee MEM, CALLARG1 the argument-bytes rtx, CALLARG2 the SSE register
   count (64-bit varargs), POP the number of bytes the callee pops
   (32-bit only; const0_rtx or NULL for none).  */
void
ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
     rtx retval, fnaddr, callarg1, callarg2, pop;
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  /* The 64-bit ABI has no callee-pop calls.  */
  if (TARGET_64BIT && pop)
    abort ();

#if TARGET_MACHO
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  /* 64-bit varargs: pass the number of SSE registers used in AL.  */
  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }
#endif /* TARGET_MACHO */

  if (!
call_insn_operand (XEXP (fnaddr, 0), Pmode)) 11059 { 11060 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0)); 11061 fnaddr = gen_rtx_MEM (QImode, fnaddr); 11062 } 11063 11064 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); 11065 if (retval) 11066 call = gen_rtx_SET (VOIDmode, retval, call); 11067 if (pop) 11068 { 11069 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop); 11070 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop); 11071 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop)); 11072 } 11073 11074 call = emit_call_insn (call); 11075 if (use) 11076 CALL_INSN_FUNCTION_USAGE (call) = use; 11077} 11078 11079 11080/* Clear stack slot assignments remembered from previous functions. 11081 This is called from INIT_EXPANDERS once before RTL is emitted for each 11082 function. */ 11083 11084static struct machine_function * 11085ix86_init_machine_status () 11086{ 11087 return ggc_alloc_cleared (sizeof (struct machine_function)); 11088} 11089 11090/* Return a MEM corresponding to a stack slot with mode MODE. 11091 Allocate a new slot if necessary. 11092 11093 The RTL for a function can have several slots available: N is 11094 which slot to use. */ 11095 11096rtx 11097assign_386_stack_local (mode, n) 11098 enum machine_mode mode; 11099 int n; 11100{ 11101 if (n < 0 || n >= MAX_386_STACK_LOCALS) 11102 abort (); 11103 11104 if (ix86_stack_locals[(int) mode][n] == NULL_RTX) 11105 ix86_stack_locals[(int) mode][n] 11106 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0); 11107 11108 return ix86_stack_locals[(int) mode][n]; 11109} 11110 11111/* Construct the SYMBOL_REF for the tls_get_addr function. */ 11112 11113static GTY(()) rtx ix86_tls_symbol; 11114rtx 11115ix86_tls_get_addr () 11116{ 11117 11118 if (!ix86_tls_symbol) 11119 { 11120 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, 11121 (TARGET_GNU_TLS && !TARGET_64BIT) 11122 ? 
"___tls_get_addr" 11123 : "__tls_get_addr"); 11124 } 11125 11126 return ix86_tls_symbol; 11127} 11128 11129/* Calculate the length of the memory address in the instruction 11130 encoding. Does not include the one-byte modrm, opcode, or prefix. */ 11131 11132static int 11133memory_address_length (addr) 11134 rtx addr; 11135{ 11136 struct ix86_address parts; 11137 rtx base, index, disp; 11138 int len; 11139 11140 if (GET_CODE (addr) == PRE_DEC 11141 || GET_CODE (addr) == POST_INC 11142 || GET_CODE (addr) == PRE_MODIFY 11143 || GET_CODE (addr) == POST_MODIFY) 11144 return 0; 11145 11146 if (! ix86_decompose_address (addr, &parts)) 11147 abort (); 11148 11149 base = parts.base; 11150 index = parts.index; 11151 disp = parts.disp; 11152 len = 0; 11153 11154 /* Register Indirect. */ 11155 if (base && !index && !disp) 11156 { 11157 /* Special cases: ebp and esp need the two-byte modrm form. */ 11158 if (addr == stack_pointer_rtx 11159 || addr == arg_pointer_rtx 11160 || addr == frame_pointer_rtx 11161 || addr == hard_frame_pointer_rtx) 11162 len = 1; 11163 } 11164 11165 /* Direct Addressing. */ 11166 else if (disp && !base && !index) 11167 len = 4; 11168 11169 else 11170 { 11171 /* Find the length of the displacement constant. */ 11172 if (disp) 11173 { 11174 if (GET_CODE (disp) == CONST_INT 11175 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K') 11176 && base) 11177 len = 1; 11178 else 11179 len = 4; 11180 } 11181 11182 /* An index requires the two-byte modrm form. */ 11183 if (index) 11184 len += 1; 11185 } 11186 11187 return len; 11188} 11189 11190/* Compute default value for "length_immediate" attribute. When SHORTFORM 11191 is set, expect that insn have 8bit immediate alternative. 
*/ 11192int 11193ix86_attr_length_immediate_default (insn, shortform) 11194 rtx insn; 11195 int shortform; 11196{ 11197 int len = 0; 11198 int i; 11199 extract_insn_cached (insn); 11200 for (i = recog_data.n_operands - 1; i >= 0; --i) 11201 if (CONSTANT_P (recog_data.operand[i])) 11202 { 11203 if (len) 11204 abort (); 11205 if (shortform 11206 && GET_CODE (recog_data.operand[i]) == CONST_INT 11207 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K')) 11208 len = 1; 11209 else 11210 { 11211 switch (get_attr_mode (insn)) 11212 { 11213 case MODE_QI: 11214 len+=1; 11215 break; 11216 case MODE_HI: 11217 len+=2; 11218 break; 11219 case MODE_SI: 11220 len+=4; 11221 break; 11222 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */ 11223 case MODE_DI: 11224 len+=4; 11225 break; 11226 default: 11227 fatal_insn ("unknown insn mode", insn); 11228 } 11229 } 11230 } 11231 return len; 11232} 11233/* Compute default value for "length_address" attribute. */ 11234int 11235ix86_attr_length_address_default (insn) 11236 rtx insn; 11237{ 11238 int i; 11239 11240 if (get_attr_type (insn) == TYPE_LEA) 11241 { 11242 rtx set = PATTERN (insn); 11243 if (GET_CODE (set) == SET) 11244 ; 11245 else if (GET_CODE (set) == PARALLEL 11246 && GET_CODE (XVECEXP (set, 0, 0)) == SET) 11247 set = XVECEXP (set, 0, 0); 11248 else 11249 { 11250#ifdef ENABLE_CHECKING 11251 abort (); 11252#endif 11253 return 0; 11254 } 11255 11256 return memory_address_length (SET_SRC (set)); 11257 } 11258 11259 extract_insn_cached (insn); 11260 for (i = recog_data.n_operands - 1; i >= 0; --i) 11261 if (GET_CODE (recog_data.operand[i]) == MEM) 11262 { 11263 return memory_address_length (XEXP (recog_data.operand[i], 0)); 11264 break; 11265 } 11266 return 0; 11267} 11268 11269/* Return the maximum number of instructions a cpu can issue. 
*/ 11270 11271static int 11272ix86_issue_rate () 11273{ 11274 switch (ix86_cpu) 11275 { 11276 case PROCESSOR_PENTIUM: 11277 case PROCESSOR_K6: 11278 return 2; 11279 11280 case PROCESSOR_PENTIUMPRO: 11281 case PROCESSOR_PENTIUM4: 11282 case PROCESSOR_ATHLON: 11283 return 3; 11284 11285 default: 11286 return 1; 11287 } 11288} 11289 11290/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set 11291 by DEP_INSN and nothing set by DEP_INSN. */ 11292 11293static int 11294ix86_flags_dependant (insn, dep_insn, insn_type) 11295 rtx insn, dep_insn; 11296 enum attr_type insn_type; 11297{ 11298 rtx set, set2; 11299 11300 /* Simplify the test for uninteresting insns. */ 11301 if (insn_type != TYPE_SETCC 11302 && insn_type != TYPE_ICMOV 11303 && insn_type != TYPE_FCMOV 11304 && insn_type != TYPE_IBR) 11305 return 0; 11306 11307 if ((set = single_set (dep_insn)) != 0) 11308 { 11309 set = SET_DEST (set); 11310 set2 = NULL_RTX; 11311 } 11312 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL 11313 && XVECLEN (PATTERN (dep_insn), 0) == 2 11314 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET 11315 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET) 11316 { 11317 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 11318 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 11319 } 11320 else 11321 return 0; 11322 11323 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG) 11324 return 0; 11325 11326 /* This test is true if the dependent insn reads the flags but 11327 not any other potentially set register. */ 11328 if (!reg_overlap_mentioned_p (set, PATTERN (insn))) 11329 return 0; 11330 11331 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn))) 11332 return 0; 11333 11334 return 1; 11335} 11336 11337/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory 11338 address with operands set by DEP_INSN. 
*/

static int
ix86_agi_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx addr;

  /* For an LEA on Pentium the "address" is the LEA source expression;
     otherwise look for the first MEM operand of INSN.  */
  if (insn_type == TYPE_LEA
      && TARGET_PENTIUM)
    {
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
	;
      else if (GET_CODE (addr) == PARALLEL
	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      else
	abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    goto found;
	  }
      /* No memory operand: no address generation interlock possible.  */
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}

/* Adjust COST of the dependence LINK between DEP_INSN (producer) and
   INSN (consumer) according to the scheduling model of the selected
   CPU.  This is the TARGET_SCHED_ADJUST_COST hook.  */

static int
ix86_adjust_cost (insn, link, dep_insn, cost)
     rtx insn, link, dep_insn;
     int cost;
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory, dep_memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output depenancies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require value to be ready one cycle
	 earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
	  && dep_insn_type != TYPE_FMOV
	  && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
	cost += 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && GET_CODE (SET_DEST (set2)) == MEM)
	cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);
      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;

    case PROCESSOR_ATHLON:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
	{
	  if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
	    cost += 2;
	  else
	    cost += 3;
	}
      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 0;
	  else if (cost >= 3)
	    cost -= 3;
	  else
	    cost = 0;
	}
      /* NOTE(review): there is no `break' here, so the ATHLON case
	 falls through into `default', which only breaks -- harmless,
	 but worth confirming it is intentional.  */

    default:
      break;
    }

  return cost;
}

/* Per-CPU scheduling state.  Only the PPro model keeps state: the
   insns assigned to the three decoders this cycle, and how many insns
   were issued.  */
static union
{
  struct ppro_sched_data
  {
    rtx decode[3];
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;

/* Return the ppro_uops attribute of INSN, or PPRO_UOPS_MANY (the most
   pessimistic class) when INSN is not recognizable.  */
static enum attr_ppro_uops
ix86_safe_ppro_uops (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_ppro_uops (insn);
  else
    return PPRO_UOPS_MANY;
}

/* Print the INSN_UIDs of the current PPro decode packet to DUMP,
   if the packet is non-empty.  */
static void
ix86_dump_ppro_packet (dump)
     FILE *dump;
{
  if (ix86_sched_data.ppro.decode[0])
    {
      fprintf (dump, "PPRO packet: %d",
	       INSN_UID (ix86_sched_data.ppro.decode[0]));
      if (ix86_sched_data.ppro.decode[1])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
      if (ix86_sched_data.ppro.decode[2])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
      fputc ('\n', dump);
    }
}

/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ix86_sched_init (dump, sched_verbose, veclen)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int veclen ATTRIBUTE_UNUSED;
{
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
}

/* Shift INSN to SLOT, and shift everything else down.
*/

static void
ix86_reorder_insn (insnp, slot)
     rtx *insnp, *slot;
{
  /* Rotate *INSNP into *SLOT, sliding the intervening entries down
     one position.  No-op when INSNP already is SLOT.  */
  if (insnp != slot)
    {
      rtx insn = *insnp;
      do
	insnp[0] = insnp[1];
      while (++insnp != slot);
      *insnp = insn;
    }
}

/* Reorder the ready queue READY .. E_READY (E_READY is the highest
   priority end) so the PPro's 4-1-1 decoder template is filled:
   one multi-uop insn for decoder 0, single-uop insns for the rest.  */
static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready > e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  /* Always claim at least one issue, so the variable-issue hook's
     decrement never goes negative on an empty cycle.  */
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}

/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */
static int
ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *n_readyp;
     int clock_var ATTRIBUTE_UNUSED;
{
  int n_ready = *n_readyp;
  rtx *e_ready = ready + n_ready - 1;

  /* Make sure to go ahead and initialize key items in
     ix86_sched_data if we are not going to bother trying to
     reorder the ready queue.  */
  if (n_ready < 2)
    {
      ix86_sched_data.ppro.issued_this_cycle = 1;
      goto out;
    }

  switch (ix86_cpu)
    {
    default:
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

out:
  return ix86_issue_rate ();
}

/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.
*/

static int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;
  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	if (uops == PPRO_UOPS_MANY)
	  {
	    /* A many-uop insn occupies the whole decode group by
	       itself: flush (and dump) the pending packet, dump the
	       singleton packet containing INSN, then clear.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	else if (uops == PPRO_UOPS_FEW)
	  {
	    /* A 2-4 uop insn starts a new packet in decoder 0.  */
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    /* Single-uop insn: place it in the first free decoder;
	       when decoder 2 fills, the packet is complete.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}

/* Nonzero when the DFA pipeline description should be used; only the
   Pentium model has one here.  */
static int
ia32_use_dfa_pipeline_interface ()
{
  if (ix86_cpu == PROCESSOR_PENTIUM)
    return 1;
  return 0;
}

/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead ()
{
  if (ix86_cpu == PROCESSOR_PENTIUM)
    return 2;
  else
    return 0;
}


/* Walk through INSNS and look for MEM references whose address is DSTREG or
   SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
   appropriate.  */

void
ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
     rtx insns;
     rtx dstref, srcref, dstreg, srcreg;
{
  rtx insn;

  for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
				 dstreg, srcreg);
}

/* Subroutine of above to actually do the updating by recursively walking
   the rtx.  */

static void
ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx x;
     rtx dstref, srcref, dstreg, srcreg;
{
  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);
  int i, j;

  /* Copy attributes when the MEM's address is exactly the given
     register (pointer equality, not rtx_equal_p).  */
  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  /* Recurse over all rtx ('e') and rtx-vector ('E') operands.  */
  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
    {
      if (*format_ptr == 'e')
	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
				   dstreg, srcreg);
      else if (*format_ptr == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
				     dstreg, srcreg);
    }
}

/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.
*/ 11847 11848int 11849ix86_constant_alignment (exp, align) 11850 tree exp; 11851 int align; 11852{ 11853 if (TREE_CODE (exp) == REAL_CST) 11854 { 11855 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64) 11856 return 64; 11857 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128) 11858 return 128; 11859 } 11860 else if (TREE_CODE (exp) == STRING_CST && !TARGET_NO_ALIGN_LONG_STRINGS 11861 && TREE_STRING_LENGTH (exp) >= 31 && align < 256) 11862 return 256; 11863 11864 return align; 11865} 11866 11867/* Compute the alignment for a static variable. 11868 TYPE is the data type, and ALIGN is the alignment that 11869 the object would ordinarily have. The value of this function is used 11870 instead of that alignment to align the object. */ 11871 11872int 11873ix86_data_alignment (type, align) 11874 tree type; 11875 int align; 11876{ 11877 if (AGGREGATE_TYPE_P (type) 11878 && TYPE_SIZE (type) 11879 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 11880 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256 11881 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256) 11882 return 256; 11883 11884 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned 11885 to 16byte boundary. 
*/ 11886 if (TARGET_64BIT) 11887 { 11888 if (AGGREGATE_TYPE_P (type) 11889 && TYPE_SIZE (type) 11890 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 11891 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128 11892 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) 11893 return 128; 11894 } 11895 11896 if (TREE_CODE (type) == ARRAY_TYPE) 11897 { 11898 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) 11899 return 64; 11900 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) 11901 return 128; 11902 } 11903 else if (TREE_CODE (type) == COMPLEX_TYPE) 11904 { 11905 11906 if (TYPE_MODE (type) == DCmode && align < 64) 11907 return 64; 11908 if (TYPE_MODE (type) == XCmode && align < 128) 11909 return 128; 11910 } 11911 else if ((TREE_CODE (type) == RECORD_TYPE 11912 || TREE_CODE (type) == UNION_TYPE 11913 || TREE_CODE (type) == QUAL_UNION_TYPE) 11914 && TYPE_FIELDS (type)) 11915 { 11916 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) 11917 return 64; 11918 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) 11919 return 128; 11920 } 11921 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE 11922 || TREE_CODE (type) == INTEGER_TYPE) 11923 { 11924 if (TYPE_MODE (type) == DFmode && align < 64) 11925 return 64; 11926 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) 11927 return 128; 11928 } 11929 11930 return align; 11931} 11932 11933/* Compute the alignment for a local variable. 11934 TYPE is the data type, and ALIGN is the alignment that 11935 the object would ordinarily have. The value of this macro is used 11936 instead of that alignment to align the object. */ 11937 11938int 11939ix86_local_alignment (type, align) 11940 tree type; 11941 int align; 11942{ 11943 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned 11944 to 16byte boundary. 
*/ 11945 if (TARGET_64BIT) 11946 { 11947 if (AGGREGATE_TYPE_P (type) 11948 && TYPE_SIZE (type) 11949 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 11950 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16 11951 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) 11952 return 128; 11953 } 11954 if (TREE_CODE (type) == ARRAY_TYPE) 11955 { 11956 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) 11957 return 64; 11958 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) 11959 return 128; 11960 } 11961 else if (TREE_CODE (type) == COMPLEX_TYPE) 11962 { 11963 if (TYPE_MODE (type) == DCmode && align < 64) 11964 return 64; 11965 if (TYPE_MODE (type) == XCmode && align < 128) 11966 return 128; 11967 } 11968 else if ((TREE_CODE (type) == RECORD_TYPE 11969 || TREE_CODE (type) == UNION_TYPE 11970 || TREE_CODE (type) == QUAL_UNION_TYPE) 11971 && TYPE_FIELDS (type)) 11972 { 11973 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) 11974 return 64; 11975 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) 11976 return 128; 11977 } 11978 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE 11979 || TREE_CODE (type) == INTEGER_TYPE) 11980 { 11981 11982 if (TYPE_MODE (type) == DFmode && align < 64) 11983 return 64; 11984 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) 11985 return 128; 11986 } 11987 return align; 11988} 11989 11990/* Emit RTL insns to initialize the variable parts of a trampoline. 11991 FNADDR is an RTX for the address of the function's pure code. 11992 CXT is an RTX for the static chain value for the function. */ 11993void 11994x86_initialize_trampoline (tramp, fnaddr, cxt) 11995 rtx tramp, fnaddr, cxt; 11996{ 11997 if (!TARGET_64BIT) 11998 { 11999 /* Compute offset from the end of the jmp to the target function. 
*/ 12000 rtx disp = expand_binop (SImode, sub_optab, fnaddr, 12001 plus_constant (tramp, 10), 12002 NULL_RTX, 1, OPTAB_DIRECT); 12003 emit_move_insn (gen_rtx_MEM (QImode, tramp), 12004 gen_int_mode (0xb9, QImode)); 12005 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt); 12006 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)), 12007 gen_int_mode (0xe9, QImode)); 12008 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp); 12009 } 12010 else 12011 { 12012 int offset = 0; 12013 /* Try to load address using shorter movl instead of movabs. 12014 We may want to support movq for kernel mode, but kernel does not use 12015 trampolines at the moment. */ 12016 if (x86_64_zero_extended_value (fnaddr)) 12017 { 12018 fnaddr = copy_to_mode_reg (DImode, fnaddr); 12019 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 12020 gen_int_mode (0xbb41, HImode)); 12021 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)), 12022 gen_lowpart (SImode, fnaddr)); 12023 offset += 6; 12024 } 12025 else 12026 { 12027 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 12028 gen_int_mode (0xbb49, HImode)); 12029 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)), 12030 fnaddr); 12031 offset += 10; 12032 } 12033 /* Load static chain using movabs to r10. 
*/ 12034 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 12035 gen_int_mode (0xba49, HImode)); 12036 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)), 12037 cxt); 12038 offset += 10; 12039 /* Jump to the r11 */ 12040 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 12041 gen_int_mode (0xff49, HImode)); 12042 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)), 12043 gen_int_mode (0xe3, QImode)); 12044 offset += 3; 12045 if (offset > TRAMPOLINE_SIZE) 12046 abort (); 12047 } 12048 12049#ifdef TRANSFER_FROM_TRAMPOLINE 12050 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"), 12051 LCT_NORMAL, VOIDmode, 1, tramp, Pmode); 12052#endif 12053} 12054 12055#define def_builtin(MASK, NAME, TYPE, CODE) \ 12056do { \ 12057 if ((MASK) & target_flags \ 12058 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \ 12059 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \ 12060 NULL, NULL_TREE); \ 12061} while (0) 12062 12063struct builtin_description 12064{ 12065 const unsigned int mask; 12066 const enum insn_code icode; 12067 const char *const name; 12068 const enum ix86_builtins code; 12069 const enum rtx_code comparison; 12070 const unsigned int flag; 12071}; 12072 12073/* Used for builtins that are enabled both by -msse and -msse2. 
*/ 12074#define MASK_SSE1 (MASK_SSE | MASK_SSE2) 12075#define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT) 12076#define MASK_SSE264 (MASK_SSE2 | MASK_64BIT) 12077 12078static const struct builtin_description bdesc_comi[] = 12079{ 12080 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 }, 12081 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 }, 12082 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 }, 12083 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 }, 12084 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 }, 12085 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 }, 12086 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 }, 12087 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 }, 12088 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 }, 12089 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 }, 12090 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 }, 12091 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 }, 12092 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 }, 12093 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 }, 12094 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 }, 12095 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 }, 12096 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 }, 12097 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, 
LTGT, 0 }, 12098 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 }, 12099 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 }, 12100 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 }, 12101 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 }, 12102 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 }, 12103 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 }, 12104}; 12105 12106static const struct builtin_description bdesc_2arg[] = 12107{ 12108 /* SSE */ 12109 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 }, 12110 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 }, 12111 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, 12112 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, 12113 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, 12114 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, 12115 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, 12116 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, 12117 12118 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, 12119 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, 12120 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, 12121 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 }, 12122 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 }, 12123 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, 
"__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, 12124 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 }, 12125 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 }, 12126 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 }, 12127 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 }, 12128 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 }, 12129 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 }, 12130 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, 12131 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, 12132 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, 12133 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, 12134 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 }, 12135 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 }, 12136 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 }, 12137 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 }, 12138 12139 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, 12140 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, 12141 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, 12142 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, 12143 12144 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 
}, 12145 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 }, 12146 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 }, 12147 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }, 12148 12149 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, 12150 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, 12151 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, 12152 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 }, 12153 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, 12154 12155 /* MMX */ 12156 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 }, 12157 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 }, 12158 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 }, 12159 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 }, 12160 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 }, 12161 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 }, 12162 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 }, 12163 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 }, 12164 12165 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 }, 12166 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 }, 12167 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 }, 12168 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 }, 12169 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 }, 12170 { 
MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 }, 12171 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 }, 12172 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 }, 12173 12174 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 }, 12175 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 }, 12176 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, 12177 12178 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 }, 12179 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 }, 12180 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 }, 12181 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 }, 12182 12183 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, 12184 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, 12185 12186 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 }, 12187 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 }, 12188 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 }, 12189 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 }, 12190 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 }, 12191 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 }, 12192 12193 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, 12194 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, 12195 { MASK_SSE1 | MASK_3DNOW_A, 
CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, 12196 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, 12197 12198 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 }, 12199 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 }, 12200 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 }, 12201 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 }, 12202 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 }, 12203 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 }, 12204 12205 /* Special. */ 12206 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 }, 12207 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 }, 12208 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 }, 12209 12210 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, 12211 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, 12212 { MASK_SSE164, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, 12213 12214 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, 12215 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, 12216 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 }, 12217 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 }, 12218 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 }, 12219 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 }, 12220 12221 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 }, 12222 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 }, 12223 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 }, 12224 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 }, 12225 { 
MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 }, 12226 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 }, 12227 12228 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 }, 12229 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 }, 12230 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, 12231 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 }, 12232 12233 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, 12234 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }, 12235 12236 /* SSE2 */ 12237 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 }, 12238 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 }, 12239 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 }, 12240 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 }, 12241 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 }, 12242 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 }, 12243 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 }, 12244 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 }, 12245 12246 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 }, 12247 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 }, 12248 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 }, 12249 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 }, 12250 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 }, 12251 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 }, 12252 { MASK_SSE2, CODE_FOR_maskncmpv2df3, 
"__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 }, 12253 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 }, 12254 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 }, 12255 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 }, 12256 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 }, 12257 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 }, 12258 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 }, 12259 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 }, 12260 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 }, 12261 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 }, 12262 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 }, 12263 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 }, 12264 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 }, 12265 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 }, 12266 12267 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 }, 12268 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 }, 12269 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 }, 12270 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 }, 12271 12272 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 }, 12273 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 }, 12274 { 
MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 }, 12275 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 }, 12276 12277 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 }, 12278 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 }, 12279 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 }, 12280 12281 /* SSE2 MMX */ 12282 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 }, 12283 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 }, 12284 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 }, 12285 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 }, 12286 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 }, 12287 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 }, 12288 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 }, 12289 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 }, 12290 12291 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 }, 12292 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 }, 12293 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 }, 12294 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 }, 12295 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 }, 12296 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 }, 12297 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 }, 12298 { 
MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 }, 12299 12300 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 }, 12301 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 }, 12302 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 }, 12303 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 }, 12304 12305 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 }, 12306 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 }, 12307 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 }, 12308 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 }, 12309 12310 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 }, 12311 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 }, 12312 12313 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 }, 12314 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 }, 12315 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 }, 12316 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 }, 12317 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 }, 12318 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 }, 12319 12320 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 }, 12321 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 }, 12322 { MASK_SSE2, CODE_FOR_uminv16qi3, 
"__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 }, 12323 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 }, 12324 12325 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 }, 12326 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 }, 12327 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 }, 12328 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 }, 12329 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 }, 12330 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 }, 12331 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 }, 12332 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 }, 12333 12334 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 }, 12335 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 }, 12336 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 }, 12337 12338 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 }, 12339 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 }, 12340 12341 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 }, 12342 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 }, 12343 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 }, 12344 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 }, 12345 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 }, 12346 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, 
IX86_BUILTIN_PSLLQI128, 0, 0 }, 12347 12348 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 }, 12349 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 }, 12350 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 }, 12351 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 }, 12352 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 }, 12353 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 }, 12354 12355 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 }, 12356 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 }, 12357 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 }, 12358 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 }, 12359 12360 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 }, 12361 12362 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 }, 12363 { MASK_SSE264, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 }, 12364 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 }, 12365 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 } 12366}; 12367 12368static const struct builtin_description bdesc_1arg[] = 12369{ 12370 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 }, 12371 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 }, 12372 12373 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 }, 12374 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 }, 12375 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 }, 12376 12377 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, 12378 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, 12379 { MASK_SSE164, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 }, 12380 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, 12381 { MASK_SSE1, CODE_FOR_cvttss2si, 0, 
IX86_BUILTIN_CVTTSS2SI, 0, 0 }, 12382 { MASK_SSE164, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 }, 12383 12384 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 }, 12385 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 }, 12386 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 }, 12387 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 }, 12388 12389 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 }, 12390 12391 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 }, 12392 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 }, 12393 12394 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 }, 12395 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 }, 12396 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 }, 12397 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 }, 12398 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 }, 12399 12400 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 }, 12401 12402 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 }, 12403 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 }, 12404 { MASK_SSE264, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 }, 12405 { MASK_SSE264, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 }, 12406 12407 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 }, 12408 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 }, 12409 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }, 12410 12411 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 } 12412}; 12413 12414void 12415ix86_init_builtins () 12416{ 12417 if (TARGET_MMX) 12418 ix86_init_mmx_sse_builtins (); 12419} 12420 12421/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX 12422 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX 12423 builtins. 
*/ 12424static void 12425ix86_init_mmx_sse_builtins () 12426{ 12427 const struct builtin_description * d; 12428 size_t i; 12429 12430 tree pchar_type_node = build_pointer_type (char_type_node); 12431 tree pcchar_type_node = build_pointer_type ( 12432 build_type_variant (char_type_node, 1, 0)); 12433 tree pfloat_type_node = build_pointer_type (float_type_node); 12434 tree pcfloat_type_node = build_pointer_type ( 12435 build_type_variant (float_type_node, 1, 0)); 12436 tree pv2si_type_node = build_pointer_type (V2SI_type_node); 12437 tree pv2di_type_node = build_pointer_type (V2DI_type_node); 12438 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node); 12439 12440 /* Comparisons. */ 12441 tree int_ftype_v4sf_v4sf 12442 = build_function_type_list (integer_type_node, 12443 V4SF_type_node, V4SF_type_node, NULL_TREE); 12444 tree v4si_ftype_v4sf_v4sf 12445 = build_function_type_list (V4SI_type_node, 12446 V4SF_type_node, V4SF_type_node, NULL_TREE); 12447 /* MMX/SSE/integer conversions. 
*/ 12448 tree int_ftype_v4sf 12449 = build_function_type_list (integer_type_node, 12450 V4SF_type_node, NULL_TREE); 12451 tree int64_ftype_v4sf 12452 = build_function_type_list (long_long_integer_type_node, 12453 V4SF_type_node, NULL_TREE); 12454 tree int_ftype_v8qi 12455 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE); 12456 tree v4sf_ftype_v4sf_int 12457 = build_function_type_list (V4SF_type_node, 12458 V4SF_type_node, integer_type_node, NULL_TREE); 12459 tree v4sf_ftype_v4sf_int64 12460 = build_function_type_list (V4SF_type_node, 12461 V4SF_type_node, long_long_integer_type_node, 12462 NULL_TREE); 12463 tree v4sf_ftype_v4sf_v2si 12464 = build_function_type_list (V4SF_type_node, 12465 V4SF_type_node, V2SI_type_node, NULL_TREE); 12466 tree int_ftype_v4hi_int 12467 = build_function_type_list (integer_type_node, 12468 V4HI_type_node, integer_type_node, NULL_TREE); 12469 tree v4hi_ftype_v4hi_int_int 12470 = build_function_type_list (V4HI_type_node, V4HI_type_node, 12471 integer_type_node, integer_type_node, 12472 NULL_TREE); 12473 /* Miscellaneous. 
*/ 12474 tree v8qi_ftype_v4hi_v4hi 12475 = build_function_type_list (V8QI_type_node, 12476 V4HI_type_node, V4HI_type_node, NULL_TREE); 12477 tree v4hi_ftype_v2si_v2si 12478 = build_function_type_list (V4HI_type_node, 12479 V2SI_type_node, V2SI_type_node, NULL_TREE); 12480 tree v4sf_ftype_v4sf_v4sf_int 12481 = build_function_type_list (V4SF_type_node, 12482 V4SF_type_node, V4SF_type_node, 12483 integer_type_node, NULL_TREE); 12484 tree v2si_ftype_v4hi_v4hi 12485 = build_function_type_list (V2SI_type_node, 12486 V4HI_type_node, V4HI_type_node, NULL_TREE); 12487 tree v4hi_ftype_v4hi_int 12488 = build_function_type_list (V4HI_type_node, 12489 V4HI_type_node, integer_type_node, NULL_TREE); 12490 tree v4hi_ftype_v4hi_di 12491 = build_function_type_list (V4HI_type_node, 12492 V4HI_type_node, long_long_unsigned_type_node, 12493 NULL_TREE); 12494 tree v2si_ftype_v2si_di 12495 = build_function_type_list (V2SI_type_node, 12496 V2SI_type_node, long_long_unsigned_type_node, 12497 NULL_TREE); 12498 tree void_ftype_void 12499 = build_function_type (void_type_node, void_list_node); 12500 tree void_ftype_unsigned 12501 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE); 12502 tree unsigned_ftype_void 12503 = build_function_type (unsigned_type_node, void_list_node); 12504 tree di_ftype_void 12505 = build_function_type (long_long_unsigned_type_node, void_list_node); 12506 tree v4sf_ftype_void 12507 = build_function_type (V4SF_type_node, void_list_node); 12508 tree v2si_ftype_v4sf 12509 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE); 12510 /* Loads/stores. 
*/ 12511 tree void_ftype_v8qi_v8qi_pchar 12512 = build_function_type_list (void_type_node, 12513 V8QI_type_node, V8QI_type_node, 12514 pchar_type_node, NULL_TREE); 12515 tree v4sf_ftype_pcfloat 12516 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE); 12517 /* @@@ the type is bogus */ 12518 tree v4sf_ftype_v4sf_pv2si 12519 = build_function_type_list (V4SF_type_node, 12520 V4SF_type_node, pv2si_type_node, NULL_TREE); 12521 tree void_ftype_pv2si_v4sf 12522 = build_function_type_list (void_type_node, 12523 pv2si_type_node, V4SF_type_node, NULL_TREE); 12524 tree void_ftype_pfloat_v4sf 12525 = build_function_type_list (void_type_node, 12526 pfloat_type_node, V4SF_type_node, NULL_TREE); 12527 tree void_ftype_pdi_di 12528 = build_function_type_list (void_type_node, 12529 pdi_type_node, long_long_unsigned_type_node, 12530 NULL_TREE); 12531 tree void_ftype_pv2di_v2di 12532 = build_function_type_list (void_type_node, 12533 pv2di_type_node, V2DI_type_node, NULL_TREE); 12534 /* Normal vector unops. */ 12535 tree v4sf_ftype_v4sf 12536 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); 12537 12538 /* Normal vector binops. 
*/ 12539 tree v4sf_ftype_v4sf_v4sf 12540 = build_function_type_list (V4SF_type_node, 12541 V4SF_type_node, V4SF_type_node, NULL_TREE); 12542 tree v8qi_ftype_v8qi_v8qi 12543 = build_function_type_list (V8QI_type_node, 12544 V8QI_type_node, V8QI_type_node, NULL_TREE); 12545 tree v4hi_ftype_v4hi_v4hi 12546 = build_function_type_list (V4HI_type_node, 12547 V4HI_type_node, V4HI_type_node, NULL_TREE); 12548 tree v2si_ftype_v2si_v2si 12549 = build_function_type_list (V2SI_type_node, 12550 V2SI_type_node, V2SI_type_node, NULL_TREE); 12551 tree di_ftype_di_di 12552 = build_function_type_list (long_long_unsigned_type_node, 12553 long_long_unsigned_type_node, 12554 long_long_unsigned_type_node, NULL_TREE); 12555 12556 tree v2si_ftype_v2sf 12557 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE); 12558 tree v2sf_ftype_v2si 12559 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE); 12560 tree v2si_ftype_v2si 12561 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE); 12562 tree v2sf_ftype_v2sf 12563 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE); 12564 tree v2sf_ftype_v2sf_v2sf 12565 = build_function_type_list (V2SF_type_node, 12566 V2SF_type_node, V2SF_type_node, NULL_TREE); 12567 tree v2si_ftype_v2sf_v2sf 12568 = build_function_type_list (V2SI_type_node, 12569 V2SF_type_node, V2SF_type_node, NULL_TREE); 12570 tree pint_type_node = build_pointer_type (integer_type_node); 12571 tree pcint_type_node = build_pointer_type ( 12572 build_type_variant (integer_type_node, 1, 0)); 12573 tree pdouble_type_node = build_pointer_type (double_type_node); 12574 tree pcdouble_type_node = build_pointer_type ( 12575 build_type_variant (double_type_node, 1, 0)); 12576 tree int_ftype_v2df_v2df 12577 = build_function_type_list (integer_type_node, 12578 V2DF_type_node, V2DF_type_node, NULL_TREE); 12579 12580 tree ti_ftype_void 12581 = build_function_type (intTI_type_node, void_list_node); 12582 tree v2di_ftype_void 
12583 = build_function_type (V2DI_type_node, void_list_node); 12584 tree ti_ftype_ti_ti 12585 = build_function_type_list (intTI_type_node, 12586 intTI_type_node, intTI_type_node, NULL_TREE); 12587 tree void_ftype_pcvoid 12588 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE); 12589 tree v2di_ftype_di 12590 = build_function_type_list (V2DI_type_node, 12591 long_long_unsigned_type_node, NULL_TREE); 12592 tree di_ftype_v2di 12593 = build_function_type_list (long_long_unsigned_type_node, 12594 V2DI_type_node, NULL_TREE); 12595 tree v4sf_ftype_v4si 12596 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE); 12597 tree v4si_ftype_v4sf 12598 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE); 12599 tree v2df_ftype_v4si 12600 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE); 12601 tree v4si_ftype_v2df 12602 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE); 12603 tree v2si_ftype_v2df 12604 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE); 12605 tree v4sf_ftype_v2df 12606 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE); 12607 tree v2df_ftype_v2si 12608 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE); 12609 tree v2df_ftype_v4sf 12610 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE); 12611 tree int_ftype_v2df 12612 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE); 12613 tree int64_ftype_v2df 12614 = build_function_type_list (long_long_integer_type_node, 12615 V2DF_type_node, NULL_TREE); 12616 tree v2df_ftype_v2df_int 12617 = build_function_type_list (V2DF_type_node, 12618 V2DF_type_node, integer_type_node, NULL_TREE); 12619 tree v2df_ftype_v2df_int64 12620 = build_function_type_list (V2DF_type_node, 12621 V2DF_type_node, long_long_integer_type_node, 12622 NULL_TREE); 12623 tree v4sf_ftype_v4sf_v2df 12624 = build_function_type_list (V4SF_type_node, 12625 
V4SF_type_node, V2DF_type_node, NULL_TREE); 12626 tree v2df_ftype_v2df_v4sf 12627 = build_function_type_list (V2DF_type_node, 12628 V2DF_type_node, V4SF_type_node, NULL_TREE); 12629 tree v2df_ftype_v2df_v2df_int 12630 = build_function_type_list (V2DF_type_node, 12631 V2DF_type_node, V2DF_type_node, 12632 integer_type_node, 12633 NULL_TREE); 12634 tree v2df_ftype_v2df_pv2si 12635 = build_function_type_list (V2DF_type_node, 12636 V2DF_type_node, pv2si_type_node, NULL_TREE); 12637 tree void_ftype_pv2si_v2df 12638 = build_function_type_list (void_type_node, 12639 pv2si_type_node, V2DF_type_node, NULL_TREE); 12640 tree void_ftype_pdouble_v2df 12641 = build_function_type_list (void_type_node, 12642 pdouble_type_node, V2DF_type_node, NULL_TREE); 12643 tree void_ftype_pint_int 12644 = build_function_type_list (void_type_node, 12645 pint_type_node, integer_type_node, NULL_TREE); 12646 tree void_ftype_v16qi_v16qi_pchar 12647 = build_function_type_list (void_type_node, 12648 V16QI_type_node, V16QI_type_node, 12649 pchar_type_node, NULL_TREE); 12650 tree v2df_ftype_pcdouble 12651 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE); 12652 tree v2df_ftype_v2df_v2df 12653 = build_function_type_list (V2DF_type_node, 12654 V2DF_type_node, V2DF_type_node, NULL_TREE); 12655 tree v16qi_ftype_v16qi_v16qi 12656 = build_function_type_list (V16QI_type_node, 12657 V16QI_type_node, V16QI_type_node, NULL_TREE); 12658 tree v8hi_ftype_v8hi_v8hi 12659 = build_function_type_list (V8HI_type_node, 12660 V8HI_type_node, V8HI_type_node, NULL_TREE); 12661 tree v4si_ftype_v4si_v4si 12662 = build_function_type_list (V4SI_type_node, 12663 V4SI_type_node, V4SI_type_node, NULL_TREE); 12664 tree v2di_ftype_v2di_v2di 12665 = build_function_type_list (V2DI_type_node, 12666 V2DI_type_node, V2DI_type_node, NULL_TREE); 12667 tree v2di_ftype_v2df_v2df 12668 = build_function_type_list (V2DI_type_node, 12669 V2DF_type_node, V2DF_type_node, NULL_TREE); 12670 tree v2df_ftype_v2df 12671 = 
build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE); 12672 tree v2df_ftype_double 12673 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE); 12674 tree v2df_ftype_double_double 12675 = build_function_type_list (V2DF_type_node, 12676 double_type_node, double_type_node, NULL_TREE); 12677 tree int_ftype_v8hi_int 12678 = build_function_type_list (integer_type_node, 12679 V8HI_type_node, integer_type_node, NULL_TREE); 12680 tree v8hi_ftype_v8hi_int_int 12681 = build_function_type_list (V8HI_type_node, 12682 V8HI_type_node, integer_type_node, 12683 integer_type_node, NULL_TREE); 12684 tree v2di_ftype_v2di_int 12685 = build_function_type_list (V2DI_type_node, 12686 V2DI_type_node, integer_type_node, NULL_TREE); 12687 tree v4si_ftype_v4si_int 12688 = build_function_type_list (V4SI_type_node, 12689 V4SI_type_node, integer_type_node, NULL_TREE); 12690 tree v8hi_ftype_v8hi_int 12691 = build_function_type_list (V8HI_type_node, 12692 V8HI_type_node, integer_type_node, NULL_TREE); 12693 tree v8hi_ftype_v8hi_v2di 12694 = build_function_type_list (V8HI_type_node, 12695 V8HI_type_node, V2DI_type_node, NULL_TREE); 12696 tree v4si_ftype_v4si_v2di 12697 = build_function_type_list (V4SI_type_node, 12698 V4SI_type_node, V2DI_type_node, NULL_TREE); 12699 tree v4si_ftype_v8hi_v8hi 12700 = build_function_type_list (V4SI_type_node, 12701 V8HI_type_node, V8HI_type_node, NULL_TREE); 12702 tree di_ftype_v8qi_v8qi 12703 = build_function_type_list (long_long_unsigned_type_node, 12704 V8QI_type_node, V8QI_type_node, NULL_TREE); 12705 tree v2di_ftype_v16qi_v16qi 12706 = build_function_type_list (V2DI_type_node, 12707 V16QI_type_node, V16QI_type_node, NULL_TREE); 12708 tree int_ftype_v16qi 12709 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE); 12710 tree v16qi_ftype_pcchar 12711 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE); 12712 tree void_ftype_pchar_v16qi 12713 = build_function_type_list (void_type_node, 
12714 pchar_type_node, V16QI_type_node, NULL_TREE); 12715 tree v4si_ftype_pcint 12716 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE); 12717 tree void_ftype_pcint_v4si 12718 = build_function_type_list (void_type_node, 12719 pcint_type_node, V4SI_type_node, NULL_TREE); 12720 tree v2di_ftype_v2di 12721 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); 12722 12723 /* Add all builtins that are more or less simple operations on two 12724 operands. */ 12725 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) 12726 { 12727 /* Use one of the operands; the target can have a different mode for 12728 mask-generating compares. */ 12729 enum machine_mode mode; 12730 tree type; 12731 12732 if (d->name == 0) 12733 continue; 12734 mode = insn_data[d->icode].operand[1].mode; 12735 12736 switch (mode) 12737 { 12738 case V16QImode: 12739 type = v16qi_ftype_v16qi_v16qi; 12740 break; 12741 case V8HImode: 12742 type = v8hi_ftype_v8hi_v8hi; 12743 break; 12744 case V4SImode: 12745 type = v4si_ftype_v4si_v4si; 12746 break; 12747 case V2DImode: 12748 type = v2di_ftype_v2di_v2di; 12749 break; 12750 case V2DFmode: 12751 type = v2df_ftype_v2df_v2df; 12752 break; 12753 case TImode: 12754 type = ti_ftype_ti_ti; 12755 break; 12756 case V4SFmode: 12757 type = v4sf_ftype_v4sf_v4sf; 12758 break; 12759 case V8QImode: 12760 type = v8qi_ftype_v8qi_v8qi; 12761 break; 12762 case V4HImode: 12763 type = v4hi_ftype_v4hi_v4hi; 12764 break; 12765 case V2SImode: 12766 type = v2si_ftype_v2si_v2si; 12767 break; 12768 case DImode: 12769 type = di_ftype_di_di; 12770 break; 12771 12772 default: 12773 abort (); 12774 } 12775 12776 /* Override for comparisons. 
*/ 12777 if (d->icode == CODE_FOR_maskcmpv4sf3 12778 || d->icode == CODE_FOR_maskncmpv4sf3 12779 || d->icode == CODE_FOR_vmmaskcmpv4sf3 12780 || d->icode == CODE_FOR_vmmaskncmpv4sf3) 12781 type = v4si_ftype_v4sf_v4sf; 12782 12783 if (d->icode == CODE_FOR_maskcmpv2df3 12784 || d->icode == CODE_FOR_maskncmpv2df3 12785 || d->icode == CODE_FOR_vmmaskcmpv2df3 12786 || d->icode == CODE_FOR_vmmaskncmpv2df3) 12787 type = v2di_ftype_v2df_v2df; 12788 12789 def_builtin (d->mask, d->name, type, d->code); 12790 } 12791 12792 /* Add the remaining MMX insns with somewhat more complicated types. */ 12793 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO); 12794 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); 12795 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW); 12796 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD); 12797 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ); 12798 12799 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW); 12800 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD); 12801 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ); 12802 12803 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW); 12804 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD); 12805 12806 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW); 12807 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD); 12808 12809 /* comi/ucomi insns. 
*/ 12810 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) 12811 if (d->mask == MASK_SSE2) 12812 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code); 12813 else 12814 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code); 12815 12816 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB); 12817 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW); 12818 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB); 12819 12820 def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); 12821 def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); 12822 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); 12823 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); 12824 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); 12825 def_builtin (MASK_SSE164, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS); 12826 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); 12827 def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64); 12828 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); 12829 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); 12830 def_builtin (MASK_SSE164, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64); 12831 12832 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW); 12833 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW); 12834 12835 def_builtin (MASK_SSE1 | MASK_3DNOW_A, 
"__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); 12836 12837 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS); 12838 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS); 12839 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS); 12840 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS); 12841 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); 12842 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS); 12843 12844 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS); 12845 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS); 12846 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS); 12847 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); 12848 12849 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS); 12850 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); 12851 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); 12852 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); 12853 12854 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); 12855 12856 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); 12857 12858 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); 12859 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); 12860 def_builtin (MASK_SSE1, 
"__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS); 12861 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); 12862 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS); 12863 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); 12864 12865 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); 12866 12867 /* Original 3DNow! */ 12868 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS); 12869 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB); 12870 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID); 12871 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC); 12872 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD); 12873 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ); 12874 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE); 12875 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT); 12876 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX); 12877 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN); 12878 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL); 12879 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP); 12880 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1); 12881 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2); 12882 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT); 12883 def_builtin 
(MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1); 12884 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB); 12885 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR); 12886 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD); 12887 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW); 12888 12889 /* 3DNow! extension as used in the Athlon CPU. */ 12890 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW); 12891 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC); 12892 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC); 12893 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW); 12894 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF); 12895 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI); 12896 12897 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO); 12898 12899 /* SSE2 */ 12900 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128); 12901 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128); 12902 12903 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU); 12904 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ); 12905 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q); 12906 12907 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD); 12908 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD); 12909 
def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD); 12910 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD); 12911 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD); 12912 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD); 12913 12914 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD); 12915 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD); 12916 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD); 12917 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD); 12918 12919 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD); 12920 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128); 12921 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI); 12922 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD); 12923 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ); 12924 12925 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD); 12926 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW); 12927 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW); 12928 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128); 12929 12930 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD); 12931 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD); 12932 12933 def_builtin (MASK_SSE2, 
"__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD); 12934 12935 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD); 12936 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS); 12937 12938 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ); 12939 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI); 12940 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS); 12941 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ); 12942 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI); 12943 12944 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD); 12945 12946 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI); 12947 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI); 12948 def_builtin (MASK_SSE264, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64); 12949 def_builtin (MASK_SSE264, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64); 12950 12951 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ); 12952 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD); 12953 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ); 12954 12955 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD); 12956 def_builtin (MASK_SSE264, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD); 12957 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS); 12958 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, 
IX86_BUILTIN_CVTSS2SD); 12959 12960 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1); 12961 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD); 12962 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD); 12963 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1); 12964 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD); 12965 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1); 12966 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD); 12967 12968 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH); 12969 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE); 12970 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE); 12971 12972 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA); 12973 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU); 12974 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD); 12975 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA); 12976 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU); 12977 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED); 12978 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ); 12979 12980 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI); 12981 12982 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128); 12983 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", 
v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128); 12984 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128); 12985 12986 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128); 12987 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128); 12988 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128); 12989 12990 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128); 12991 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128); 12992 12993 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128); 12994 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128); 12995 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128); 12996 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128); 12997 12998 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128); 12999 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128); 13000 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128); 13001 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128); 13002 13003 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128); 13004 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128); 13005 13006 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128); 13007} 13008 13009/* Errors in the source file can cause expand_expr to return const0_rtx 13010 where we expect a vector. 
To avoid crashing, use one of the vector 13011 clear instructions. */ 13012static rtx 13013safe_vector_operand (x, mode) 13014 rtx x; 13015 enum machine_mode mode; 13016{ 13017 if (x != const0_rtx) 13018 return x; 13019 x = gen_reg_rtx (mode); 13020 13021 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode)) 13022 emit_insn (gen_mmx_clrdi (mode == DImode ? x 13023 : gen_rtx_SUBREG (DImode, x, 0))); 13024 else 13025 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x 13026 : gen_rtx_SUBREG (V4SFmode, x, 0))); 13027 return x; 13028} 13029 13030/* Subroutine of ix86_expand_builtin to take care of binop insns. */ 13031 13032static rtx 13033ix86_expand_binop_builtin (icode, arglist, target) 13034 enum insn_code icode; 13035 tree arglist; 13036 rtx target; 13037{ 13038 rtx pat; 13039 tree arg0 = TREE_VALUE (arglist); 13040 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 13041 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 13042 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 13043 enum machine_mode tmode = insn_data[icode].operand[0].mode; 13044 enum machine_mode mode0 = insn_data[icode].operand[1].mode; 13045 enum machine_mode mode1 = insn_data[icode].operand[2].mode; 13046 13047 if (VECTOR_MODE_P (mode0)) 13048 op0 = safe_vector_operand (op0, mode0); 13049 if (VECTOR_MODE_P (mode1)) 13050 op1 = safe_vector_operand (op1, mode1); 13051 13052 if (! target 13053 || GET_MODE (target) != tmode 13054 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 13055 target = gen_reg_rtx (tmode); 13056 13057 if (GET_MODE (op1) == SImode && mode1 == TImode) 13058 { 13059 rtx x = gen_reg_rtx (V4SImode); 13060 emit_insn (gen_sse2_loadd (x, op1)); 13061 op1 = gen_lowpart (TImode, x); 13062 } 13063 13064 /* In case the insn wants input operands in modes different from 13065 the result, abort. */ 13066 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1) 13067 abort (); 13068 13069 if (! 
(*insn_data[icode].operand[1].predicate) (op0, mode0)) 13070 op0 = copy_to_mode_reg (mode0, op0); 13071 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) 13072 op1 = copy_to_mode_reg (mode1, op1); 13073 13074 /* In the commutative cases, both op0 and op1 are nonimmediate_operand, 13075 yet one of the two must not be a memory. This is normally enforced 13076 by expanders, but we didn't bother to create one here. */ 13077 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM) 13078 op0 = copy_to_mode_reg (mode0, op0); 13079 13080 pat = GEN_FCN (icode) (target, op0, op1); 13081 if (! pat) 13082 return 0; 13083 emit_insn (pat); 13084 return target; 13085} 13086 13087/* Subroutine of ix86_expand_builtin to take care of stores. */ 13088 13089static rtx 13090ix86_expand_store_builtin (icode, arglist) 13091 enum insn_code icode; 13092 tree arglist; 13093{ 13094 rtx pat; 13095 tree arg0 = TREE_VALUE (arglist); 13096 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 13097 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 13098 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 13099 enum machine_mode mode0 = insn_data[icode].operand[0].mode; 13100 enum machine_mode mode1 = insn_data[icode].operand[1].mode; 13101 13102 if (VECTOR_MODE_P (mode1)) 13103 op1 = safe_vector_operand (op1, mode1); 13104 13105 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); 13106 op1 = copy_to_mode_reg (mode1, op1); 13107 13108 pat = GEN_FCN (icode) (op0, op1); 13109 if (pat) 13110 emit_insn (pat); 13111 return 0; 13112} 13113 13114/* Subroutine of ix86_expand_builtin to take care of unop insns. 
*/ 13115 13116static rtx 13117ix86_expand_unop_builtin (icode, arglist, target, do_load) 13118 enum insn_code icode; 13119 tree arglist; 13120 rtx target; 13121 int do_load; 13122{ 13123 rtx pat; 13124 tree arg0 = TREE_VALUE (arglist); 13125 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 13126 enum machine_mode tmode = insn_data[icode].operand[0].mode; 13127 enum machine_mode mode0 = insn_data[icode].operand[1].mode; 13128 13129 if (! target 13130 || GET_MODE (target) != tmode 13131 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 13132 target = gen_reg_rtx (tmode); 13133 if (do_load) 13134 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); 13135 else 13136 { 13137 if (VECTOR_MODE_P (mode0)) 13138 op0 = safe_vector_operand (op0, mode0); 13139 13140 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 13141 op0 = copy_to_mode_reg (mode0, op0); 13142 } 13143 13144 pat = GEN_FCN (icode) (target, op0); 13145 if (! pat) 13146 return 0; 13147 emit_insn (pat); 13148 return target; 13149} 13150 13151/* Subroutine of ix86_expand_builtin to take care of three special unop insns: 13152 sqrtss, rsqrtss, rcpss. */ 13153 13154static rtx 13155ix86_expand_unop1_builtin (icode, arglist, target) 13156 enum insn_code icode; 13157 tree arglist; 13158 rtx target; 13159{ 13160 rtx pat; 13161 tree arg0 = TREE_VALUE (arglist); 13162 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 13163 enum machine_mode tmode = insn_data[icode].operand[0].mode; 13164 enum machine_mode mode0 = insn_data[icode].operand[1].mode; 13165 13166 if (! target 13167 || GET_MODE (target) != tmode 13168 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 13169 target = gen_reg_rtx (tmode); 13170 13171 if (VECTOR_MODE_P (mode0)) 13172 op0 = safe_vector_operand (op0, mode0); 13173 13174 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 13175 op0 = copy_to_mode_reg (mode0, op0); 13176 13177 op1 = op0; 13178 if (! 
(*insn_data[icode].operand[2].predicate) (op1, mode0)) 13179 op1 = copy_to_mode_reg (mode0, op1); 13180 13181 pat = GEN_FCN (icode) (target, op0, op1); 13182 if (! pat) 13183 return 0; 13184 emit_insn (pat); 13185 return target; 13186} 13187 13188/* Subroutine of ix86_expand_builtin to take care of comparison insns. */ 13189 13190static rtx 13191ix86_expand_sse_compare (d, arglist, target) 13192 const struct builtin_description *d; 13193 tree arglist; 13194 rtx target; 13195{ 13196 rtx pat; 13197 tree arg0 = TREE_VALUE (arglist); 13198 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 13199 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 13200 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 13201 rtx op2; 13202 enum machine_mode tmode = insn_data[d->icode].operand[0].mode; 13203 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode; 13204 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode; 13205 enum rtx_code comparison = d->comparison; 13206 13207 if (VECTOR_MODE_P (mode0)) 13208 op0 = safe_vector_operand (op0, mode0); 13209 if (VECTOR_MODE_P (mode1)) 13210 op1 = safe_vector_operand (op1, mode1); 13211 13212 /* Swap operands if we have a comparison that isn't available in 13213 hardware. */ 13214 if (d->flag) 13215 { 13216 rtx tmp = gen_reg_rtx (mode1); 13217 emit_move_insn (tmp, op1); 13218 op1 = op0; 13219 op0 = tmp; 13220 } 13221 13222 if (! target 13223 || GET_MODE (target) != tmode 13224 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode)) 13225 target = gen_reg_rtx (tmode); 13226 13227 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0)) 13228 op0 = copy_to_mode_reg (mode0, op0); 13229 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1)) 13230 op1 = copy_to_mode_reg (mode1, op1); 13231 13232 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); 13233 pat = GEN_FCN (d->icode) (target, op0, op1, op2); 13234 if (! 
pat) 13235 return 0; 13236 emit_insn (pat); 13237 return target; 13238} 13239 13240/* Subroutine of ix86_expand_builtin to take care of comi insns. */ 13241 13242static rtx 13243ix86_expand_sse_comi (d, arglist, target) 13244 const struct builtin_description *d; 13245 tree arglist; 13246 rtx target; 13247{ 13248 rtx pat; 13249 tree arg0 = TREE_VALUE (arglist); 13250 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 13251 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 13252 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 13253 rtx op2; 13254 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; 13255 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; 13256 enum rtx_code comparison = d->comparison; 13257 13258 if (VECTOR_MODE_P (mode0)) 13259 op0 = safe_vector_operand (op0, mode0); 13260 if (VECTOR_MODE_P (mode1)) 13261 op1 = safe_vector_operand (op1, mode1); 13262 13263 /* Swap operands if we have a comparison that isn't available in 13264 hardware. */ 13265 if (d->flag) 13266 { 13267 rtx tmp = op1; 13268 op1 = op0; 13269 op0 = tmp; 13270 } 13271 13272 target = gen_reg_rtx (SImode); 13273 emit_move_insn (target, const0_rtx); 13274 target = gen_rtx_SUBREG (QImode, target, 0); 13275 13276 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0)) 13277 op0 = copy_to_mode_reg (mode0, op0); 13278 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1)) 13279 op1 = copy_to_mode_reg (mode1, op1); 13280 13281 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); 13282 pat = GEN_FCN (d->icode) (op0, op1); 13283 if (! 
pat)
    return 0;
  emit_insn (pat);
  /* Merge the QImode comparison result into the low byte of TARGET
     without disturbing the remaining bits, then return the enclosing
     register.  */
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  /* Builtins with irregular expansion patterns are handled explicitly
     in this switch; everything regular falls through to the
     table-driven loops over bdesc_2arg/bdesc_1arg/bdesc_comi below.  */
  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_PEXTRW:
    case IX86_BUILTIN_PEXTRW128:
      icode = (fcode == IX86_BUILTIN_PEXTRW
               ? CODE_FOR_mmx_pextrw
               : CODE_FOR_sse2_pextrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      /* The element selector must be a compile-time constant; the insn
         pattern has no register alternative for it.  */
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        {
          /* @@@ better error message */
          error ("selector must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
    case IX86_BUILTIN_PINSRW128:
      icode = (fcode == IX86_BUILTIN_PINSRW
               ? CODE_FOR_mmx_pinsrw
               : CODE_FOR_sse2_pinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("selector must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
               ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
               : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
                  : CODE_FOR_sse2_maskmovdqu));
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      /* Masked store: no value is produced.  */
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
               : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
               : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
               : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      /* The second argument is a pointer; wrap it into a MEM of the
         insn's expected mode.  */
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
    case IX86_BUILTIN_STOREHPD:
    case IX86_BUILTIN_STORELPD:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
               : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
               : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
               : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      /* The ldmxcsr pattern takes a memory operand, so stage the value
         through a fixed stack slot.  */
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
               ? CODE_FOR_sse_shufps
               : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
               : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
               : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
               : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      /* Whole-register byte shifts are implemented as TImode shifts.  */
      icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
               : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        {
          op0 = copy_to_reg (op0);
          op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
        }
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          error ("shift must be an immediate");
          return const0_rtx;
        }
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    /* 3DNow! builtins expand through the generic unop/binop helpers.  */
    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    case IX86_BUILTIN_CLRTI:
      target = gen_reg_rtx (V2DImode);
      emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
      return target;


    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADAPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);

    case IX86_BUILTIN_STOREAPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_LOADSD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);

    case IX86_BUILTIN_STORESD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);

    case IX86_BUILTIN_SETPD1:
      /* Broadcast a scalar double: stage it in a stack slot, load the
         low element, then duplicate it with shufpd mask 0.  */
      target = assign_386_stack_local (DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, DFmode, 0),
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
      emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
      return op0;

    case IX86_BUILTIN_SETPD:
      target = assign_386_stack_local (V2DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      emit_move_insn (adjust_address (target, DFmode, 0),
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, DFmode, 8),
                      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_movapd (op0, target));
      return op0;

    case IX86_BUILTIN_LOADRPD:
      /* Load the pair, then swap the elements (shufpd mask 1).  */
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
                                         gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
      return target;

    case IX86_BUILTIN_LOADPD1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
                                         gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPD1:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STORERPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);

    case IX86_BUILTIN_CLRPD:
      target = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse_clrv2df (target));
      return target;

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
        op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQA:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_LOADD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);

    case IX86_BUILTIN_STOREDQA:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
    case IX86_BUILTIN_STORED:
      return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);

    default:
      break;
    }

  /* Table-driven expansion for the regular two-operand builtins.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
        /* Compares are treated specially.  */
        if (d->icode == CODE_FOR_maskcmpv4sf3
            || d->icode == CODE_FOR_vmmaskcmpv4sf3
            || d->icode == CODE_FOR_maskncmpv4sf3
            || d->icode == CODE_FOR_vmmaskncmpv4sf3
            || d->icode == CODE_FOR_maskcmpv2df3
            || d->icode == CODE_FOR_vmmaskcmpv2df3
            || d->icode == CODE_FOR_maskncmpv2df3
            || d->icode == CODE_FOR_vmmaskncmpv2df3)
          return ix86_expand_sse_compare (d, arglist, target);

        return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}

/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.
 */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  /* Only legal after reload; we reuse stack space at/below the stack
     pointer instead of creating new slots or pseudos.  */
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      /* Use the red zone below the stack pointer; no explicit stack
         adjustment is required.  */
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      /* No red zone: push the value as a single 64-bit quantity.  */
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (
            gen_rtx_SET (VOIDmode,
                         gen_rtx_MEM (DImode,
                                      gen_rtx_PRE_DEC (DImode,
                                                       stack_pointer_rtx)),
                         operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      /* 32-bit: push with 32-bit stores, DImode as two pushes (high
         word first so the value ends up in memory order).  */
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_di (&operand, 1, operands, operands + 1);
            emit_insn (
              gen_rtx_SET (VOIDmode,
                           gen_rtx_MEM (SImode,
                                        gen_rtx_PRE_DEC (Pmode,
                                                         stack_pointer_rtx)),
                           operands[1]));
            emit_insn (
              gen_rtx_SET (VOIDmode,
                           gen_rtx_MEM (SImode,
                                        gen_rtx_PRE_DEC (Pmode,
                                                         stack_pointer_rtx)),
                           operands[0]));
          }
          break;
        case HImode:
          /* It is better to store HImodes as SImodes.  */
          if (!TARGET_PARTIAL_REG_STALL)
            operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (
            gen_rtx_SET (VOIDmode,
                         gen_rtx_MEM (GET_MODE (operand),
                                      gen_rtx_PRE_DEC (SImode,
                                                       stack_pointer_rtx)),
                         operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}

/* Free operand from the memory.
 */
void
ix86_free_from_memory (mode)
     enum machine_mode mode;
{
  /* Undo the stack push made by ix86_force_to_memory.  When the red
     zone was used (64-bit with TARGET_RED_ZONE) the stack pointer was
     never adjusted, so there is nothing to release.  */
  if (!TARGET_64BIT || !TARGET_RED_ZONE)
    {
      int size;

      /* The size must mirror what ix86_force_to_memory pushed.  */
      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
        size = 2;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
         to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (x, class)
     rtx x;
     enum reg_class class;
{
  /* Non-zero vector constants cannot be loaded directly by any class.  */
  if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
    return NO_REGS;
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
        return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
        {
          /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
          if (MAYBE_SSE_CLASS_P (class))
            return (reg_class_subset_p (class, GENERAL_REGS)
                    ? GENERAL_REGS : FLOAT_REGS);
          else
            return class;
        }
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
        return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
        return NO_REGS;
    }
  /* MMX registers cannot load any constant.  */
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  /* QImode values must live in byte-addressable registers.  */
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}

/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */
int
ix86_secondary_memory_needed (class1, class2, mode, strict)
     enum reg_class class1, class2;
     enum machine_mode mode;
     int strict;
{
  /* Reject (or pessimistically accept, when !STRICT) classes that mix
     registers from different units; see the invariant above.  */
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
        abort ();
      else
        return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
          || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
              && (mode) != SImode)
          || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
              && (mode) != SImode));
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.
14029 14030 It is not required that the cost always equal 2 when FROM is the same as TO; 14031 on some machines it is expensive to move between registers if they are not 14032 general registers. */ 14033int 14034ix86_register_move_cost (mode, class1, class2) 14035 enum machine_mode mode; 14036 enum reg_class class1, class2; 14037{ 14038 /* In case we require secondary memory, compute cost of the store followed 14039 by load. In order to avoid bad register allocation choices, we need 14040 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */ 14041 14042 if (ix86_secondary_memory_needed (class1, class2, mode, 0)) 14043 { 14044 int cost = 1; 14045 14046 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0), 14047 MEMORY_MOVE_COST (mode, class1, 1)); 14048 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0), 14049 MEMORY_MOVE_COST (mode, class2, 1)); 14050 14051 /* In case of copying from general_purpose_register we may emit multiple 14052 stores followed by single load causing memory size mismatch stall. 14053 Count this as arbitarily high cost of 20. */ 14054 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode)) 14055 cost += 20; 14056 14057 /* In the case of FP/MMX moves, the registers actually overlap, and we 14058 have to switch modes in order to treat them differently. */ 14059 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2)) 14060 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1))) 14061 cost += 20; 14062 14063 return cost; 14064 } 14065 14066 /* Moves between SSE/MMX and integer unit are expensive. 
*/ 14067 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2) 14068 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) 14069 return ix86_cost->mmxsse_to_integer; 14070 if (MAYBE_FLOAT_CLASS_P (class1)) 14071 return ix86_cost->fp_move; 14072 if (MAYBE_SSE_CLASS_P (class1)) 14073 return ix86_cost->sse_move; 14074 if (MAYBE_MMX_CLASS_P (class1)) 14075 return ix86_cost->mmx_move; 14076 return 2; 14077} 14078 14079/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */ 14080int 14081ix86_hard_regno_mode_ok (regno, mode) 14082 int regno; 14083 enum machine_mode mode; 14084{ 14085 /* Flags and only flags can only hold CCmode values. */ 14086 if (CC_REGNO_P (regno)) 14087 return GET_MODE_CLASS (mode) == MODE_CC; 14088 if (GET_MODE_CLASS (mode) == MODE_CC 14089 || GET_MODE_CLASS (mode) == MODE_RANDOM 14090 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) 14091 return 0; 14092 if (FP_REGNO_P (regno)) 14093 return VALID_FP_MODE_P (mode); 14094 if (SSE_REGNO_P (regno)) 14095 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0); 14096 if (MMX_REGNO_P (regno)) 14097 return (TARGET_MMX 14098 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0); 14099 /* We handle both integer and floats in the general purpose registers. 14100 In future we should be able to handle vector modes as well. */ 14101 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode)) 14102 return 0; 14103 /* Take care for QImode values - they can be in non-QI regs, but then 14104 they do cause partial register stalls. */ 14105 if (regno < 4 || mode != QImode || TARGET_64BIT) 14106 return 1; 14107 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL; 14108} 14109 14110/* Return the cost of moving data of mode M between a 14111 register and memory. A value of 2 is the default; this cost is 14112 relative to those in `REGISTER_MOVE_COST'. 
14113 14114 If moving between registers and memory is more expensive than 14115 between two registers, you should define this macro to express the 14116 relative cost. 14117 14118 Model also increased moving costs of QImode registers in non 14119 Q_REGS classes. 14120 */ 14121int 14122ix86_memory_move_cost (mode, class, in) 14123 enum machine_mode mode; 14124 enum reg_class class; 14125 int in; 14126{ 14127 if (FLOAT_CLASS_P (class)) 14128 { 14129 int index; 14130 switch (mode) 14131 { 14132 case SFmode: 14133 index = 0; 14134 break; 14135 case DFmode: 14136 index = 1; 14137 break; 14138 case XFmode: 14139 case TFmode: 14140 index = 2; 14141 break; 14142 default: 14143 return 100; 14144 } 14145 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index]; 14146 } 14147 if (SSE_CLASS_P (class)) 14148 { 14149 int index; 14150 switch (GET_MODE_SIZE (mode)) 14151 { 14152 case 4: 14153 index = 0; 14154 break; 14155 case 8: 14156 index = 1; 14157 break; 14158 case 16: 14159 index = 2; 14160 break; 14161 default: 14162 return 100; 14163 } 14164 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index]; 14165 } 14166 if (MMX_CLASS_P (class)) 14167 { 14168 int index; 14169 switch (GET_MODE_SIZE (mode)) 14170 { 14171 case 4: 14172 index = 0; 14173 break; 14174 case 8: 14175 index = 1; 14176 break; 14177 default: 14178 return 100; 14179 } 14180 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index]; 14181 } 14182 switch (GET_MODE_SIZE (mode)) 14183 { 14184 case 1: 14185 if (in) 14186 return (Q_CLASS_P (class) ? ix86_cost->int_load[0] 14187 : ix86_cost->movzbl_load); 14188 else 14189 return (Q_CLASS_P (class) ? ix86_cost->int_store[0] 14190 : ix86_cost->int_store[0] + 4); 14191 break; 14192 case 2: 14193 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1]; 14194 default: 14195 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */ 14196 if (mode == TFmode) 14197 mode = XFmode; 14198 return ((in ? 
ix86_cost->int_load[2] : ix86_cost->int_store[2]) 14199 * ((int) GET_MODE_SIZE (mode) 14200 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD); 14201 } 14202} 14203 14204#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION) 14205static void 14206ix86_svr3_asm_out_constructor (symbol, priority) 14207 rtx symbol; 14208 int priority ATTRIBUTE_UNUSED; 14209{ 14210 init_section (); 14211 fputs ("\tpushl $", asm_out_file); 14212 assemble_name (asm_out_file, XSTR (symbol, 0)); 14213 fputc ('\n', asm_out_file); 14214} 14215#endif 14216 14217#if TARGET_MACHO 14218 14219static int current_machopic_label_num; 14220 14221/* Given a symbol name and its associated stub, write out the 14222 definition of the stub. */ 14223 14224void 14225machopic_output_stub (file, symb, stub) 14226 FILE *file; 14227 const char *symb, *stub; 14228{ 14229 unsigned int length; 14230 char *binder_name, *symbol_name, lazy_ptr_name[32]; 14231 int label = ++current_machopic_label_num; 14232 14233 /* Lose our funky encoding stuff so it doesn't contaminate the stub. 
*/ 14234 symb = (*targetm.strip_name_encoding) (symb); 14235 14236 length = strlen (stub); 14237 binder_name = alloca (length + 32); 14238 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length); 14239 14240 length = strlen (symb); 14241 symbol_name = alloca (length + 32); 14242 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); 14243 14244 sprintf (lazy_ptr_name, "L%d$lz", label); 14245 14246 if (MACHOPIC_PURE) 14247 machopic_picsymbol_stub_section (); 14248 else 14249 machopic_symbol_stub_section (); 14250 14251 fprintf (file, "%s:\n", stub); 14252 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 14253 14254 if (MACHOPIC_PURE) 14255 { 14256 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label); 14257 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label); 14258 fprintf (file, "\tjmp %%edx\n"); 14259 } 14260 else 14261 fprintf (file, "\tjmp *%s\n", lazy_ptr_name); 14262 14263 fprintf (file, "%s:\n", binder_name); 14264 14265 if (MACHOPIC_PURE) 14266 { 14267 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label); 14268 fprintf (file, "\tpushl %%eax\n"); 14269 } 14270 else 14271 fprintf (file, "\t pushl $%s\n", lazy_ptr_name); 14272 14273 fprintf (file, "\tjmp dyld_stub_binding_helper\n"); 14274 14275 machopic_lazy_symbol_ptr_section (); 14276 fprintf (file, "%s:\n", lazy_ptr_name); 14277 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 14278 fprintf (file, "\t.long %s\n", binder_name); 14279} 14280#endif /* TARGET_MACHO */ 14281 14282/* Order the registers for register allocator. */ 14283 14284void 14285x86_order_regs_for_local_alloc () 14286{ 14287 int pos = 0; 14288 int i; 14289 14290 /* First allocate the local general purpose registers. */ 14291 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 14292 if (GENERAL_REGNO_P (i) && call_used_regs[i]) 14293 reg_alloc_order [pos++] = i; 14294 14295 /* Global general purpose registers. 
*/ 14296 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 14297 if (GENERAL_REGNO_P (i) && !call_used_regs[i]) 14298 reg_alloc_order [pos++] = i; 14299 14300 /* x87 registers come first in case we are doing FP math 14301 using them. */ 14302 if (!TARGET_SSE_MATH) 14303 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) 14304 reg_alloc_order [pos++] = i; 14305 14306 /* SSE registers. */ 14307 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) 14308 reg_alloc_order [pos++] = i; 14309 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) 14310 reg_alloc_order [pos++] = i; 14311 14312 /* x87 registerts. */ 14313 if (TARGET_SSE_MATH) 14314 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) 14315 reg_alloc_order [pos++] = i; 14316 14317 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) 14318 reg_alloc_order [pos++] = i; 14319 14320 /* Initialize the rest of array as we do not allocate some registers 14321 at all. */ 14322 while (pos < FIRST_PSEUDO_REGISTER) 14323 reg_alloc_order [pos++] = 0; 14324} 14325 14326/* Returns an expression indicating where the this parameter is 14327 located on entry to the FUNCTION. */ 14328 14329static rtx 14330x86_this_parameter (function) 14331 tree function; 14332{ 14333 tree type = TREE_TYPE (function); 14334 14335 if (TARGET_64BIT) 14336 { 14337 int n = aggregate_value_p (TREE_TYPE (type)) != 0; 14338 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]); 14339 } 14340 14341 if (ix86_fntype_regparm (type) > 0) 14342 { 14343 tree parm; 14344 14345 parm = TYPE_ARG_TYPES (type); 14346 /* Figure out whether or not the function has a variable number of 14347 arguments. */ 14348 for (; parm; parm = TREE_CHAIN (parm)) 14349 if (TREE_VALUE (parm) == void_type_node) 14350 break; 14351 /* If not, the this parameter is in %eax. 
*/
      if (parm)
	return gen_rtx_REG (SImode, 0);
    }

  /* Otherwise `this' lives on the stack: past the return address, and
     additionally past the hidden aggregate-return pointer if present.  */
  if (aggregate_value_p (TREE_TYPE (type)))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
  else
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}

/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
     tree thunk ATTRIBUTE_UNUSED;
     HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
     HOST_WIDE_INT vcall_offset;
     tree function;
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !(*targetm.binds_local_p) (function))
    return false;

  /* Otherwise ok.  */
  return true;
}

/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is non-zero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
     FILE *file ATTRIBUTE_UNUSED;
     tree thunk ATTRIBUTE_UNUSED;
     HOST_WIDE_INT delta;
     HOST_WIDE_INT vcall_offset;
     tree function;
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.
 */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
	{
	  /* DELTA may not fit in an add immediate; stage it through
	     R10 first (R10 is free in thunks per the 64-bit ABI --
	     NOTE(review): confirm against psABI).  */
	  if (!x86_64_general_operand (xops[0], DImode))
	    {
	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
	      xops[1] = tmp;
	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
	      xops[0] = tmp;
	      xops[1] = this;
	    }
	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
	}
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
	tmp = gen_rtx_REG (SImode, 2 /* ECX */);

      /* Load the vtable pointer (*this) into the scratch register.  */
      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.
*/ 14460 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset)); 14461 if (TARGET_64BIT && !memory_operand (xops[0], Pmode)) 14462 { 14463 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */); 14464 xops[0] = GEN_INT (vcall_offset); 14465 xops[1] = tmp2; 14466 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops); 14467 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2)); 14468 } 14469 xops[1] = this_reg; 14470 if (TARGET_64BIT) 14471 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); 14472 else 14473 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); 14474 } 14475 14476 /* If necessary, drop THIS back to its stack slot. */ 14477 if (this_reg && this_reg != this) 14478 { 14479 xops[0] = this_reg; 14480 xops[1] = this; 14481 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); 14482 } 14483 14484 xops[0] = DECL_RTL (function); 14485 if (TARGET_64BIT) 14486 { 14487 if (!flag_pic || (*targetm.binds_local_p) (function)) 14488 output_asm_insn ("jmp\t%P0", xops); 14489 else 14490 { 14491 tmp = XEXP (xops[0], 0); 14492 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL); 14493 tmp = gen_rtx_CONST (Pmode, tmp); 14494 tmp = gen_rtx_MEM (QImode, tmp); 14495 xops[0] = tmp; 14496 output_asm_insn ("jmp\t%A0", xops); 14497 } 14498 } 14499 else 14500 { 14501 if (!flag_pic || (*targetm.binds_local_p) (function)) 14502 output_asm_insn ("jmp\t%P0", xops); 14503 else 14504 { 14505 tmp = gen_rtx_REG (SImode, 2 /* ECX */); 14506 output_set_got (tmp); 14507 14508 xops[1] = tmp; 14509 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops); 14510 output_asm_insn ("jmp\t{*}%1", xops); 14511 } 14512 } 14513} 14514 14515int 14516x86_field_alignment (field, computed) 14517 tree field; 14518 int computed; 14519{ 14520 enum machine_mode mode; 14521 tree type = TREE_TYPE (field); 14522 14523 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) 14524 return computed; 14525 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE 14526 ? 
get_inner_array_type (type) : type); 14527 if (mode == DFmode || mode == DCmode 14528 || GET_MODE_CLASS (mode) == MODE_INT 14529 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) 14530 return MIN (32, computed); 14531 return computed; 14532} 14533 14534/* Output assembler code to FILE to increment profiler label # LABELNO 14535 for profiling a function entry. */ 14536void 14537x86_function_profiler (file, labelno) 14538 FILE *file; 14539 int labelno; 14540{ 14541 if (TARGET_64BIT) 14542 if (flag_pic) 14543 { 14544#ifndef NO_PROFILE_COUNTERS 14545 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno); 14546#endif 14547 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME); 14548 } 14549 else 14550 { 14551#ifndef NO_PROFILE_COUNTERS 14552 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno); 14553#endif 14554 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); 14555 } 14556 else if (flag_pic) 14557 { 14558#ifndef NO_PROFILE_COUNTERS 14559 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n", 14560 LPREFIX, labelno, PROFILE_COUNT_REGISTER); 14561#endif 14562 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME); 14563 } 14564 else 14565 { 14566#ifndef NO_PROFILE_COUNTERS 14567 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno, 14568 PROFILE_COUNT_REGISTER); 14569#endif 14570 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); 14571 } 14572} 14573 14574/* Implement machine specific optimizations. 14575 At the moment we implement single transformation: AMD Athlon works faster 14576 when RET is not destination of conditional jump or directly preceeded 14577 by other jump instruction. We avoid the penalty by inserting NOP just 14578 before the RET instructions in such cases. 
*/ 14579void 14580x86_machine_dependent_reorg (first) 14581 rtx first ATTRIBUTE_UNUSED; 14582{ 14583 edge e; 14584 14585 if (!TARGET_ATHLON || !optimize || optimize_size) 14586 return; 14587 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next) 14588 { 14589 basic_block bb = e->src; 14590 rtx ret = bb->end; 14591 rtx prev; 14592 bool insert = false; 14593 14594 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb)) 14595 continue; 14596 prev = prev_nonnote_insn (ret); 14597 if (prev && GET_CODE (prev) == CODE_LABEL) 14598 { 14599 edge e; 14600 for (e = bb->pred; e; e = e->pred_next) 14601 if (EDGE_FREQUENCY (e) && e->src->index > 0 14602 && !(e->flags & EDGE_FALLTHRU)) 14603 insert = 1; 14604 } 14605 if (!insert) 14606 { 14607 prev = prev_real_insn (ret); 14608 if (prev && GET_CODE (prev) == JUMP_INSN 14609 && any_condjump_p (prev)) 14610 insert = 1; 14611 } 14612 if (insert) 14613 emit_insn_before (gen_nop (), ret); 14614 } 14615} 14616 14617#include "gt-i386.h" 14618