i386.c revision 132743
1/* Subroutines used for code generation on IA-32. 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 3 2002, 2003, 2004 Free Software Foundation, Inc. 4 5This file is part of GCC. 6 7GCC is free software; you can redistribute it and/or modify 8it under the terms of the GNU General Public License as published by 9the Free Software Foundation; either version 2, or (at your option) 10any later version. 11 12GCC is distributed in the hope that it will be useful, 13but WITHOUT ANY WARRANTY; without even the implied warranty of 14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15GNU General Public License for more details. 16 17You should have received a copy of the GNU General Public License 18along with GCC; see the file COPYING. If not, write to 19the Free Software Foundation, 59 Temple Place - Suite 330, 20Boston, MA 02111-1307, USA. */ 21 22 23/* $FreeBSD: head/contrib/gcc/config/i386/i386.c 132743 2004-07-28 04:47:35Z kan $ */ 24 25 26#include "config.h" 27#include "system.h" 28#include "coretypes.h" 29#include "tm.h" 30#include "rtl.h" 31#include "tree.h" 32#include "tm_p.h" 33#include "regs.h" 34#include "hard-reg-set.h" 35#include "real.h" 36#include "insn-config.h" 37#include "conditions.h" 38#include "output.h" 39#include "insn-attr.h" 40#include "flags.h" 41#include "except.h" 42#include "function.h" 43#include "recog.h" 44#include "expr.h" 45#include "optabs.h" 46#include "toplev.h" 47#include "basic-block.h" 48#include "ggc.h" 49#include "target.h" 50#include "target-def.h" 51#include "langhooks.h" 52#include "cgraph.h" 53 54#ifndef CHECK_STACK_LIMIT 55#define CHECK_STACK_LIMIT (-1) 56#endif 57 58/* Return index of given mode in mult and division cost tables. */ 59#define MODE_INDEX(mode) \ 60 ((mode) == QImode ? 0 \ 61 : (mode) == HImode ? 1 \ 62 : (mode) == SImode ? 2 \ 63 : (mode) == DImode ? 
3 \ 64 : 4) 65 66/* Processor costs (relative to an add) */ 67static const 68struct processor_costs size_cost = { /* costs for tunning for size */ 69 2, /* cost of an add instruction */ 70 3, /* cost of a lea instruction */ 71 2, /* variable shift costs */ 72 3, /* constant shift costs */ 73 {3, 3, 3, 3, 5}, /* cost of starting a multiply */ 74 0, /* cost of multiply per each bit set */ 75 {3, 3, 3, 3, 5}, /* cost of a divide/mod */ 76 3, /* cost of movsx */ 77 3, /* cost of movzx */ 78 0, /* "large" insn */ 79 2, /* MOVE_RATIO */ 80 2, /* cost for loading QImode using movzbl */ 81 {2, 2, 2}, /* cost of loading integer registers 82 in QImode, HImode and SImode. 83 Relative to reg-reg move (2). */ 84 {2, 2, 2}, /* cost of storing integer registers */ 85 2, /* cost of reg,reg fld/fst */ 86 {2, 2, 2}, /* cost of loading fp registers 87 in SFmode, DFmode and XFmode */ 88 {2, 2, 2}, /* cost of loading integer registers */ 89 3, /* cost of moving MMX register */ 90 {3, 3}, /* cost of loading MMX registers 91 in SImode and DImode */ 92 {3, 3}, /* cost of storing MMX registers 93 in SImode and DImode */ 94 3, /* cost of moving SSE register */ 95 {3, 3, 3}, /* cost of loading SSE registers 96 in SImode, DImode and TImode */ 97 {3, 3, 3}, /* cost of storing SSE registers 98 in SImode, DImode and TImode */ 99 3, /* MMX or SSE register to integer */ 100 0, /* size of prefetch block */ 101 0, /* number of parallel prefetches */ 102 1, /* Branch cost */ 103 2, /* cost of FADD and FSUB insns. */ 104 2, /* cost of FMUL instruction. */ 105 2, /* cost of FDIV instruction. */ 106 2, /* cost of FABS instruction. */ 107 2, /* cost of FCHS instruction. */ 108 2, /* cost of FSQRT instruction. 
*/ 109}; 110 111/* Processor costs (relative to an add) */ 112static const 113struct processor_costs i386_cost = { /* 386 specific costs */ 114 1, /* cost of an add instruction */ 115 1, /* cost of a lea instruction */ 116 3, /* variable shift costs */ 117 2, /* constant shift costs */ 118 {6, 6, 6, 6, 6}, /* cost of starting a multiply */ 119 1, /* cost of multiply per each bit set */ 120 {23, 23, 23, 23, 23}, /* cost of a divide/mod */ 121 3, /* cost of movsx */ 122 2, /* cost of movzx */ 123 15, /* "large" insn */ 124 3, /* MOVE_RATIO */ 125 4, /* cost for loading QImode using movzbl */ 126 {2, 4, 2}, /* cost of loading integer registers 127 in QImode, HImode and SImode. 128 Relative to reg-reg move (2). */ 129 {2, 4, 2}, /* cost of storing integer registers */ 130 2, /* cost of reg,reg fld/fst */ 131 {8, 8, 8}, /* cost of loading fp registers 132 in SFmode, DFmode and XFmode */ 133 {8, 8, 8}, /* cost of loading integer registers */ 134 2, /* cost of moving MMX register */ 135 {4, 8}, /* cost of loading MMX registers 136 in SImode and DImode */ 137 {4, 8}, /* cost of storing MMX registers 138 in SImode and DImode */ 139 2, /* cost of moving SSE register */ 140 {4, 8, 16}, /* cost of loading SSE registers 141 in SImode, DImode and TImode */ 142 {4, 8, 16}, /* cost of storing SSE registers 143 in SImode, DImode and TImode */ 144 3, /* MMX or SSE register to integer */ 145 0, /* size of prefetch block */ 146 0, /* number of parallel prefetches */ 147 1, /* Branch cost */ 148 23, /* cost of FADD and FSUB insns. */ 149 27, /* cost of FMUL instruction. */ 150 88, /* cost of FDIV instruction. */ 151 22, /* cost of FABS instruction. */ 152 24, /* cost of FCHS instruction. */ 153 122, /* cost of FSQRT instruction. 
*/ 154}; 155 156static const 157struct processor_costs i486_cost = { /* 486 specific costs */ 158 1, /* cost of an add instruction */ 159 1, /* cost of a lea instruction */ 160 3, /* variable shift costs */ 161 2, /* constant shift costs */ 162 {12, 12, 12, 12, 12}, /* cost of starting a multiply */ 163 1, /* cost of multiply per each bit set */ 164 {40, 40, 40, 40, 40}, /* cost of a divide/mod */ 165 3, /* cost of movsx */ 166 2, /* cost of movzx */ 167 15, /* "large" insn */ 168 3, /* MOVE_RATIO */ 169 4, /* cost for loading QImode using movzbl */ 170 {2, 4, 2}, /* cost of loading integer registers 171 in QImode, HImode and SImode. 172 Relative to reg-reg move (2). */ 173 {2, 4, 2}, /* cost of storing integer registers */ 174 2, /* cost of reg,reg fld/fst */ 175 {8, 8, 8}, /* cost of loading fp registers 176 in SFmode, DFmode and XFmode */ 177 {8, 8, 8}, /* cost of loading integer registers */ 178 2, /* cost of moving MMX register */ 179 {4, 8}, /* cost of loading MMX registers 180 in SImode and DImode */ 181 {4, 8}, /* cost of storing MMX registers 182 in SImode and DImode */ 183 2, /* cost of moving SSE register */ 184 {4, 8, 16}, /* cost of loading SSE registers 185 in SImode, DImode and TImode */ 186 {4, 8, 16}, /* cost of storing SSE registers 187 in SImode, DImode and TImode */ 188 3, /* MMX or SSE register to integer */ 189 0, /* size of prefetch block */ 190 0, /* number of parallel prefetches */ 191 1, /* Branch cost */ 192 8, /* cost of FADD and FSUB insns. */ 193 16, /* cost of FMUL instruction. */ 194 73, /* cost of FDIV instruction. */ 195 3, /* cost of FABS instruction. */ 196 3, /* cost of FCHS instruction. */ 197 83, /* cost of FSQRT instruction. 
*/ 198}; 199 200static const 201struct processor_costs pentium_cost = { 202 1, /* cost of an add instruction */ 203 1, /* cost of a lea instruction */ 204 4, /* variable shift costs */ 205 1, /* constant shift costs */ 206 {11, 11, 11, 11, 11}, /* cost of starting a multiply */ 207 0, /* cost of multiply per each bit set */ 208 {25, 25, 25, 25, 25}, /* cost of a divide/mod */ 209 3, /* cost of movsx */ 210 2, /* cost of movzx */ 211 8, /* "large" insn */ 212 6, /* MOVE_RATIO */ 213 6, /* cost for loading QImode using movzbl */ 214 {2, 4, 2}, /* cost of loading integer registers 215 in QImode, HImode and SImode. 216 Relative to reg-reg move (2). */ 217 {2, 4, 2}, /* cost of storing integer registers */ 218 2, /* cost of reg,reg fld/fst */ 219 {2, 2, 6}, /* cost of loading fp registers 220 in SFmode, DFmode and XFmode */ 221 {4, 4, 6}, /* cost of loading integer registers */ 222 8, /* cost of moving MMX register */ 223 {8, 8}, /* cost of loading MMX registers 224 in SImode and DImode */ 225 {8, 8}, /* cost of storing MMX registers 226 in SImode and DImode */ 227 2, /* cost of moving SSE register */ 228 {4, 8, 16}, /* cost of loading SSE registers 229 in SImode, DImode and TImode */ 230 {4, 8, 16}, /* cost of storing SSE registers 231 in SImode, DImode and TImode */ 232 3, /* MMX or SSE register to integer */ 233 0, /* size of prefetch block */ 234 0, /* number of parallel prefetches */ 235 2, /* Branch cost */ 236 3, /* cost of FADD and FSUB insns. */ 237 3, /* cost of FMUL instruction. */ 238 39, /* cost of FDIV instruction. */ 239 1, /* cost of FABS instruction. */ 240 1, /* cost of FCHS instruction. */ 241 70, /* cost of FSQRT instruction. 
*/ 242}; 243 244static const 245struct processor_costs pentiumpro_cost = { 246 1, /* cost of an add instruction */ 247 1, /* cost of a lea instruction */ 248 1, /* variable shift costs */ 249 1, /* constant shift costs */ 250 {4, 4, 4, 4, 4}, /* cost of starting a multiply */ 251 0, /* cost of multiply per each bit set */ 252 {17, 17, 17, 17, 17}, /* cost of a divide/mod */ 253 1, /* cost of movsx */ 254 1, /* cost of movzx */ 255 8, /* "large" insn */ 256 6, /* MOVE_RATIO */ 257 2, /* cost for loading QImode using movzbl */ 258 {4, 4, 4}, /* cost of loading integer registers 259 in QImode, HImode and SImode. 260 Relative to reg-reg move (2). */ 261 {2, 2, 2}, /* cost of storing integer registers */ 262 2, /* cost of reg,reg fld/fst */ 263 {2, 2, 6}, /* cost of loading fp registers 264 in SFmode, DFmode and XFmode */ 265 {4, 4, 6}, /* cost of loading integer registers */ 266 2, /* cost of moving MMX register */ 267 {2, 2}, /* cost of loading MMX registers 268 in SImode and DImode */ 269 {2, 2}, /* cost of storing MMX registers 270 in SImode and DImode */ 271 2, /* cost of moving SSE register */ 272 {2, 2, 8}, /* cost of loading SSE registers 273 in SImode, DImode and TImode */ 274 {2, 2, 8}, /* cost of storing SSE registers 275 in SImode, DImode and TImode */ 276 3, /* MMX or SSE register to integer */ 277 32, /* size of prefetch block */ 278 6, /* number of parallel prefetches */ 279 2, /* Branch cost */ 280 3, /* cost of FADD and FSUB insns. */ 281 5, /* cost of FMUL instruction. */ 282 56, /* cost of FDIV instruction. */ 283 2, /* cost of FABS instruction. */ 284 2, /* cost of FCHS instruction. */ 285 56, /* cost of FSQRT instruction. 
*/ 286}; 287 288static const 289struct processor_costs k6_cost = { 290 1, /* cost of an add instruction */ 291 2, /* cost of a lea instruction */ 292 1, /* variable shift costs */ 293 1, /* constant shift costs */ 294 {3, 3, 3, 3, 3}, /* cost of starting a multiply */ 295 0, /* cost of multiply per each bit set */ 296 {18, 18, 18, 18, 18}, /* cost of a divide/mod */ 297 2, /* cost of movsx */ 298 2, /* cost of movzx */ 299 8, /* "large" insn */ 300 4, /* MOVE_RATIO */ 301 3, /* cost for loading QImode using movzbl */ 302 {4, 5, 4}, /* cost of loading integer registers 303 in QImode, HImode and SImode. 304 Relative to reg-reg move (2). */ 305 {2, 3, 2}, /* cost of storing integer registers */ 306 4, /* cost of reg,reg fld/fst */ 307 {6, 6, 6}, /* cost of loading fp registers 308 in SFmode, DFmode and XFmode */ 309 {4, 4, 4}, /* cost of loading integer registers */ 310 2, /* cost of moving MMX register */ 311 {2, 2}, /* cost of loading MMX registers 312 in SImode and DImode */ 313 {2, 2}, /* cost of storing MMX registers 314 in SImode and DImode */ 315 2, /* cost of moving SSE register */ 316 {2, 2, 8}, /* cost of loading SSE registers 317 in SImode, DImode and TImode */ 318 {2, 2, 8}, /* cost of storing SSE registers 319 in SImode, DImode and TImode */ 320 6, /* MMX or SSE register to integer */ 321 32, /* size of prefetch block */ 322 1, /* number of parallel prefetches */ 323 1, /* Branch cost */ 324 2, /* cost of FADD and FSUB insns. */ 325 2, /* cost of FMUL instruction. */ 326 56, /* cost of FDIV instruction. */ 327 2, /* cost of FABS instruction. */ 328 2, /* cost of FCHS instruction. */ 329 56, /* cost of FSQRT instruction. 
*/ 330}; 331 332static const 333struct processor_costs athlon_cost = { 334 1, /* cost of an add instruction */ 335 2, /* cost of a lea instruction */ 336 1, /* variable shift costs */ 337 1, /* constant shift costs */ 338 {5, 5, 5, 5, 5}, /* cost of starting a multiply */ 339 0, /* cost of multiply per each bit set */ 340 {18, 26, 42, 74, 74}, /* cost of a divide/mod */ 341 1, /* cost of movsx */ 342 1, /* cost of movzx */ 343 8, /* "large" insn */ 344 9, /* MOVE_RATIO */ 345 4, /* cost for loading QImode using movzbl */ 346 {3, 4, 3}, /* cost of loading integer registers 347 in QImode, HImode and SImode. 348 Relative to reg-reg move (2). */ 349 {3, 4, 3}, /* cost of storing integer registers */ 350 4, /* cost of reg,reg fld/fst */ 351 {4, 4, 12}, /* cost of loading fp registers 352 in SFmode, DFmode and XFmode */ 353 {6, 6, 8}, /* cost of loading integer registers */ 354 2, /* cost of moving MMX register */ 355 {4, 4}, /* cost of loading MMX registers 356 in SImode and DImode */ 357 {4, 4}, /* cost of storing MMX registers 358 in SImode and DImode */ 359 2, /* cost of moving SSE register */ 360 {4, 4, 6}, /* cost of loading SSE registers 361 in SImode, DImode and TImode */ 362 {4, 4, 5}, /* cost of storing SSE registers 363 in SImode, DImode and TImode */ 364 5, /* MMX or SSE register to integer */ 365 64, /* size of prefetch block */ 366 6, /* number of parallel prefetches */ 367 2, /* Branch cost */ 368 4, /* cost of FADD and FSUB insns. */ 369 4, /* cost of FMUL instruction. */ 370 24, /* cost of FDIV instruction. */ 371 2, /* cost of FABS instruction. */ 372 2, /* cost of FCHS instruction. */ 373 35, /* cost of FSQRT instruction. 
*/ 374}; 375 376static const 377struct processor_costs k8_cost = { 378 1, /* cost of an add instruction */ 379 2, /* cost of a lea instruction */ 380 1, /* variable shift costs */ 381 1, /* constant shift costs */ 382 {3, 4, 3, 4, 5}, /* cost of starting a multiply */ 383 0, /* cost of multiply per each bit set */ 384 {18, 26, 42, 74, 74}, /* cost of a divide/mod */ 385 1, /* cost of movsx */ 386 1, /* cost of movzx */ 387 8, /* "large" insn */ 388 9, /* MOVE_RATIO */ 389 4, /* cost for loading QImode using movzbl */ 390 {3, 4, 3}, /* cost of loading integer registers 391 in QImode, HImode and SImode. 392 Relative to reg-reg move (2). */ 393 {3, 4, 3}, /* cost of storing integer registers */ 394 4, /* cost of reg,reg fld/fst */ 395 {4, 4, 12}, /* cost of loading fp registers 396 in SFmode, DFmode and XFmode */ 397 {6, 6, 8}, /* cost of loading integer registers */ 398 2, /* cost of moving MMX register */ 399 {3, 3}, /* cost of loading MMX registers 400 in SImode and DImode */ 401 {4, 4}, /* cost of storing MMX registers 402 in SImode and DImode */ 403 2, /* cost of moving SSE register */ 404 {4, 3, 6}, /* cost of loading SSE registers 405 in SImode, DImode and TImode */ 406 {4, 4, 5}, /* cost of storing SSE registers 407 in SImode, DImode and TImode */ 408 5, /* MMX or SSE register to integer */ 409 64, /* size of prefetch block */ 410 6, /* number of parallel prefetches */ 411 2, /* Branch cost */ 412 4, /* cost of FADD and FSUB insns. */ 413 4, /* cost of FMUL instruction. */ 414 19, /* cost of FDIV instruction. */ 415 2, /* cost of FABS instruction. */ 416 2, /* cost of FCHS instruction. */ 417 35, /* cost of FSQRT instruction. 
*/ 418}; 419 420static const 421struct processor_costs pentium4_cost = { 422 1, /* cost of an add instruction */ 423 1, /* cost of a lea instruction */ 424 4, /* variable shift costs */ 425 4, /* constant shift costs */ 426 {15, 15, 15, 15, 15}, /* cost of starting a multiply */ 427 0, /* cost of multiply per each bit set */ 428 {56, 56, 56, 56, 56}, /* cost of a divide/mod */ 429 1, /* cost of movsx */ 430 1, /* cost of movzx */ 431 16, /* "large" insn */ 432 6, /* MOVE_RATIO */ 433 2, /* cost for loading QImode using movzbl */ 434 {4, 5, 4}, /* cost of loading integer registers 435 in QImode, HImode and SImode. 436 Relative to reg-reg move (2). */ 437 {2, 3, 2}, /* cost of storing integer registers */ 438 2, /* cost of reg,reg fld/fst */ 439 {2, 2, 6}, /* cost of loading fp registers 440 in SFmode, DFmode and XFmode */ 441 {4, 4, 6}, /* cost of loading integer registers */ 442 2, /* cost of moving MMX register */ 443 {2, 2}, /* cost of loading MMX registers 444 in SImode and DImode */ 445 {2, 2}, /* cost of storing MMX registers 446 in SImode and DImode */ 447 12, /* cost of moving SSE register */ 448 {12, 12, 12}, /* cost of loading SSE registers 449 in SImode, DImode and TImode */ 450 {2, 2, 8}, /* cost of storing SSE registers 451 in SImode, DImode and TImode */ 452 10, /* MMX or SSE register to integer */ 453 64, /* size of prefetch block */ 454 6, /* number of parallel prefetches */ 455 2, /* Branch cost */ 456 5, /* cost of FADD and FSUB insns. */ 457 7, /* cost of FMUL instruction. */ 458 43, /* cost of FDIV instruction. */ 459 2, /* cost of FABS instruction. */ 460 2, /* cost of FCHS instruction. */ 461 43, /* cost of FSQRT instruction. */ 462}; 463 464const struct processor_costs *ix86_cost = &pentium_cost; 465 466/* Processor feature/optimization bitmasks. 
*/ 467#define m_386 (1<<PROCESSOR_I386) 468#define m_486 (1<<PROCESSOR_I486) 469#define m_PENT (1<<PROCESSOR_PENTIUM) 470#define m_PPRO (1<<PROCESSOR_PENTIUMPRO) 471#define m_K6 (1<<PROCESSOR_K6) 472#define m_ATHLON (1<<PROCESSOR_ATHLON) 473#define m_PENT4 (1<<PROCESSOR_PENTIUM4) 474#define m_K8 (1<<PROCESSOR_K8) 475#define m_ATHLON_K8 (m_K8 | m_ATHLON) 476 477const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8; 478const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4; 479const int x86_zero_extend_with_and = m_486 | m_PENT; 480const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */; 481const int x86_double_with_add = ~m_386; 482const int x86_use_bit_test = m_386; 483const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6; 484const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4; 485const int x86_3dnow_a = m_ATHLON_K8; 486const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4; 487const int x86_branch_hints = m_PENT4; 488const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4; 489const int x86_partial_reg_stall = m_PPRO; 490const int x86_use_loop = m_K6; 491const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT); 492const int x86_use_mov0 = m_K6; 493const int x86_use_cltd = ~(m_PENT | m_K6); 494const int x86_read_modify_write = ~m_PENT; 495const int x86_read_modify = ~(m_PENT | m_PPRO); 496const int x86_split_long_moves = m_PPRO; 497const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8; 498const int x86_fast_prefix = ~(m_PENT | m_486 | m_386); 499const int x86_single_stringop = m_386 | m_PENT4; 500const int x86_qimode_math = ~(0); 501const int x86_promote_qi_regs = 0; 502const int x86_himode_math = ~(m_PPRO); 503const int x86_promote_hi_regs = m_PPRO; 504const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4; 505const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4; 506const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4; 507const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | 
m_K6 | m_386 | m_486 | m_PENT4; 508const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO); 509const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4; 510const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4; 511const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO; 512const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO; 513const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO; 514const int x86_decompose_lea = m_PENT4; 515const int x86_shift1 = ~m_486; 516const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4; 517const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO; 518/* Set for machines where the type and dependencies are resolved on SSE register 519 parts instead of whole registers, so we may maintain just lower part of 520 scalar values in proper format leaving the upper part undefined. */ 521const int x86_sse_partial_regs = m_ATHLON_K8; 522/* Athlon optimizes partial-register FPS special case, thus avoiding the 523 need for extra instructions beforehand */ 524const int x86_sse_partial_regs_for_cvtsd2ss = 0; 525const int x86_sse_typeless_stores = m_ATHLON_K8; 526const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4; 527const int x86_use_ffreep = m_ATHLON_K8; 528const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6; 529const int x86_inter_unit_moves = ~(m_ATHLON_K8); 530const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO; 531 532/* In case the average insn count for single function invocation is 533 lower than this constant, emit fast (but longer) prologue and 534 epilogue code. */ 535#define FAST_PROLOGUE_INSN_COUNT 20 536 537/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. 
*/ 538static const char *const qi_reg_name[] = QI_REGISTER_NAMES; 539static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; 540static const char *const hi_reg_name[] = HI_REGISTER_NAMES; 541 542/* Array of the smallest class containing reg number REGNO, indexed by 543 REGNO. Used by REGNO_REG_CLASS in i386.h. */ 544 545enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] = 546{ 547 /* ax, dx, cx, bx */ 548 AREG, DREG, CREG, BREG, 549 /* si, di, bp, sp */ 550 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS, 551 /* FP registers */ 552 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS, 553 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, 554 /* arg pointer */ 555 NON_Q_REGS, 556 /* flags, fpsr, dirflag, frame */ 557 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS, 558 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 559 SSE_REGS, SSE_REGS, 560 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, 561 MMX_REGS, MMX_REGS, 562 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 563 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 564 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 565 SSE_REGS, SSE_REGS, 566}; 567 568/* The "default" register map used in 32bit mode. 
*/ 569 570int const dbx_register_map[FIRST_PSEUDO_REGISTER] = 571{ 572 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */ 573 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */ 574 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 575 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */ 576 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */ 577 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ 578 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ 579}; 580 581static int const x86_64_int_parameter_registers[6] = 582{ 583 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/, 584 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */ 585}; 586 587static int const x86_64_int_return_registers[4] = 588{ 589 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/ 590}; 591 592/* The "default" register map used in 64bit mode. */ 593int const dbx64_register_map[FIRST_PSEUDO_REGISTER] = 594{ 595 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */ 596 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */ 597 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 598 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */ 599 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */ 600 8,9,10,11,12,13,14,15, /* extended integer registers */ 601 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */ 602}; 603 604/* Define the register numbers to be used in Dwarf debugging information. 605 The SVR4 reference port C compiler uses the following register numbers 606 in its Dwarf output code: 607 0 for %eax (gcc regno = 0) 608 1 for %ecx (gcc regno = 2) 609 2 for %edx (gcc regno = 1) 610 3 for %ebx (gcc regno = 3) 611 4 for %esp (gcc regno = 7) 612 5 for %ebp (gcc regno = 6) 613 6 for %esi (gcc regno = 4) 614 7 for %edi (gcc regno = 5) 615 The following three DWARF register numbers are never generated by 616 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4 617 believes these numbers have these meanings. 
618 8 for %eip (no gcc equivalent) 619 9 for %eflags (gcc regno = 17) 620 10 for %trapno (no gcc equivalent) 621 It is not at all clear how we should number the FP stack registers 622 for the x86 architecture. If the version of SDB on x86/svr4 were 623 a bit less brain dead with respect to floating-point then we would 624 have a precedent to follow with respect to DWARF register numbers 625 for x86 FP registers, but the SDB on x86/svr4 is so completely 626 broken with respect to FP registers that it is hardly worth thinking 627 of it as something to strive for compatibility with. 628 The version of x86/svr4 SDB I have at the moment does (partially) 629 seem to believe that DWARF register number 11 is associated with 630 the x86 register %st(0), but that's about all. Higher DWARF 631 register numbers don't seem to be associated with anything in 632 particular, and even for DWARF regno 11, SDB only seems to under- 633 stand that it should say that a variable lives in %st(0) (when 634 asked via an `=' command) if we said it was in DWARF regno 11, 635 but SDB still prints garbage when asked for the value of the 636 variable in question (via a `/' command). 637 (Also note that the labels SDB prints for various FP stack regs 638 when doing an `x' command are all wrong.) 639 Note that these problems generally don't affect the native SVR4 640 C compiler because it doesn't allow the use of -O with -g and 641 because when it is *not* optimizing, it allocates a memory 642 location for each floating-point variable, and the memory 643 location is what gets described in the DWARF AT_location 644 attribute for the variable in question. 645 Regardless of the severe mental illness of the x86/svr4 SDB, we 646 do something sensible here and we use the following DWARF 647 register numbers. Note that these are all stack-top-relative 648 numbers. 
649 11 for %st(0) (gcc regno = 8) 650 12 for %st(1) (gcc regno = 9) 651 13 for %st(2) (gcc regno = 10) 652 14 for %st(3) (gcc regno = 11) 653 15 for %st(4) (gcc regno = 12) 654 16 for %st(5) (gcc regno = 13) 655 17 for %st(6) (gcc regno = 14) 656 18 for %st(7) (gcc regno = 15) 657*/ 658int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = 659{ 660 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */ 661 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */ 662 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 663 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */ 664 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */ 665 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ 666 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ 667}; 668 669/* Test and compare insns in i386.md store the information needed to 670 generate branch and scc insns here. */ 671 672rtx ix86_compare_op0 = NULL_RTX; 673rtx ix86_compare_op1 = NULL_RTX; 674 675#define MAX_386_STACK_LOCALS 3 676/* Size of the register save area. */ 677#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16) 678 679/* Define the structure for the machine field in struct function. */ 680 681struct stack_local_entry GTY(()) 682{ 683 unsigned short mode; 684 unsigned short n; 685 rtx rtl; 686 struct stack_local_entry *next; 687}; 688 689/* Structure describing stack frame layout. 690 Stack grows downward: 691 692 [arguments] 693 <- ARG_POINTER 694 saved pc 695 696 saved frame pointer if frame_pointer_needed 697 <- HARD_FRAME_POINTER 698 [saved regs] 699 700 [padding1] \ 701 ) 702 [va_arg registers] ( 703 > to_allocate <- FRAME_POINTER 704 [frame] ( 705 ) 706 [padding2] / 707 */ 708struct ix86_frame 709{ 710 int nregs; 711 int padding1; 712 int va_arg_size; 713 HOST_WIDE_INT frame; 714 int padding2; 715 int outgoing_arguments_size; 716 int red_zone_size; 717 718 HOST_WIDE_INT to_allocate; 719 /* The offsets relative to ARG_POINTER. 
*/ 720 HOST_WIDE_INT frame_pointer_offset; 721 HOST_WIDE_INT hard_frame_pointer_offset; 722 HOST_WIDE_INT stack_pointer_offset; 723 724 /* When save_regs_using_mov is set, emit prologue using 725 move instead of push instructions. */ 726 bool save_regs_using_mov; 727}; 728 729/* Used to enable/disable debugging features. */ 730const char *ix86_debug_arg_string, *ix86_debug_addr_string; 731/* Code model option as passed by user. */ 732const char *ix86_cmodel_string; 733/* Parsed value. */ 734enum cmodel ix86_cmodel; 735/* Asm dialect. */ 736const char *ix86_asm_string; 737enum asm_dialect ix86_asm_dialect = ASM_ATT; 738/* TLS dialext. */ 739const char *ix86_tls_dialect_string; 740enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU; 741 742/* Which unit we are generating floating point math for. */ 743enum fpmath_unit ix86_fpmath; 744 745/* Which cpu are we scheduling for. */ 746enum processor_type ix86_tune; 747/* Which instruction set architecture to use. */ 748enum processor_type ix86_arch; 749 750/* Strings to hold which cpu and instruction set architecture to use. */ 751const char *ix86_tune_string; /* for -mtune=<xxx> */ 752const char *ix86_arch_string; /* for -march=<xxx> */ 753const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */ 754 755/* # of registers to use to pass arguments. */ 756const char *ix86_regparm_string; 757 758/* true if sse prefetch instruction is not NOOP. */ 759int x86_prefetch_sse; 760 761/* ix86_regparm_string as a number */ 762int ix86_regparm; 763 764/* Alignment to use for loops and jumps: */ 765 766/* Power of two alignment for loops. */ 767const char *ix86_align_loops_string; 768 769/* Power of two alignment for non-loop jumps. */ 770const char *ix86_align_jumps_string; 771 772/* Power of two alignment for stack boundary in bytes. */ 773const char *ix86_preferred_stack_boundary_string; 774 775/* Preferred alignment for stack boundary in bits. 
*/ 776int ix86_preferred_stack_boundary; 777 778/* Values 1-5: see jump.c */ 779int ix86_branch_cost; 780const char *ix86_branch_cost_string; 781 782/* Power of two alignment for functions. */ 783const char *ix86_align_funcs_string; 784 785/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */ 786static char internal_label_prefix[16]; 787static int internal_label_prefix_len; 788 789static int local_symbolic_operand (rtx, enum machine_mode); 790static int tls_symbolic_operand_1 (rtx, enum tls_model); 791static void output_pic_addr_const (FILE *, rtx, int); 792static void put_condition_code (enum rtx_code, enum machine_mode, 793 int, int, FILE *); 794static const char *get_some_local_dynamic_name (void); 795static int get_some_local_dynamic_name_1 (rtx *, void *); 796static rtx maybe_get_pool_constant (rtx); 797static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx); 798static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *, 799 rtx *); 800static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *); 801static enum machine_mode ix86_cc_modes_compatible (enum machine_mode, 802 enum machine_mode); 803static rtx get_thread_pointer (int); 804static rtx legitimize_tls_address (rtx, enum tls_model, int); 805static void get_pc_thunk_name (char [32], unsigned int); 806static rtx gen_push (rtx); 807static int memory_address_length (rtx addr); 808static int ix86_flags_dependant (rtx, rtx, enum attr_type); 809static int ix86_agi_dependant (rtx, rtx, enum attr_type); 810static enum attr_ppro_uops ix86_safe_ppro_uops (rtx); 811static void ix86_dump_ppro_packet (FILE *); 812static void ix86_reorder_insn (rtx *, rtx *); 813static struct machine_function * ix86_init_machine_status (void); 814static int ix86_split_to_parts (rtx, rtx *, enum machine_mode); 815static int ix86_nsaved_regs (void); 816static void ix86_emit_save_regs (void); 817static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT); 818static void ix86_emit_restore_regs_using_mov 
(rtx, HOST_WIDE_INT, int); 819static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT); 820static void ix86_sched_reorder_ppro (rtx *, rtx *); 821static HOST_WIDE_INT ix86_GOT_alias_set (void); 822static void ix86_adjust_counter (rtx, HOST_WIDE_INT); 823static rtx ix86_expand_aligntest (rtx, int); 824static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx); 825static int ix86_issue_rate (void); 826static int ix86_adjust_cost (rtx, rtx, rtx, int); 827static void ix86_sched_init (FILE *, int, int); 828static int ix86_sched_reorder (FILE *, int, rtx *, int *, int); 829static int ix86_variable_issue (FILE *, int, rtx, int); 830static int ia32_use_dfa_pipeline_interface (void); 831static int ia32_multipass_dfa_lookahead (void); 832static void ix86_init_mmx_sse_builtins (void); 833static rtx x86_this_parameter (tree); 834static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, 835 HOST_WIDE_INT, tree); 836static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree); 837static void x86_file_start (void); 838static void ix86_reorg (void); 839static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*); 840static tree ix86_build_builtin_va_list (void); 841 842struct ix86_address 843{ 844 rtx base, index, disp; 845 HOST_WIDE_INT scale; 846 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg; 847}; 848 849static int ix86_decompose_address (rtx, struct ix86_address *); 850static int ix86_address_cost (rtx); 851static bool ix86_cannot_force_const_mem (rtx); 852static rtx ix86_delegitimize_address (rtx); 853 854struct builtin_description; 855static rtx ix86_expand_sse_comi (const struct builtin_description *, 856 tree, rtx); 857static rtx ix86_expand_sse_compare (const struct builtin_description *, 858 tree, rtx); 859static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx); 860static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int); 861static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx); 
862static rtx ix86_expand_store_builtin (enum insn_code, tree); 863static rtx safe_vector_operand (rtx, enum machine_mode); 864static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code); 865static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *, 866 enum rtx_code *, enum rtx_code *); 867static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *); 868static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code); 869static int ix86_fp_comparison_fcomi_cost (enum rtx_code code); 870static int ix86_fp_comparison_sahf_cost (enum rtx_code code); 871static int ix86_fp_comparison_cost (enum rtx_code code); 872static unsigned int ix86_select_alt_pic_regnum (void); 873static int ix86_save_reg (unsigned int, int); 874static void ix86_compute_frame_layout (struct ix86_frame *); 875static int ix86_comp_type_attributes (tree, tree); 876static int ix86_function_regparm (tree, tree); 877const struct attribute_spec ix86_attribute_table[]; 878static bool ix86_function_ok_for_sibcall (tree, tree); 879static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *); 880static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *); 881static int ix86_value_regno (enum machine_mode); 882static bool contains_128bit_aligned_vector_p (tree); 883static bool ix86_ms_bitfield_layout_p (tree); 884static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *); 885static int extended_reg_mentioned_1 (rtx *, void *); 886static bool ix86_rtx_costs (rtx, int, int, int *); 887static int min_insn_size (rtx); 888static void k8_avoid_jump_misspredicts (void); 889 890#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION) 891static void ix86_svr3_asm_out_constructor (rtx, int); 892#endif 893 894/* Register class used for passing given 64bit part of the argument. 
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).
 */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
/* Debug names, indexed by enum x86_64_reg_class value.  Note MEMORY_CLASS
   deliberately prints as "no".  */
static const char * const x86_64_reg_class_name[] =
   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

/* Maximum number of 64-bit eightbytes (hence classes) one argument may
   occupy when classified by classify_argument.  */
#define MAX_CLASSES 4
static int classify_argument (enum machine_mode, tree,
			      enum x86_64_reg_class [MAX_CLASSES], int);
static int examine_argument (enum machine_mode, tree, int, int *, int *);
static rtx construct_container (enum machine_mode, tree, int, int, int,
				const int *, int);
static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
					    enum x86_64_reg_class);

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Lazily-initialized guard for ext_80387_constants_table.  */
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);

/* Initialize the GCC target structure.
 */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

/* x86 assemblers have no unaligned data directives; the aligned ops work
   for unaligned data too.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

/* The one global target hook vector; TARGET_INITIALIZER expands using all
   the TARGET_* overrides above, so this definition must follow them.  */
struct gcc_target targetm = TARGET_INITIALIZER;

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.
This macro, if 1041 defined, is executed once just after all the command options have 1042 been parsed. 1043 1044 Don't use this macro to turn on various extra optimizations for 1045 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ 1046 1047void 1048override_options (void) 1049{ 1050 int i; 1051 /* Comes from final.c -- no real reason to change it. */ 1052#define MAX_CODE_ALIGN 16 1053 1054 static struct ptt 1055 { 1056 const struct processor_costs *cost; /* Processor costs */ 1057 const int target_enable; /* Target flags to enable. */ 1058 const int target_disable; /* Target flags to disable. */ 1059 const int align_loop; /* Default alignments. */ 1060 const int align_loop_max_skip; 1061 const int align_jump; 1062 const int align_jump_max_skip; 1063 const int align_func; 1064 } 1065 const processor_target_table[PROCESSOR_max] = 1066 { 1067 {&i386_cost, 0, 0, 4, 3, 4, 3, 4}, 1068 {&i486_cost, 0, 0, 16, 15, 16, 15, 16}, 1069 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16}, 1070 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16}, 1071 {&k6_cost, 0, 0, 32, 7, 32, 7, 32}, 1072 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16}, 1073 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0}, 1074 {&k8_cost, 0, 0, 16, 7, 16, 7, 16} 1075 }; 1076 1077 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES; 1078 static struct pta 1079 { 1080 const char *const name; /* processor name or nickname. 
*/ 1081 const enum processor_type processor; 1082 const enum pta_flags 1083 { 1084 PTA_SSE = 1, 1085 PTA_SSE2 = 2, 1086 PTA_SSE3 = 4, 1087 PTA_MMX = 8, 1088 PTA_PREFETCH_SSE = 16, 1089 PTA_3DNOW = 32, 1090 PTA_3DNOW_A = 64, 1091 PTA_64BIT = 128 1092 } flags; 1093 } 1094 const processor_alias_table[] = 1095 { 1096 {"i386", PROCESSOR_I386, 0}, 1097 {"i486", PROCESSOR_I486, 0}, 1098 {"i586", PROCESSOR_PENTIUM, 0}, 1099 {"pentium", PROCESSOR_PENTIUM, 0}, 1100 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX}, 1101 {"winchip-c6", PROCESSOR_I486, PTA_MMX}, 1102 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, 1103 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, 1104 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE}, 1105 {"i686", PROCESSOR_PENTIUMPRO, 0}, 1106 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0}, 1107 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX}, 1108 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, 1109 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, 1110 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2}, 1111 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 1112 | PTA_MMX | PTA_PREFETCH_SSE}, 1113 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 1114 | PTA_MMX | PTA_PREFETCH_SSE}, 1115 {"prescott", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 1116 | PTA_MMX | PTA_PREFETCH_SSE}, 1117 {"nocona", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT 1118 | PTA_MMX | PTA_PREFETCH_SSE}, 1119 {"k6", PROCESSOR_K6, PTA_MMX}, 1120 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 1121 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 1122 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1123 | PTA_3DNOW_A}, 1124 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE 1125 | PTA_3DNOW | PTA_3DNOW_A}, 1126 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1127 | PTA_3DNOW_A | PTA_SSE}, 1128 {"athlon-xp", PROCESSOR_ATHLON, 
PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1129 | PTA_3DNOW_A | PTA_SSE}, 1130 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1131 | PTA_3DNOW_A | PTA_SSE}, 1132 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT 1133 | PTA_SSE | PTA_SSE2 }, 1134 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1135 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1136 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1137 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1138 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1139 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1140 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1141 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1142 }; 1143 1144 int const pta_size = ARRAY_SIZE (processor_alias_table); 1145 1146 /* Set the default values for switches whose default depends on TARGET_64BIT 1147 in case they weren't overwritten by command line options. */ 1148 if (TARGET_64BIT) 1149 { 1150 if (flag_omit_frame_pointer == 2) 1151 flag_omit_frame_pointer = 1; 1152 if (flag_asynchronous_unwind_tables == 2) 1153 flag_asynchronous_unwind_tables = 1; 1154 if (flag_pcc_struct_return == 2) 1155 flag_pcc_struct_return = 0; 1156 } 1157 else 1158 { 1159 if (flag_omit_frame_pointer == 2) 1160 flag_omit_frame_pointer = 0; 1161 if (flag_asynchronous_unwind_tables == 2) 1162 flag_asynchronous_unwind_tables = 0; 1163 if (flag_pcc_struct_return == 2) 1164 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; 1165 } 1166 1167#ifdef SUBTARGET_OVERRIDE_OPTIONS 1168 SUBTARGET_OVERRIDE_OPTIONS; 1169#endif 1170 1171 if (!ix86_tune_string && ix86_arch_string) 1172 ix86_tune_string = ix86_arch_string; 1173 if (!ix86_tune_string) 1174 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT]; 1175 if (!ix86_arch_string) 1176 ix86_arch_string = TARGET_64BIT ? 
"x86-64" : "i386"; 1177 1178 if (ix86_cmodel_string != 0) 1179 { 1180 if (!strcmp (ix86_cmodel_string, "small")) 1181 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 1182 else if (flag_pic) 1183 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string); 1184 else if (!strcmp (ix86_cmodel_string, "32")) 1185 ix86_cmodel = CM_32; 1186 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic) 1187 ix86_cmodel = CM_KERNEL; 1188 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic) 1189 ix86_cmodel = CM_MEDIUM; 1190 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic) 1191 ix86_cmodel = CM_LARGE; 1192 else 1193 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string); 1194 } 1195 else 1196 { 1197 ix86_cmodel = CM_32; 1198 if (TARGET_64BIT) 1199 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 1200 } 1201 if (ix86_asm_string != 0) 1202 { 1203 if (!strcmp (ix86_asm_string, "intel")) 1204 ix86_asm_dialect = ASM_INTEL; 1205 else if (!strcmp (ix86_asm_string, "att")) 1206 ix86_asm_dialect = ASM_ATT; 1207 else 1208 error ("bad value (%s) for -masm= switch", ix86_asm_string); 1209 } 1210 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32)) 1211 error ("code model `%s' not supported in the %s bit mode", 1212 ix86_cmodel_string, TARGET_64BIT ? "64" : "32"); 1213 if (ix86_cmodel == CM_LARGE) 1214 sorry ("code model `large' not supported yet"); 1215 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0)) 1216 sorry ("%i-bit mode not compiled in", 1217 (target_flags & MASK_64BIT) ? 64 : 32); 1218 1219 for (i = 0; i < pta_size; i++) 1220 if (! strcmp (ix86_arch_string, processor_alias_table[i].name)) 1221 { 1222 ix86_arch = processor_alias_table[i].processor; 1223 /* Default cpu tuning to the architecture. 
*/ 1224 ix86_tune = ix86_arch; 1225 if (processor_alias_table[i].flags & PTA_MMX 1226 && !(target_flags_explicit & MASK_MMX)) 1227 target_flags |= MASK_MMX; 1228 if (processor_alias_table[i].flags & PTA_3DNOW 1229 && !(target_flags_explicit & MASK_3DNOW)) 1230 target_flags |= MASK_3DNOW; 1231 if (processor_alias_table[i].flags & PTA_3DNOW_A 1232 && !(target_flags_explicit & MASK_3DNOW_A)) 1233 target_flags |= MASK_3DNOW_A; 1234 if (processor_alias_table[i].flags & PTA_SSE 1235 && !(target_flags_explicit & MASK_SSE)) 1236 target_flags |= MASK_SSE; 1237 if (processor_alias_table[i].flags & PTA_SSE2 1238 && !(target_flags_explicit & MASK_SSE2)) 1239 target_flags |= MASK_SSE2; 1240 if (processor_alias_table[i].flags & PTA_SSE3 1241 && !(target_flags_explicit & MASK_SSE3)) 1242 target_flags |= MASK_SSE3; 1243 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) 1244 x86_prefetch_sse = true; 1245 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) 1246 error ("CPU you selected does not support x86-64 instruction set"); 1247 break; 1248 } 1249 1250 if (i == pta_size) 1251 error ("bad value (%s) for -march= switch", ix86_arch_string); 1252 1253 for (i = 0; i < pta_size; i++) 1254 if (! strcmp (ix86_tune_string, processor_alias_table[i].name)) 1255 { 1256 ix86_tune = processor_alias_table[i].processor; 1257 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) 1258 error ("CPU you selected does not support x86-64 instruction set"); 1259 1260 /* Intel CPUs have always interpreted SSE prefetch instructions as 1261 NOPs; so, we can enable SSE prefetch instructions even when 1262 -mtune (rather than -march) points us to a processor that has them. 1263 However, the VIA C3 gives a SIGILL, so we only do that for i686 and 1264 higher processors. 
*/ 1265 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE)) 1266 x86_prefetch_sse = true; 1267 break; 1268 } 1269 if (i == pta_size) 1270 error ("bad value (%s) for -mtune= switch", ix86_tune_string); 1271 1272 if (optimize_size) 1273 ix86_cost = &size_cost; 1274 else 1275 ix86_cost = processor_target_table[ix86_tune].cost; 1276 target_flags |= processor_target_table[ix86_tune].target_enable; 1277 target_flags &= ~processor_target_table[ix86_tune].target_disable; 1278 1279 /* Arrange to set up i386_stack_locals for all functions. */ 1280 init_machine_status = ix86_init_machine_status; 1281 1282 /* Validate -mregparm= value. */ 1283 if (ix86_regparm_string) 1284 { 1285 i = atoi (ix86_regparm_string); 1286 if (i < 0 || i > REGPARM_MAX) 1287 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX); 1288 else 1289 ix86_regparm = i; 1290 } 1291 else 1292 if (TARGET_64BIT) 1293 ix86_regparm = REGPARM_MAX; 1294 1295 /* If the user has provided any of the -malign-* options, 1296 warn and use that value only if -falign-* is not set. 1297 Remove this code in GCC 3.2 or later. 
*/ 1298 if (ix86_align_loops_string) 1299 { 1300 warning ("-malign-loops is obsolete, use -falign-loops"); 1301 if (align_loops == 0) 1302 { 1303 i = atoi (ix86_align_loops_string); 1304 if (i < 0 || i > MAX_CODE_ALIGN) 1305 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1306 else 1307 align_loops = 1 << i; 1308 } 1309 } 1310 1311 if (ix86_align_jumps_string) 1312 { 1313 warning ("-malign-jumps is obsolete, use -falign-jumps"); 1314 if (align_jumps == 0) 1315 { 1316 i = atoi (ix86_align_jumps_string); 1317 if (i < 0 || i > MAX_CODE_ALIGN) 1318 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1319 else 1320 align_jumps = 1 << i; 1321 } 1322 } 1323 1324 if (ix86_align_funcs_string) 1325 { 1326 warning ("-malign-functions is obsolete, use -falign-functions"); 1327 if (align_functions == 0) 1328 { 1329 i = atoi (ix86_align_funcs_string); 1330 if (i < 0 || i > MAX_CODE_ALIGN) 1331 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1332 else 1333 align_functions = 1 << i; 1334 } 1335 } 1336 1337 /* Default align_* from the processor table. */ 1338 if (align_loops == 0) 1339 { 1340 align_loops = processor_target_table[ix86_tune].align_loop; 1341 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip; 1342 } 1343 if (align_jumps == 0) 1344 { 1345 align_jumps = processor_target_table[ix86_tune].align_jump; 1346 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip; 1347 } 1348 if (align_functions == 0) 1349 { 1350 align_functions = processor_target_table[ix86_tune].align_func; 1351 } 1352 1353 /* Validate -mpreferred-stack-boundary= value, or provide default. 1354 The default of 128 bits is for Pentium III's SSE __m128, but we 1355 don't want additional code to keep the stack aligned when 1356 optimizing for code size. */ 1357 ix86_preferred_stack_boundary = (optimize_size 1358 ? TARGET_64BIT ? 
128 : 32 1359 : 128); 1360 if (ix86_preferred_stack_boundary_string) 1361 { 1362 i = atoi (ix86_preferred_stack_boundary_string); 1363 if (i < (TARGET_64BIT ? 4 : 2) || i > 12) 1364 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i, 1365 TARGET_64BIT ? 4 : 2); 1366 else 1367 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT; 1368 } 1369 1370 /* Validate -mbranch-cost= value, or provide default. */ 1371 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost; 1372 if (ix86_branch_cost_string) 1373 { 1374 i = atoi (ix86_branch_cost_string); 1375 if (i < 0 || i > 5) 1376 error ("-mbranch-cost=%d is not between 0 and 5", i); 1377 else 1378 ix86_branch_cost = i; 1379 } 1380 1381 if (ix86_tls_dialect_string) 1382 { 1383 if (strcmp (ix86_tls_dialect_string, "gnu") == 0) 1384 ix86_tls_dialect = TLS_DIALECT_GNU; 1385 else if (strcmp (ix86_tls_dialect_string, "sun") == 0) 1386 ix86_tls_dialect = TLS_DIALECT_SUN; 1387 else 1388 error ("bad value (%s) for -mtls-dialect= switch", 1389 ix86_tls_dialect_string); 1390 } 1391 1392 /* Keep nonleaf frame pointers. */ 1393 if (TARGET_OMIT_LEAF_FRAME_POINTER) 1394 flag_omit_frame_pointer = 1; 1395 1396 /* If we're doing fast math, we don't care about comparison order 1397 wrt NaNs. This lets us use a shorter comparison sequence. */ 1398 if (flag_unsafe_math_optimizations) 1399 target_flags &= ~MASK_IEEE_FP; 1400 1401 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387, 1402 since the insns won't need emulation. */ 1403 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch)) 1404 target_flags &= ~MASK_NO_FANCY_MATH_387; 1405 1406 /* Turn on SSE2 builtins for -msse3. */ 1407 if (TARGET_SSE3) 1408 target_flags |= MASK_SSE2; 1409 1410 /* Turn on SSE builtins for -msse2. 
*/ 1411 if (TARGET_SSE2) 1412 target_flags |= MASK_SSE; 1413 1414 if (TARGET_64BIT) 1415 { 1416 if (TARGET_ALIGN_DOUBLE) 1417 error ("-malign-double makes no sense in the 64bit mode"); 1418 if (TARGET_RTD) 1419 error ("-mrtd calling convention not supported in the 64bit mode"); 1420 /* Enable by default the SSE and MMX builtins. */ 1421 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE); 1422 ix86_fpmath = FPMATH_SSE; 1423 } 1424 else 1425 { 1426 ix86_fpmath = FPMATH_387; 1427 /* i386 ABI does not specify red zone. It still makes sense to use it 1428 when programmer takes care to stack from being destroyed. */ 1429 if (!(target_flags_explicit & MASK_NO_RED_ZONE)) 1430 target_flags |= MASK_NO_RED_ZONE; 1431 } 1432 1433 if (ix86_fpmath_string != 0) 1434 { 1435 if (! strcmp (ix86_fpmath_string, "387")) 1436 ix86_fpmath = FPMATH_387; 1437 else if (! strcmp (ix86_fpmath_string, "sse")) 1438 { 1439 if (!TARGET_SSE) 1440 { 1441 warning ("SSE instruction set disabled, using 387 arithmetics"); 1442 ix86_fpmath = FPMATH_387; 1443 } 1444 else 1445 ix86_fpmath = FPMATH_SSE; 1446 } 1447 else if (! strcmp (ix86_fpmath_string, "387,sse") 1448 || ! strcmp (ix86_fpmath_string, "sse,387")) 1449 { 1450 if (!TARGET_SSE) 1451 { 1452 warning ("SSE instruction set disabled, using 387 arithmetics"); 1453 ix86_fpmath = FPMATH_387; 1454 } 1455 else if (!TARGET_80387) 1456 { 1457 warning ("387 instruction set disabled, using SSE arithmetics"); 1458 ix86_fpmath = FPMATH_SSE; 1459 } 1460 else 1461 ix86_fpmath = FPMATH_SSE | FPMATH_387; 1462 } 1463 else 1464 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string); 1465 } 1466 1467 /* It makes no sense to ask for just SSE builtins, so MMX is also turned 1468 on by -msse. */ 1469 if (TARGET_SSE) 1470 { 1471 target_flags |= MASK_MMX; 1472 x86_prefetch_sse = true; 1473 } 1474 1475 /* If it has 3DNow! 
it also has MMX so MMX is also turned on by -m3dnow */ 1476 if (TARGET_3DNOW) 1477 { 1478 target_flags |= MASK_MMX; 1479 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX 1480 extensions it adds. */ 1481 if (x86_3dnow_a & (1 << ix86_arch)) 1482 target_flags |= MASK_3DNOW_A; 1483 } 1484 if ((x86_accumulate_outgoing_args & TUNEMASK) 1485 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) 1486 && !optimize_size) 1487 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 1488 1489 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ 1490 { 1491 char *p; 1492 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); 1493 p = strchr (internal_label_prefix, 'X'); 1494 internal_label_prefix_len = p - internal_label_prefix; 1495 *p = '\0'; 1496 } 1497} 1498 1499void 1500optimization_options (int level, int size ATTRIBUTE_UNUSED) 1501{ 1502 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to 1503 make the problem with not enough registers even worse. */ 1504#ifdef INSN_SCHEDULING 1505 if (level > 1) 1506 flag_schedule_insns = 0; 1507#endif 1508 1509 /* The default values of these switches depend on the TARGET_64BIT 1510 that is not known at this moment. Mark these values with 2 and 1511 let user the to override these. In case there is no command line option 1512 specifying them, we will set the defaults in override_options. */ 1513 if (optimize >= 1) 1514 flag_omit_frame_pointer = 2; 1515 flag_pcc_struct_return = 2; 1516 flag_asynchronous_unwind_tables = 2; 1517} 1518 1519/* Table of valid machine attributes. */ 1520const struct attribute_spec ix86_attribute_table[] = 1521{ 1522 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ 1523 /* Stdcall attribute says callee is responsible for popping arguments 1524 if they are not variable. 
  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  /* ms_struct/gcc_struct select the record layout convention; both go
     through the same handler.  */
  { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
  { NULL,        0, 0, false, false, false, NULL }
};

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  Returns true when a sibcall is safe.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  */
  if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
    return false;

  /* If we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  */
  if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
      != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
    return false;

  /* If this call is indirect, we'll need to be able to use a call-clobbered
     register for the address of the target function.  Make sure that all
     such registers are not used for passing parameters.  */
  if (!decl && !TARGET_64BIT)
    {
      tree type;

      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = TREE_OPERAND (exp, 0);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */

      /* regparm >= 3 leaves no call-clobbered register free to hold the
	 target address, so the sibcall cannot be made.  */
      if (ix86_function_regparm (type, NULL) >= 3)
	{
	  /* ??? Need to count the actual number of registers to be used,
	     not the possible number of registers.  Fix later.  */
	  return false;
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}

/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
   arguments as in struct attribute_spec.handler.
 */
static tree
ix86_handle_cdecl_attribute (tree *node, tree name,
			     tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      /* Reject contradictory combinations of calling-convention
	 attributes on the same type.  */
      if (is_attribute_p ("fastcall", name))
	{
	  if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	    {
	      error ("fastcall and stdcall attributes are not compatible");
	    }
	  else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
	    {
	      error ("fastcall and regparm attributes are not compatible");
	    }
	}
      else if (is_attribute_p ("stdcall", name))
	{
	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	    {
	      error ("fastcall and stdcall attributes are not compatible");
	    }
	}
    }

  /* These calling conventions only exist in 32-bit mode; warn and drop
     the attribute for x86-64.  */
  if (TARGET_64BIT)
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle a "regparm" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_regparm_attribute (tree *node, tree name, tree args,
			       int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      tree cst;

      /* The single argument must be an integer constant no larger than
	 REGPARM_MAX.  */
      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning ("`%s' attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning ("argument to `%s' attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and regparm attributes are not compatible");
	}
    }

  return NULL_TREE;
}

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (tree type1, tree type2)
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched fastcall types */
  if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
      != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
    return 0;

  /* Check for mismatched return types (cdecl vs stdcall).
 */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;
  return 1;
}

/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (tree type, tree decl)
{
  tree attr;
  int regparm = ix86_regparm;
  bool user_convention = false;

  if (!TARGET_64BIT)
    {
      /* An explicit regparm attribute on the type overrides the
	 -mregparm default.  */
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  user_convention = true;
	}

      /* fastcall always passes the first two integer args in registers.  */
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
	{
	  regparm = 2;
	  user_convention = true;
	}

      /* Use register calling convention for local functions when possible.  */
      if (!TARGET_64BIT && !user_convention && decl
	  && flag_unit_at_a_time && !profile_flag)
	{
	  struct cgraph_local_info *i = cgraph_local_info (decl);
	  if (i && i->local)
	    {
	      /* We can't use regparm(3) for nested functions as these use
		 static chain pointer in third argument.  */
	      if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
		regparm = 2;
	      else
		regparm = 3;
	    }
	}
    }
  return regparm;
}

/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
}

/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall and fastcall functions will pop the stack if not
       variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
        || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    /* Callee pops only when the argument list is fixed (ends in
       void_type_node, i.e. not varargs/unprototyped).  */
    if (rtd
        && (TYPE_ARG_TYPES (funtype) == NULL_TREE
	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		== void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !TARGET_64BIT)
    {
      int nregs = ix86_function_regparm (funtype, fundecl);

      if (!nregs)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}

/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */
bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  if (!TARGET_64BIT)
    return (regno < REGPARM_MAX
	    || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
  if (SSE_REGNO_P (regno) && TARGET_SSE)
    return true;
  /* RAX is used as hidden argument to va_arg functions.  */
  if (!regno)
    return true;
  for (i = 0; i < REGPARM_MAX; i++)
    if (regno == x86_64_int_parameter_registers[i])
      return true;
  return false;
}

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl)
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.
*/ 1870 if (fntype) 1871 cum->nregs = ix86_function_regparm (fntype, fndecl); 1872 else 1873 cum->nregs = ix86_regparm; 1874 cum->sse_nregs = SSE_REGPARM_MAX; 1875 cum->mmx_nregs = MMX_REGPARM_MAX; 1876 cum->warn_sse = true; 1877 cum->warn_mmx = true; 1878 cum->maybe_vaarg = false; 1879 1880 /* Use ecx and edx registers if function has fastcall attribute */ 1881 if (fntype && !TARGET_64BIT) 1882 { 1883 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype))) 1884 { 1885 cum->nregs = 2; 1886 cum->fastcall = 1; 1887 } 1888 } 1889 1890 1891 /* Determine if this function has variable arguments. This is 1892 indicated by the last argument being 'void_type_mode' if there 1893 are no variable arguments. If there are variable arguments, then 1894 we won't pass anything in registers */ 1895 1896 if (cum->nregs || !TARGET_MMX || !TARGET_SSE) 1897 { 1898 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0; 1899 param != 0; param = next_param) 1900 { 1901 next_param = TREE_CHAIN (param); 1902 if (next_param == 0 && TREE_VALUE (param) != void_type_node) 1903 { 1904 if (!TARGET_64BIT) 1905 { 1906 cum->nregs = 0; 1907 cum->sse_nregs = 0; 1908 cum->mmx_nregs = 0; 1909 cum->warn_sse = 0; 1910 cum->warn_mmx = 0; 1911 cum->fastcall = 0; 1912 } 1913 cum->maybe_vaarg = true; 1914 } 1915 } 1916 } 1917 if ((!fntype && !libname) 1918 || (fntype && !TYPE_ARG_TYPES (fntype))) 1919 cum->maybe_vaarg = 1; 1920 1921 if (TARGET_DEBUG_ARG) 1922 fprintf (stderr, ", nregs=%d )\n", cum->nregs); 1923 1924 return; 1925} 1926 1927/* x86-64 register passing implementation. See x86-64 ABI for details. Goal 1928 of this code is to classify each 8bytes of incoming argument by the register 1929 class and assign registers accordingly. */ 1930 1931/* Return the union class of CLASS1 and CLASS2. 1932 See the x86-64 PS ABI for details. 
*/ 1933 1934static enum x86_64_reg_class 1935merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) 1936{ 1937 /* Rule #1: If both classes are equal, this is the resulting class. */ 1938 if (class1 == class2) 1939 return class1; 1940 1941 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is 1942 the other class. */ 1943 if (class1 == X86_64_NO_CLASS) 1944 return class2; 1945 if (class2 == X86_64_NO_CLASS) 1946 return class1; 1947 1948 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ 1949 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) 1950 return X86_64_MEMORY_CLASS; 1951 1952 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ 1953 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) 1954 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) 1955 return X86_64_INTEGERSI_CLASS; 1956 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS 1957 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) 1958 return X86_64_INTEGER_CLASS; 1959 1960 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */ 1961 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS 1962 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS) 1963 return X86_64_MEMORY_CLASS; 1964 1965 /* Rule #6: Otherwise class SSE is used. */ 1966 return X86_64_SSE_CLASS; 1967} 1968 1969/* Classify the argument of type TYPE and mode MODE. 1970 CLASSES will be filled by the register class used to pass each word 1971 of the operand. The number of words is returned. In case the parameter 1972 should be passed in memory, 0 is returned. As a special case for zero 1973 sized containers, classes[0] will be NO_CLASS and 1 is returned. 1974 1975 BIT_OFFSET is used internally for handling records and specifies offset 1976 of the offset in bits modulo 256 to avoid overflow cases. 1977 1978 See the x86-64 PS ABI for details. 
1979*/ 1980 1981static int 1982classify_argument (enum machine_mode mode, tree type, 1983 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) 1984{ 1985 HOST_WIDE_INT bytes = 1986 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 1987 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 1988 1989 /* Variable sized entities are always passed/returned in memory. */ 1990 if (bytes < 0) 1991 return 0; 1992 1993 if (mode != VOIDmode 1994 && MUST_PASS_IN_STACK (mode, type)) 1995 return 0; 1996 1997 if (type && AGGREGATE_TYPE_P (type)) 1998 { 1999 int i; 2000 tree field; 2001 enum x86_64_reg_class subclasses[MAX_CLASSES]; 2002 2003 /* On x86-64 we pass structures larger than 16 bytes on the stack. */ 2004 if (bytes > 16) 2005 return 0; 2006 2007 for (i = 0; i < words; i++) 2008 classes[i] = X86_64_NO_CLASS; 2009 2010 /* Zero sized arrays or structures are NO_CLASS. We return 0 to 2011 signalize memory class, so handle it as special case. */ 2012 if (!words) 2013 { 2014 classes[0] = X86_64_NO_CLASS; 2015 return 1; 2016 } 2017 2018 /* Classify each field of record and merge classes. */ 2019 if (TREE_CODE (type) == RECORD_TYPE) 2020 { 2021 /* For classes first merge in the field of the subclasses. 
*/ 2022 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL) 2023 { 2024 tree bases = TYPE_BINFO_BASETYPES (type); 2025 int n_bases = TREE_VEC_LENGTH (bases); 2026 int i; 2027 2028 for (i = 0; i < n_bases; ++i) 2029 { 2030 tree binfo = TREE_VEC_ELT (bases, i); 2031 int num; 2032 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8; 2033 tree type = BINFO_TYPE (binfo); 2034 2035 num = classify_argument (TYPE_MODE (type), 2036 type, subclasses, 2037 (offset + bit_offset) % 256); 2038 if (!num) 2039 return 0; 2040 for (i = 0; i < num; i++) 2041 { 2042 int pos = (offset + (bit_offset % 64)) / 8 / 8; 2043 classes[i + pos] = 2044 merge_classes (subclasses[i], classes[i + pos]); 2045 } 2046 } 2047 } 2048 /* And now merge the fields of structure. */ 2049 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 2050 { 2051 if (TREE_CODE (field) == FIELD_DECL) 2052 { 2053 int num; 2054 2055 /* Bitfields are always classified as integer. Handle them 2056 early, since later code would consider them to be 2057 misaligned integers. */ 2058 if (DECL_BIT_FIELD (field)) 2059 { 2060 for (i = int_bit_position (field) / 8 / 8; 2061 i < (int_bit_position (field) 2062 + tree_low_cst (DECL_SIZE (field), 0) 2063 + 63) / 8 / 8; i++) 2064 classes[i] = 2065 merge_classes (X86_64_INTEGER_CLASS, 2066 classes[i]); 2067 } 2068 else 2069 { 2070 num = classify_argument (TYPE_MODE (TREE_TYPE (field)), 2071 TREE_TYPE (field), subclasses, 2072 (int_bit_position (field) 2073 + bit_offset) % 256); 2074 if (!num) 2075 return 0; 2076 for (i = 0; i < num; i++) 2077 { 2078 int pos = 2079 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; 2080 classes[i + pos] = 2081 merge_classes (subclasses[i], classes[i + pos]); 2082 } 2083 } 2084 } 2085 } 2086 } 2087 /* Arrays are handled as small records. 
*/ 2088 else if (TREE_CODE (type) == ARRAY_TYPE) 2089 { 2090 int num; 2091 num = classify_argument (TYPE_MODE (TREE_TYPE (type)), 2092 TREE_TYPE (type), subclasses, bit_offset); 2093 if (!num) 2094 return 0; 2095 2096 /* The partial classes are now full classes. */ 2097 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) 2098 subclasses[0] = X86_64_SSE_CLASS; 2099 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4) 2100 subclasses[0] = X86_64_INTEGER_CLASS; 2101 2102 for (i = 0; i < words; i++) 2103 classes[i] = subclasses[i % num]; 2104 } 2105 /* Unions are similar to RECORD_TYPE but offset is always 0. */ 2106 else if (TREE_CODE (type) == UNION_TYPE 2107 || TREE_CODE (type) == QUAL_UNION_TYPE) 2108 { 2109 /* For classes first merge in the field of the subclasses. */ 2110 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL) 2111 { 2112 tree bases = TYPE_BINFO_BASETYPES (type); 2113 int n_bases = TREE_VEC_LENGTH (bases); 2114 int i; 2115 2116 for (i = 0; i < n_bases; ++i) 2117 { 2118 tree binfo = TREE_VEC_ELT (bases, i); 2119 int num; 2120 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8; 2121 tree type = BINFO_TYPE (binfo); 2122 2123 num = classify_argument (TYPE_MODE (type), 2124 type, subclasses, 2125 (offset + (bit_offset % 64)) % 256); 2126 if (!num) 2127 return 0; 2128 for (i = 0; i < num; i++) 2129 { 2130 int pos = (offset + (bit_offset % 64)) / 8 / 8; 2131 classes[i + pos] = 2132 merge_classes (subclasses[i], classes[i + pos]); 2133 } 2134 } 2135 } 2136 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 2137 { 2138 if (TREE_CODE (field) == FIELD_DECL) 2139 { 2140 int num; 2141 num = classify_argument (TYPE_MODE (TREE_TYPE (field)), 2142 TREE_TYPE (field), subclasses, 2143 bit_offset); 2144 if (!num) 2145 return 0; 2146 for (i = 0; i < num; i++) 2147 classes[i] = merge_classes (subclasses[i], classes[i]); 2148 } 2149 } 2150 } 2151 else if (TREE_CODE (type) == SET_TYPE) 2152 { 2153 if (bytes <= 4) 2154 { 2155 
classes[0] = X86_64_INTEGERSI_CLASS; 2156 return 1; 2157 } 2158 else if (bytes <= 8) 2159 { 2160 classes[0] = X86_64_INTEGER_CLASS; 2161 return 1; 2162 } 2163 else if (bytes <= 12) 2164 { 2165 classes[0] = X86_64_INTEGER_CLASS; 2166 classes[1] = X86_64_INTEGERSI_CLASS; 2167 return 2; 2168 } 2169 else 2170 { 2171 classes[0] = X86_64_INTEGER_CLASS; 2172 classes[1] = X86_64_INTEGER_CLASS; 2173 return 2; 2174 } 2175 } 2176 else 2177 abort (); 2178 2179 /* Final merger cleanup. */ 2180 for (i = 0; i < words; i++) 2181 { 2182 /* If one class is MEMORY, everything should be passed in 2183 memory. */ 2184 if (classes[i] == X86_64_MEMORY_CLASS) 2185 return 0; 2186 2187 /* The X86_64_SSEUP_CLASS should be always preceded by 2188 X86_64_SSE_CLASS. */ 2189 if (classes[i] == X86_64_SSEUP_CLASS 2190 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS)) 2191 classes[i] = X86_64_SSE_CLASS; 2192 2193 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */ 2194 if (classes[i] == X86_64_X87UP_CLASS 2195 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS)) 2196 classes[i] = X86_64_SSE_CLASS; 2197 } 2198 return words; 2199 } 2200 2201 /* Compute alignment needed. We align all types to natural boundaries with 2202 exception of XFmode that is aligned to 64bits. */ 2203 if (mode != VOIDmode && mode != BLKmode) 2204 { 2205 int mode_alignment = GET_MODE_BITSIZE (mode); 2206 2207 if (mode == XFmode) 2208 mode_alignment = 128; 2209 else if (mode == XCmode) 2210 mode_alignment = 256; 2211 if (COMPLEX_MODE_P (mode)) 2212 mode_alignment /= 2; 2213 /* Misaligned fields are always returned in memory. */ 2214 if (bit_offset % mode_alignment) 2215 return 0; 2216 } 2217 2218 /* Classification of atomic types. 
*/ 2219 switch (mode) 2220 { 2221 case DImode: 2222 case SImode: 2223 case HImode: 2224 case QImode: 2225 case CSImode: 2226 case CHImode: 2227 case CQImode: 2228 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) 2229 classes[0] = X86_64_INTEGERSI_CLASS; 2230 else 2231 classes[0] = X86_64_INTEGER_CLASS; 2232 return 1; 2233 case CDImode: 2234 case TImode: 2235 classes[0] = classes[1] = X86_64_INTEGER_CLASS; 2236 return 2; 2237 case CTImode: 2238 classes[0] = classes[1] = X86_64_INTEGER_CLASS; 2239 classes[2] = classes[3] = X86_64_INTEGER_CLASS; 2240 return 4; 2241 case SFmode: 2242 if (!(bit_offset % 64)) 2243 classes[0] = X86_64_SSESF_CLASS; 2244 else 2245 classes[0] = X86_64_SSE_CLASS; 2246 return 1; 2247 case DFmode: 2248 classes[0] = X86_64_SSEDF_CLASS; 2249 return 1; 2250 case XFmode: 2251 classes[0] = X86_64_X87_CLASS; 2252 classes[1] = X86_64_X87UP_CLASS; 2253 return 2; 2254 case TFmode: 2255 case TCmode: 2256 return 0; 2257 case XCmode: 2258 classes[0] = X86_64_X87_CLASS; 2259 classes[1] = X86_64_X87UP_CLASS; 2260 classes[2] = X86_64_X87_CLASS; 2261 classes[3] = X86_64_X87UP_CLASS; 2262 return 4; 2263 case DCmode: 2264 classes[0] = X86_64_SSEDF_CLASS; 2265 classes[1] = X86_64_SSEDF_CLASS; 2266 return 2; 2267 case SCmode: 2268 classes[0] = X86_64_SSE_CLASS; 2269 return 1; 2270 case V4SFmode: 2271 case V4SImode: 2272 case V16QImode: 2273 case V8HImode: 2274 case V2DFmode: 2275 case V2DImode: 2276 classes[0] = X86_64_SSE_CLASS; 2277 classes[1] = X86_64_SSEUP_CLASS; 2278 return 2; 2279 case V2SFmode: 2280 case V2SImode: 2281 case V4HImode: 2282 case V8QImode: 2283 return 0; 2284 case BLKmode: 2285 case VOIDmode: 2286 return 0; 2287 default: 2288 abort (); 2289 } 2290} 2291 2292/* Examine the argument and return set number of register required in each 2293 class. Return 0 iff parameter should be passed in memory. 
*/ 2294static int 2295examine_argument (enum machine_mode mode, tree type, int in_return, 2296 int *int_nregs, int *sse_nregs) 2297{ 2298 enum x86_64_reg_class class[MAX_CLASSES]; 2299 int n = classify_argument (mode, type, class, 0); 2300 2301 *int_nregs = 0; 2302 *sse_nregs = 0; 2303 if (!n) 2304 return 0; 2305 for (n--; n >= 0; n--) 2306 switch (class[n]) 2307 { 2308 case X86_64_INTEGER_CLASS: 2309 case X86_64_INTEGERSI_CLASS: 2310 (*int_nregs)++; 2311 break; 2312 case X86_64_SSE_CLASS: 2313 case X86_64_SSESF_CLASS: 2314 case X86_64_SSEDF_CLASS: 2315 (*sse_nregs)++; 2316 break; 2317 case X86_64_NO_CLASS: 2318 case X86_64_SSEUP_CLASS: 2319 break; 2320 case X86_64_X87_CLASS: 2321 case X86_64_X87UP_CLASS: 2322 if (!in_return) 2323 return 0; 2324 break; 2325 case X86_64_MEMORY_CLASS: 2326 abort (); 2327 } 2328 return 1; 2329} 2330/* Construct container for the argument used by GCC interface. See 2331 FUNCTION_ARG for the detailed description. */ 2332static rtx 2333construct_container (enum machine_mode mode, tree type, int in_return, 2334 int nintregs, int nsseregs, const int * intreg, 2335 int sse_regno) 2336{ 2337 enum machine_mode tmpmode; 2338 int bytes = 2339 (mode == BLKmode) ? 
int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 2340 enum x86_64_reg_class class[MAX_CLASSES]; 2341 int n; 2342 int i; 2343 int nexps = 0; 2344 int needed_sseregs, needed_intregs; 2345 rtx exp[MAX_CLASSES]; 2346 rtx ret; 2347 2348 n = classify_argument (mode, type, class, 0); 2349 if (TARGET_DEBUG_ARG) 2350 { 2351 if (!n) 2352 fprintf (stderr, "Memory class\n"); 2353 else 2354 { 2355 fprintf (stderr, "Classes:"); 2356 for (i = 0; i < n; i++) 2357 { 2358 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]); 2359 } 2360 fprintf (stderr, "\n"); 2361 } 2362 } 2363 if (!n) 2364 return NULL; 2365 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs)) 2366 return NULL; 2367 if (needed_intregs > nintregs || needed_sseregs > nsseregs) 2368 return NULL; 2369 2370 /* First construct simple cases. Avoid SCmode, since we want to use 2371 single register to pass this type. */ 2372 if (n == 1 && mode != SCmode) 2373 switch (class[0]) 2374 { 2375 case X86_64_INTEGER_CLASS: 2376 case X86_64_INTEGERSI_CLASS: 2377 return gen_rtx_REG (mode, intreg[0]); 2378 case X86_64_SSE_CLASS: 2379 case X86_64_SSESF_CLASS: 2380 case X86_64_SSEDF_CLASS: 2381 return gen_rtx_REG (mode, SSE_REGNO (sse_regno)); 2382 case X86_64_X87_CLASS: 2383 return gen_rtx_REG (mode, FIRST_STACK_REG); 2384 case X86_64_NO_CLASS: 2385 /* Zero sized array, struct or class. 
*/ 2386 return NULL; 2387 default: 2388 abort (); 2389 } 2390 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS 2391 && mode != BLKmode) 2392 return gen_rtx_REG (mode, SSE_REGNO (sse_regno)); 2393 if (n == 2 2394 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS) 2395 return gen_rtx_REG (XFmode, FIRST_STACK_REG); 2396 if (n == 2 && class[0] == X86_64_INTEGER_CLASS 2397 && class[1] == X86_64_INTEGER_CLASS 2398 && (mode == CDImode || mode == TImode || mode == TFmode) 2399 && intreg[0] + 1 == intreg[1]) 2400 return gen_rtx_REG (mode, intreg[0]); 2401 if (n == 4 2402 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS 2403 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS 2404 && mode != BLKmode) 2405 return gen_rtx_REG (XCmode, FIRST_STACK_REG); 2406 2407 /* Otherwise figure out the entries of the PARALLEL. */ 2408 for (i = 0; i < n; i++) 2409 { 2410 switch (class[i]) 2411 { 2412 case X86_64_NO_CLASS: 2413 break; 2414 case X86_64_INTEGER_CLASS: 2415 case X86_64_INTEGERSI_CLASS: 2416 /* Merge TImodes on aligned occasions here too. */ 2417 if (i * 8 + 8 > bytes) 2418 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0); 2419 else if (class[i] == X86_64_INTEGERSI_CLASS) 2420 tmpmode = SImode; 2421 else 2422 tmpmode = DImode; 2423 /* We've requested 24 bytes we don't have mode for. Use DImode. 
*/ 2424 if (tmpmode == BLKmode) 2425 tmpmode = DImode; 2426 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, 2427 gen_rtx_REG (tmpmode, *intreg), 2428 GEN_INT (i*8)); 2429 intreg++; 2430 break; 2431 case X86_64_SSESF_CLASS: 2432 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, 2433 gen_rtx_REG (SFmode, 2434 SSE_REGNO (sse_regno)), 2435 GEN_INT (i*8)); 2436 sse_regno++; 2437 break; 2438 case X86_64_SSEDF_CLASS: 2439 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, 2440 gen_rtx_REG (DFmode, 2441 SSE_REGNO (sse_regno)), 2442 GEN_INT (i*8)); 2443 sse_regno++; 2444 break; 2445 case X86_64_SSE_CLASS: 2446 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS) 2447 tmpmode = TImode; 2448 else 2449 tmpmode = DImode; 2450 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, 2451 gen_rtx_REG (tmpmode, 2452 SSE_REGNO (sse_regno)), 2453 GEN_INT (i*8)); 2454 if (tmpmode == TImode) 2455 i++; 2456 sse_regno++; 2457 break; 2458 default: 2459 abort (); 2460 } 2461 } 2462 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps)); 2463 for (i = 0; i < nexps; i++) 2464 XVECEXP (ret, 0, i) = exp [i]; 2465 return ret; 2466} 2467 2468/* Update the data in CUM to advance over an argument 2469 of mode MODE and data type TYPE. 2470 (TYPE is null for libcalls where that information may not be available.) */ 2471 2472void 2473function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */ 2474 enum machine_mode mode, /* current arg mode */ 2475 tree type, /* type of the argument or 0 if lib support */ 2476 int named) /* whether or not the argument was named */ 2477{ 2478 int bytes = 2479 (mode == BLKmode) ? 
int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 2480 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 2481 2482 if (TARGET_DEBUG_ARG) 2483 fprintf (stderr, 2484 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n", 2485 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named); 2486 if (TARGET_64BIT) 2487 { 2488 int int_nregs, sse_nregs; 2489 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)) 2490 cum->words += words; 2491 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) 2492 { 2493 cum->nregs -= int_nregs; 2494 cum->sse_nregs -= sse_nregs; 2495 cum->regno += int_nregs; 2496 cum->sse_regno += sse_nregs; 2497 } 2498 else 2499 cum->words += words; 2500 } 2501 else 2502 { 2503 if (TARGET_SSE && SSE_REG_MODE_P (mode) 2504 && (!type || !AGGREGATE_TYPE_P (type))) 2505 { 2506 cum->sse_words += words; 2507 cum->sse_nregs -= 1; 2508 cum->sse_regno += 1; 2509 if (cum->sse_nregs <= 0) 2510 { 2511 cum->sse_nregs = 0; 2512 cum->sse_regno = 0; 2513 } 2514 } 2515 else if (TARGET_MMX && MMX_REG_MODE_P (mode) 2516 && (!type || !AGGREGATE_TYPE_P (type))) 2517 { 2518 cum->mmx_words += words; 2519 cum->mmx_nregs -= 1; 2520 cum->mmx_regno += 1; 2521 if (cum->mmx_nregs <= 0) 2522 { 2523 cum->mmx_nregs = 0; 2524 cum->mmx_regno = 0; 2525 } 2526 } 2527 else 2528 { 2529 cum->words += words; 2530 cum->nregs -= words; 2531 cum->regno += words; 2532 2533 if (cum->nregs <= 0) 2534 { 2535 cum->nregs = 0; 2536 cum->regno = 0; 2537 } 2538 } 2539 } 2540 return; 2541} 2542 2543/* Define where to put the arguments to a function. 2544 Value is zero to push the argument on the stack, 2545 or a hard register in which to store the argument. 2546 2547 MODE is the argument's machine mode. 2548 TYPE is the data type of the argument (as a tree). 2549 This is null for libcalls where that information may 2550 not be available. 
2551 CUM is a variable of type CUMULATIVE_ARGS which gives info about 2552 the preceding args and about the function being called. 2553 NAMED is nonzero if this argument is a named parameter 2554 (otherwise it is an extra parameter matching an ellipsis). */ 2555 2556rtx 2557function_arg (CUMULATIVE_ARGS *cum, /* current arg information */ 2558 enum machine_mode mode, /* current arg mode */ 2559 tree type, /* type of the argument or 0 if lib support */ 2560 int named) /* != 0 for normal args, == 0 for ... args */ 2561{ 2562 rtx ret = NULL_RTX; 2563 int bytes = 2564 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 2565 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 2566 static bool warnedsse, warnedmmx; 2567 2568 /* Handle a hidden AL argument containing number of registers for varargs 2569 x86-64 functions. For i386 ABI just return constm1_rtx to avoid 2570 any AL settings. */ 2571 if (mode == VOIDmode) 2572 { 2573 if (TARGET_64BIT) 2574 return GEN_INT (cum->maybe_vaarg 2575 ? (cum->sse_nregs < 0 2576 ? SSE_REGPARM_MAX 2577 : cum->sse_regno) 2578 : -1); 2579 else 2580 return constm1_rtx; 2581 } 2582 if (TARGET_64BIT) 2583 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs, 2584 &x86_64_int_parameter_registers [cum->regno], 2585 cum->sse_regno); 2586 else 2587 switch (mode) 2588 { 2589 /* For now, pass fp/complex values on the stack. */ 2590 default: 2591 break; 2592 2593 case BLKmode: 2594 if (bytes < 0) 2595 break; 2596 /* FALLTHRU */ 2597 case DImode: 2598 case SImode: 2599 case HImode: 2600 case QImode: 2601 if (words <= cum->nregs) 2602 { 2603 int regno = cum->regno; 2604 2605 /* Fastcall allocates the first two DWORD (SImode) or 2606 smaller arguments to ECX and EDX. */ 2607 if (cum->fastcall) 2608 { 2609 if (mode == BLKmode || mode == DImode) 2610 break; 2611 2612 /* ECX not EAX is the first allocated register. 
*/ 2613 if (regno == 0) 2614 regno = 2; 2615 } 2616 ret = gen_rtx_REG (mode, regno); 2617 } 2618 break; 2619 case TImode: 2620 case V16QImode: 2621 case V8HImode: 2622 case V4SImode: 2623 case V2DImode: 2624 case V4SFmode: 2625 case V2DFmode: 2626 if (!type || !AGGREGATE_TYPE_P (type)) 2627 { 2628 if (!TARGET_SSE && !warnedmmx && cum->warn_sse) 2629 { 2630 warnedsse = true; 2631 warning ("SSE vector argument without SSE enabled " 2632 "changes the ABI"); 2633 } 2634 if (cum->sse_nregs) 2635 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG); 2636 } 2637 break; 2638 case V8QImode: 2639 case V4HImode: 2640 case V2SImode: 2641 case V2SFmode: 2642 if (!type || !AGGREGATE_TYPE_P (type)) 2643 { 2644 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx) 2645 { 2646 warnedmmx = true; 2647 warning ("MMX vector argument without MMX enabled " 2648 "changes the ABI"); 2649 } 2650 if (cum->mmx_nregs) 2651 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG); 2652 } 2653 break; 2654 } 2655 2656 if (TARGET_DEBUG_ARG) 2657 { 2658 fprintf (stderr, 2659 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ", 2660 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named); 2661 2662 if (ret) 2663 print_simple_rtl (stderr, ret); 2664 else 2665 fprintf (stderr, ", stack"); 2666 2667 fprintf (stderr, " )\n"); 2668 } 2669 2670 return ret; 2671} 2672 2673/* A C expression that indicates when an argument must be passed by 2674 reference. If nonzero for an argument, a copy of that argument is 2675 made in memory and a pointer to the argument is passed instead of 2676 the argument itself. The pointer is passed in whatever way is 2677 appropriate for passing a pointer to that type. 
*/ 2678 2679int 2680function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED, 2681 enum machine_mode mode ATTRIBUTE_UNUSED, 2682 tree type, int named ATTRIBUTE_UNUSED) 2683{ 2684 if (!TARGET_64BIT) 2685 return 0; 2686 2687 if (type && int_size_in_bytes (type) == -1) 2688 { 2689 if (TARGET_DEBUG_ARG) 2690 fprintf (stderr, "function_arg_pass_by_reference\n"); 2691 return 1; 2692 } 2693 2694 return 0; 2695} 2696 2697/* Return true when TYPE should be 128bit aligned for 32bit argument passing 2698 ABI */ 2699static bool 2700contains_128bit_aligned_vector_p (tree type) 2701{ 2702 enum machine_mode mode = TYPE_MODE (type); 2703 if (SSE_REG_MODE_P (mode) 2704 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128)) 2705 return true; 2706 if (TYPE_ALIGN (type) < 128) 2707 return false; 2708 2709 if (AGGREGATE_TYPE_P (type)) 2710 { 2711 /* Walk the aggregates recursively. */ 2712 if (TREE_CODE (type) == RECORD_TYPE 2713 || TREE_CODE (type) == UNION_TYPE 2714 || TREE_CODE (type) == QUAL_UNION_TYPE) 2715 { 2716 tree field; 2717 2718 if (TYPE_BINFO (type) != NULL 2719 && TYPE_BINFO_BASETYPES (type) != NULL) 2720 { 2721 tree bases = TYPE_BINFO_BASETYPES (type); 2722 int n_bases = TREE_VEC_LENGTH (bases); 2723 int i; 2724 2725 for (i = 0; i < n_bases; ++i) 2726 { 2727 tree binfo = TREE_VEC_ELT (bases, i); 2728 tree type = BINFO_TYPE (binfo); 2729 2730 if (contains_128bit_aligned_vector_p (type)) 2731 return true; 2732 } 2733 } 2734 /* And now merge the fields of structure. */ 2735 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 2736 { 2737 if (TREE_CODE (field) == FIELD_DECL 2738 && contains_128bit_aligned_vector_p (TREE_TYPE (field))) 2739 return true; 2740 } 2741 } 2742 /* Just for use if some languages passes arrays by value. 
*/ 2743 else if (TREE_CODE (type) == ARRAY_TYPE) 2744 { 2745 if (contains_128bit_aligned_vector_p (TREE_TYPE (type))) 2746 return true; 2747 } 2748 else 2749 abort (); 2750 } 2751 return false; 2752} 2753 2754/* Gives the alignment boundary, in bits, of an argument with the 2755 specified mode and type. */ 2756 2757int 2758ix86_function_arg_boundary (enum machine_mode mode, tree type) 2759{ 2760 int align; 2761 if (type) 2762 align = TYPE_ALIGN (type); 2763 else 2764 align = GET_MODE_ALIGNMENT (mode); 2765 if (align < PARM_BOUNDARY) 2766 align = PARM_BOUNDARY; 2767 if (!TARGET_64BIT) 2768 { 2769 /* i386 ABI defines all arguments to be 4 byte aligned. We have to 2770 make an exception for SSE modes since these require 128bit 2771 alignment. 2772 2773 The handling here differs from field_alignment. ICC aligns MMX 2774 arguments to 4 byte boundaries, while structure fields are aligned 2775 to 8 byte boundaries. */ 2776 if (!type) 2777 { 2778 if (!SSE_REG_MODE_P (mode)) 2779 align = PARM_BOUNDARY; 2780 } 2781 else 2782 { 2783 if (!contains_128bit_aligned_vector_p (type)) 2784 align = PARM_BOUNDARY; 2785 } 2786 } 2787 if (align > 128) 2788 align = 128; 2789 return align; 2790} 2791 2792/* Return true if N is a possible register number of function value. */ 2793bool 2794ix86_function_value_regno_p (int regno) 2795{ 2796 if (!TARGET_64BIT) 2797 { 2798 return ((regno) == 0 2799 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387) 2800 || ((regno) == FIRST_SSE_REG && TARGET_SSE)); 2801 } 2802 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG 2803 || ((regno) == FIRST_SSE_REG && TARGET_SSE) 2804 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)); 2805} 2806 2807/* Define how to find the value returned by a function. 2808 VALTYPE is the data type of the value (as a tree). 2809 If the precise function being called is known, FUNC is its FUNCTION_DECL; 2810 otherwise, FUNC is 0. 
*/ 2811rtx 2812ix86_function_value (tree valtype) 2813{ 2814 if (TARGET_64BIT) 2815 { 2816 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1, 2817 REGPARM_MAX, SSE_REGPARM_MAX, 2818 x86_64_int_return_registers, 0); 2819 /* For zero sized structures, construct_container return NULL, but we need 2820 to keep rest of compiler happy by returning meaningful value. */ 2821 if (!ret) 2822 ret = gen_rtx_REG (TYPE_MODE (valtype), 0); 2823 return ret; 2824 } 2825 else 2826 return gen_rtx_REG (TYPE_MODE (valtype), 2827 ix86_value_regno (TYPE_MODE (valtype))); 2828} 2829 2830/* Return false iff type is returned in memory. */ 2831int 2832ix86_return_in_memory (tree type) 2833{ 2834 int needed_intregs, needed_sseregs, size; 2835 enum machine_mode mode = TYPE_MODE (type); 2836 2837 if (TARGET_64BIT) 2838 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs); 2839 2840 if (mode == BLKmode) 2841 return 1; 2842 2843 size = int_size_in_bytes (type); 2844 2845 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8) 2846 return 0; 2847 2848 if (VECTOR_MODE_P (mode) || mode == TImode) 2849 { 2850 /* User-created vectors small enough to fit in EAX. */ 2851 if (size < 8) 2852 return 0; 2853 2854 /* MMX/3dNow values are returned on the stack, since we've 2855 got to EMMS/FEMMS before returning. */ 2856 if (size == 8) 2857 return 1; 2858 2859 /* SSE values are returned in XMM0. */ 2860 /* ??? Except when it doesn't exist? We have a choice of 2861 either (1) being abi incompatible with a -march switch, 2862 or (2) generating an error here. Given no good solution, 2863 I think the safest thing is one warning. The user won't 2864 be able to use -Werror, but.... 
*/ 2865 if (size == 16) 2866 { 2867 static bool warned; 2868 2869 if (TARGET_SSE) 2870 return 0; 2871 2872 if (!warned) 2873 { 2874 warned = true; 2875 warning ("SSE vector return without SSE enabled " 2876 "changes the ABI"); 2877 } 2878 return 1; 2879 } 2880 } 2881 2882 if (mode == XFmode) 2883 return 0; 2884 2885 if (size > 12) 2886 return 1; 2887 return 0; 2888} 2889 2890/* Define how to find the value returned by a library function 2891 assuming the value has mode MODE. */ 2892rtx 2893ix86_libcall_value (enum machine_mode mode) 2894{ 2895 if (TARGET_64BIT) 2896 { 2897 switch (mode) 2898 { 2899 case SFmode: 2900 case SCmode: 2901 case DFmode: 2902 case DCmode: 2903 return gen_rtx_REG (mode, FIRST_SSE_REG); 2904 case XFmode: 2905 case XCmode: 2906 return gen_rtx_REG (mode, FIRST_FLOAT_REG); 2907 case TFmode: 2908 case TCmode: 2909 return NULL; 2910 default: 2911 return gen_rtx_REG (mode, 0); 2912 } 2913 } 2914 else 2915 return gen_rtx_REG (mode, ix86_value_regno (mode)); 2916} 2917 2918/* Given a mode, return the register to use for a return value. */ 2919 2920static int 2921ix86_value_regno (enum machine_mode mode) 2922{ 2923 /* Floating point return values in %st(0). */ 2924 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387) 2925 return FIRST_FLOAT_REG; 2926 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where 2927 we prevent this case when sse is not available. */ 2928 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) 2929 return FIRST_SSE_REG; 2930 /* Everything else in %eax. */ 2931 return 0; 2932} 2933 2934/* Create the va_list data type. */ 2935 2936static tree 2937ix86_build_builtin_va_list (void) 2938{ 2939 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl; 2940 2941 /* For i386 we use plain pointer to argument area. 
*/
  if (!TARGET_64BIT)
    return build_pointer_type (char_type_node);

  /* On x86-64, va_list is a one-element array of a four-field record;
     build that record here.  */
  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  /* Each field belongs to the record being built.  */
  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  /* Chain the fields onto the record and name it.  */
  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}

/* Perform any needed actions needed for a function that is receiving a
   variable number of arguments.

   CUM is as above.

   MODE and TYPE are the mode and type of the current parameter.

   PRETEND_SIZE is a variable that should be set to the amount of stack
   that must be pushed by the prolog to pretend that our caller pushed
   it.

   Normally, this macro will push all remaining incoming registers on the
   stack and set PRETEND_SIZE to the length of the registers pushed.
*/

void
ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
			     int no_rtl)
{
  CUMULATIVE_ARGS next_cum;
  rtx save_area = NULL_RTX, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  int set;
  tree fntype;
  int stdarg_p;
  int i;

  /* Only the 64-bit ABI saves argument registers for varargs.  */
  if (!TARGET_64BIT)
    return;

  /* Indicate to allocate space on the stack for varargs save area.  */
  ix86_save_varrargs_registers = 1;

  /* The SSE saves below require 16-byte stack alignment.  */
  cfun->stack_alignment_needed = 128;

  fntype = TREE_TYPE (current_function_decl);
  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
		  != void_type_node));

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p)
    function_arg_advance (&next_cum, mode, type, 1);

  if (!no_rtl)
    save_area = frame_pointer_rtx;

  set = get_varargs_alias_set ();

  /* Save each remaining unnamed integer argument register into its
     word-sized slot in the save area.  */
  for (i = next_cum.regno; i < ix86_regparm; i++)
    {
      mem = gen_rtx_MEM (Pmode,
			 plus_constant (save_area, i * UNITS_PER_WORD));
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
					x86_64_int_parameter_registers[i]));
    }

  if (next_cum.sse_nregs)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function.  We use
	 sse_prologue_save insn template that produces computed jump across
	 SSE saves.  We need some preparation work to get this working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute address to jump to:
	 label - eax*4 + nnamed_sse_arguments*4.
	 NOTE(review): the original comment said a scale of 5, but the code
	 below clearly multiplies by 4 (GEN_INT (4) and sse_regno * 4);
	 presumably each save insn in the sse_prologue_save template is
	 4 bytes long -- confirm against i386.md.  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      gen_rtx_MULT (Pmode, nsse_reg,
					    GEN_INT (4))));
      if (next_cum.sse_regno)
	emit_move_insn
	  (nsse_reg,
	   gen_rtx_CONST (DImode,
			  gen_rtx_PLUS (DImode,
					label_ref,
					GEN_INT (next_cum.sse_regno * 4))));
      else
	emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute address of memory block we save into.  We always use pointer
	 pointing 127 bytes after first byte to store - this is needed to keep
	 instruction size limited by 4 bytes.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
			      plus_constant (save_area,
					     8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
					GEN_INT (next_cum.sse_regno), label));
    }

}

/* Implement va_start.  */

void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  /* Only 64bit target needs something special.
*/
  if (!TARGET_64BIT)
    {
      /* The 32-bit va_list is a plain pointer; generic code handles it.  */
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  /* Fields of the x86-64 va_list record, in declaration order
     (see ix86_build_builtin_va_list).  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
	     (int) words, (int) n_gpr, (int) n_fpr);

  /* gp_offset = bytes of integer registers already consumed (8 each).  */
  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
	     build_int_2 (n_gpr * 8, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* fp_offset starts after all integer register slots; SSE slots are
     16 bytes each.  */
  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
	     build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the overflow area.  */
  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
  if (words != 0)
    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
	       build_int_2 (words * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the register save area.
     Prologue of the function save it right above stack frame.  */
  t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
  t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}

/* Implement va_arg.  */
rtx
ix86_va_arg (tree valist, tree type)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  rtx lab_false, lab_over = NULL_RTX;
  rtx addr_rtx, r;
  rtx container;
  int indirect_p = 0;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      return std_expand_builtin_va_arg (valist, type);
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);

  size = int_size_in_bytes (type);
  if (size == -1)
    {
      /* Variable-sized types are passed by reference.  */
      indirect_p = 1;
      type = build_pointer_type (type);
      size = int_size_in_bytes (type);
    }
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  container = construct_container (TYPE_MODE (type), type, 0,
				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
  /*
   * Pull the value out of the saved registers ...
*/

  addr_rtx = gen_reg_rtx (Pmode);

  if (container)
    {
      rtx int_addr_rtx, sse_addr_rtx;
      int needed_intregs, needed_sseregs;
      int need_temp;

      lab_over = gen_label_rtx ();
      lab_false = gen_label_rtx ();

      examine_argument (TYPE_MODE (type), type, 0,
			&needed_intregs, &needed_sseregs);


      /* A temporary is needed when the type is aligned more strictly
	 than its slots in the register save area.  */
      need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
		   || TYPE_ALIGN (type) > 128);

      /* In case we are passing structure, verify that it is consecutive block
	 on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = 1;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = 1;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr_rtx = addr_rtx;
	  sse_addr_rtx = addr_rtx;
	}
      else
	{
	  int_addr_rtx = gen_reg_rtx (Pmode);
	  sse_addr_rtx = gen_reg_rtx (Pmode);
	}
      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  emit_cmp_and_jump_insns (expand_expr
				   (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
				   GEN_INT ((REGPARM_MAX - needed_intregs +
					     1) * 8), GE, const1_rtx, SImode,
				   1, lab_false);
	}
      if (needed_sseregs)
	{
	  emit_cmp_and_jump_insns (expand_expr
				   (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
				   GEN_INT ((SSE_REGPARM_MAX -
					     needed_sseregs + 1) * 16 +
					    REGPARM_MAX * 8), GE, const1_rtx,
				   SImode, 1, lab_false);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
	  r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
	  if (r != int_addr_rtx)
	    emit_move_insn (int_addr_rtx, r);
	}
      if (needed_sseregs)
	{
	  t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
	  r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
	  if (r != sse_addr_rtx)
	    emit_move_insn (sse_addr_rtx, r);
	}
      if (need_temp)
	{
	  int i;
	  rtx mem;
	  rtx x;

	  /* Never use the memory itself, as it has the alias set.  */
	  x = XEXP (assign_temp (type, 0, 1, 0), 0);
	  mem = gen_rtx_MEM (BLKmode, x);
	  force_operand (x, addr_rtx);
	  set_mem_alias_set (mem, get_varargs_alias_set ());
	  set_mem_align (mem, BITS_PER_UNIT);

	  /* Copy each register slot from the save area into the
	     temporary at its final offset.  */
	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      rtx src_addr;
	      rtx src_mem;
	      int src_offset;
	      rtx dest_mem;

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr_rtx;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr_rtx;
		  src_offset = REGNO (reg) * 8;
		}
	      src_mem = gen_rtx_MEM (mode, src_addr);
	      set_mem_alias_set (src_mem, get_varargs_alias_set ());
	      src_mem = adjust_address (src_mem, mode, src_offset);
	      dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
	      emit_move_insn (dest_mem, src_mem);
	    }
	}

      /* Advance gp_offset / fp_offset past the registers consumed.  */
      if (needed_intregs)
	{
	  t =
	    build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		   build_int_2 (needed_intregs * 8, 0));
	  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
	}
      if (needed_sseregs)
	{
	  t =
	    build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		   build_int_2 (needed_sseregs * 16, 0));
	  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
	  TREE_SIDE_EFFECTS (t) = 1;
	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
	}

      emit_jump_insn (gen_jump (lab_over));
      emit_barrier ();
      emit_label (lab_false);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
    t = ovf;
  else
    {
      /* Round the overflow pointer up to the required alignment.  */
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
    }
  t = save_expr (t);

  r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
  if (r != addr_rtx)
    emit_move_insn (addr_rtx, r);

  /* Bump the overflow pointer past the argument just fetched.  */
  t =
    build (PLUS_EXPR, TREE_TYPE (t), t,
	   build_int_2 (rsize * UNITS_PER_WORD, 0));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (container)
    emit_label (lab_over);

  if (indirect_p)
    {
      /* The value was passed by reference; dereference once more.  */
      r = gen_rtx_MEM (Pmode, addr_rtx);
      set_mem_alias_set (r, get_varargs_alias_set ());
      emit_move_insn (addr_rtx, r);
    }

  return addr_rtx;
}

/* Return nonzero if OP is either a i387 or SSE fp register.  */
int
any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return ANY_FP_REG_P (op);
}

/* Return nonzero if OP is an i387 fp register.  */
int
fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return FP_REG_P (op);
}

/* Return nonzero if OP is a non-fp register_operand.  */
int
register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
{
  return register_operand (op, mode) && !ANY_FP_REG_P (op);
}

/* Return nonzero if OP is a register operand other than an
   i387 fp register.  */
int
register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
{
  return register_operand (op, mode) && !FP_REG_P (op);
}

/* Return nonzero if OP is general operand representable on x86_64.
*/ 3418 3419int 3420x86_64_general_operand (rtx op, enum machine_mode mode) 3421{ 3422 if (!TARGET_64BIT) 3423 return general_operand (op, mode); 3424 if (nonimmediate_operand (op, mode)) 3425 return 1; 3426 return x86_64_sign_extended_value (op); 3427} 3428 3429/* Return nonzero if OP is general operand representable on x86_64 3430 as either sign extended or zero extended constant. */ 3431 3432int 3433x86_64_szext_general_operand (rtx op, enum machine_mode mode) 3434{ 3435 if (!TARGET_64BIT) 3436 return general_operand (op, mode); 3437 if (nonimmediate_operand (op, mode)) 3438 return 1; 3439 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op); 3440} 3441 3442/* Return nonzero if OP is nonmemory operand representable on x86_64. */ 3443 3444int 3445x86_64_nonmemory_operand (rtx op, enum machine_mode mode) 3446{ 3447 if (!TARGET_64BIT) 3448 return nonmemory_operand (op, mode); 3449 if (register_operand (op, mode)) 3450 return 1; 3451 return x86_64_sign_extended_value (op); 3452} 3453 3454/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */ 3455 3456int 3457x86_64_movabs_operand (rtx op, enum machine_mode mode) 3458{ 3459 if (!TARGET_64BIT || !flag_pic) 3460 return nonmemory_operand (op, mode); 3461 if (register_operand (op, mode) || x86_64_sign_extended_value (op)) 3462 return 1; 3463 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op)) 3464 return 1; 3465 return 0; 3466} 3467 3468/* Return nonzero if OPNUM's MEM should be matched 3469 in movabs* patterns. 
*/ 3470 3471int 3472ix86_check_movabs (rtx insn, int opnum) 3473{ 3474 rtx set, mem; 3475 3476 set = PATTERN (insn); 3477 if (GET_CODE (set) == PARALLEL) 3478 set = XVECEXP (set, 0, 0); 3479 if (GET_CODE (set) != SET) 3480 abort (); 3481 mem = XEXP (set, opnum); 3482 while (GET_CODE (mem) == SUBREG) 3483 mem = SUBREG_REG (mem); 3484 if (GET_CODE (mem) != MEM) 3485 abort (); 3486 return (volatile_ok || !MEM_VOLATILE_P (mem)); 3487} 3488 3489/* Return nonzero if OP is nonmemory operand representable on x86_64. */ 3490 3491int 3492x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode) 3493{ 3494 if (!TARGET_64BIT) 3495 return nonmemory_operand (op, mode); 3496 if (register_operand (op, mode)) 3497 return 1; 3498 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op); 3499} 3500 3501/* Return nonzero if OP is immediate operand representable on x86_64. */ 3502 3503int 3504x86_64_immediate_operand (rtx op, enum machine_mode mode) 3505{ 3506 if (!TARGET_64BIT) 3507 return immediate_operand (op, mode); 3508 return x86_64_sign_extended_value (op); 3509} 3510 3511/* Return nonzero if OP is immediate operand representable on x86_64. */ 3512 3513int 3514x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 3515{ 3516 return x86_64_zero_extended_value (op); 3517} 3518 3519/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand 3520 for shift & compare patterns, as shifting by 0 does not change flags), 3521 else return zero. */ 3522 3523int 3524const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 3525{ 3526 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31); 3527} 3528 3529/* Returns 1 if OP is either a symbol reference or a sum of a symbol 3530 reference and a constant. 
*/

int
symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      /* A bare symbol, label, or GOT-style unspec inside the CONST.  */
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && (XINT (op, 1) == UNSPEC_GOT
		  || XINT (op, 1) == UNSPEC_GOTOFF
		  || XINT (op, 1) == UNSPEC_GOTPCREL)))
	return 1;
      /* Otherwise it must be symbol-or-unspec plus a constant offset.  */
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != UNSPEC_GOTOFF)
	return 0;

      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}

/* Return true if the operand contains a @GOT or @GOTOFF reference.  */

int
pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) != CONST)
    return 0;
  op = XEXP (op, 0);
  if (TARGET_64BIT)
    {
      /* 64-bit PIC references use @GOTPCREL, optionally with an offset.  */
      if (GET_CODE (op) == UNSPEC
	  && XINT (op, 1) == UNSPEC_GOTPCREL)
	return 1;
      if (GET_CODE (op) == PLUS
	  && GET_CODE (XEXP (op, 0)) == UNSPEC
	  && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
	return 1;
    }
  else
    {
      if (GET_CODE (op) == UNSPEC)
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
    }
  return 0;
}

/* Return true if OP is a symbolic operand that resolves locally.  */

static int
local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Strip a constant offset wrapper first.  */
  if (GET_CODE (op) == CONST
      && GET_CODE (XEXP (op, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
    op = XEXP (XEXP (op, 0), 0);

  if (GET_CODE (op) == LABEL_REF)
    return 1;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  if (SYMBOL_REF_LOCAL_P (op))
    return 1;

  /* There is, however, a not insubstantial body of code in the rest of
     the compiler that assumes it can just stick the results of
     ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
  /* ??? This is a hack.  Should update the body of the compiler to
     always create a DECL an invoke targetm.encode_section_info.  */
  if (strncmp (XSTR (op, 0), internal_label_prefix,
	       internal_label_prefix_len) == 0)
    return 1;

  return 0;
}

/* Test for various thread-local symbols.  */

int
tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  return SYMBOL_REF_TLS_MODEL (op);
}

/* Helper: nonzero iff OP is a SYMBOL_REF with TLS model KIND.  */
static inline int
tls_symbolic_operand_1 (rtx op, enum tls_model kind)
{
  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  return SYMBOL_REF_TLS_MODEL (op) == kind;
}

int
global_dynamic_symbolic_operand (rtx op,
				 enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
}

int
local_dynamic_symbolic_operand (rtx op,
				enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
}

int
initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
}

int
local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
}

/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}

/* Test for a valid operand for a sibling call instruction.  Don't allow
   the arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Otherwise we can only allow register operands.
*/ 3731 return register_operand (op, Pmode); 3732} 3733 3734int 3735constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 3736{ 3737 if (GET_CODE (op) == CONST 3738 && GET_CODE (XEXP (op, 0)) == PLUS 3739 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT) 3740 op = XEXP (XEXP (op, 0), 0); 3741 return GET_CODE (op) == SYMBOL_REF; 3742} 3743 3744/* Match exactly zero and one. */ 3745 3746int 3747const0_operand (rtx op, enum machine_mode mode) 3748{ 3749 return op == CONST0_RTX (mode); 3750} 3751 3752int 3753const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 3754{ 3755 return op == const1_rtx; 3756} 3757 3758/* Match 2, 4, or 8. Used for leal multiplicands. */ 3759 3760int 3761const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 3762{ 3763 return (GET_CODE (op) == CONST_INT 3764 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8)); 3765} 3766 3767int 3768const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 3769{ 3770 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4); 3771} 3772 3773int 3774const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 3775{ 3776 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8); 3777} 3778 3779int 3780const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 3781{ 3782 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16); 3783} 3784 3785int 3786const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 3787{ 3788 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256); 3789} 3790 3791 3792/* True if this is a constant appropriate for an increment or decrement. */ 3793 3794int 3795incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 3796{ 3797 /* On Pentium4, the inc and dec operations causes extra dependency on flag 3798 registers, since carry flag is not set. 
*/ 3799 if (TARGET_PENTIUM4 && !optimize_size) 3800 return 0; 3801 return op == const1_rtx || op == constm1_rtx; 3802} 3803 3804/* Return nonzero if OP is acceptable as operand of DImode shift 3805 expander. */ 3806 3807int 3808shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 3809{ 3810 if (TARGET_64BIT) 3811 return nonimmediate_operand (op, mode); 3812 else 3813 return register_operand (op, mode); 3814} 3815 3816/* Return false if this is the stack pointer, or any other fake 3817 register eliminable to the stack pointer. Otherwise, this is 3818 a register operand. 3819 3820 This is used to prevent esp from being used as an index reg. 3821 Which would only happen in pathological cases. */ 3822 3823int 3824reg_no_sp_operand (rtx op, enum machine_mode mode) 3825{ 3826 rtx t = op; 3827 if (GET_CODE (t) == SUBREG) 3828 t = SUBREG_REG (t); 3829 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx) 3830 return 0; 3831 3832 return register_operand (op, mode); 3833} 3834 3835int 3836mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 3837{ 3838 return MMX_REG_P (op); 3839} 3840 3841/* Return false if this is any eliminable register. Otherwise 3842 general_operand. */ 3843 3844int 3845general_no_elim_operand (rtx op, enum machine_mode mode) 3846{ 3847 rtx t = op; 3848 if (GET_CODE (t) == SUBREG) 3849 t = SUBREG_REG (t); 3850 if (t == arg_pointer_rtx || t == frame_pointer_rtx 3851 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx 3852 || t == virtual_stack_dynamic_rtx) 3853 return 0; 3854 if (REG_P (t) 3855 && REGNO (t) >= FIRST_VIRTUAL_REGISTER 3856 && REGNO (t) <= LAST_VIRTUAL_REGISTER) 3857 return 0; 3858 3859 return general_operand (op, mode); 3860} 3861 3862/* Return false if this is any eliminable register. Otherwise 3863 register_operand or const_int. 
*/ 3864 3865int 3866nonmemory_no_elim_operand (rtx op, enum machine_mode mode) 3867{ 3868 rtx t = op; 3869 if (GET_CODE (t) == SUBREG) 3870 t = SUBREG_REG (t); 3871 if (t == arg_pointer_rtx || t == frame_pointer_rtx 3872 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx 3873 || t == virtual_stack_dynamic_rtx) 3874 return 0; 3875 3876 return GET_CODE (op) == CONST_INT || register_operand (op, mode); 3877} 3878 3879/* Return false if this is any eliminable register or stack register, 3880 otherwise work like register_operand. */ 3881 3882int 3883index_register_operand (rtx op, enum machine_mode mode) 3884{ 3885 rtx t = op; 3886 if (GET_CODE (t) == SUBREG) 3887 t = SUBREG_REG (t); 3888 if (!REG_P (t)) 3889 return 0; 3890 if (t == arg_pointer_rtx 3891 || t == frame_pointer_rtx 3892 || t == virtual_incoming_args_rtx 3893 || t == virtual_stack_vars_rtx 3894 || t == virtual_stack_dynamic_rtx 3895 || REGNO (t) == STACK_POINTER_REGNUM) 3896 return 0; 3897 3898 return general_operand (op, mode); 3899} 3900 3901/* Return true if op is a Q_REGS class register. */ 3902 3903int 3904q_regs_operand (rtx op, enum machine_mode mode) 3905{ 3906 if (mode != VOIDmode && GET_MODE (op) != mode) 3907 return 0; 3908 if (GET_CODE (op) == SUBREG) 3909 op = SUBREG_REG (op); 3910 return ANY_QI_REG_P (op); 3911} 3912 3913/* Return true if op is an flags register. */ 3914 3915int 3916flags_reg_operand (rtx op, enum machine_mode mode) 3917{ 3918 if (mode != VOIDmode && GET_MODE (op) != mode) 3919 return 0; 3920 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode; 3921} 3922 3923/* Return true if op is a NON_Q_REGS class register. 
*/ 3924 3925int 3926non_q_regs_operand (rtx op, enum machine_mode mode) 3927{ 3928 if (mode != VOIDmode && GET_MODE (op) != mode) 3929 return 0; 3930 if (GET_CODE (op) == SUBREG) 3931 op = SUBREG_REG (op); 3932 return NON_QI_REG_P (op); 3933} 3934 3935int 3936zero_extended_scalar_load_operand (rtx op, 3937 enum machine_mode mode ATTRIBUTE_UNUSED) 3938{ 3939 unsigned n_elts; 3940 if (GET_CODE (op) != MEM) 3941 return 0; 3942 op = maybe_get_pool_constant (op); 3943 if (!op) 3944 return 0; 3945 if (GET_CODE (op) != CONST_VECTOR) 3946 return 0; 3947 n_elts = 3948 (GET_MODE_SIZE (GET_MODE (op)) / 3949 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op)))); 3950 for (n_elts--; n_elts > 0; n_elts--) 3951 { 3952 rtx elt = CONST_VECTOR_ELT (op, n_elts); 3953 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op)))) 3954 return 0; 3955 } 3956 return 1; 3957} 3958 3959/* Return 1 when OP is operand acceptable for standard SSE move. */ 3960int 3961vector_move_operand (rtx op, enum machine_mode mode) 3962{ 3963 if (nonimmediate_operand (op, mode)) 3964 return 1; 3965 if (GET_MODE (op) != mode && mode != VOIDmode) 3966 return 0; 3967 return (op == CONST0_RTX (GET_MODE (op))); 3968} 3969 3970/* Return true if op if a valid address, and does not contain 3971 a segment override. */ 3972 3973int 3974no_seg_address_operand (rtx op, enum machine_mode mode) 3975{ 3976 struct ix86_address parts; 3977 3978 if (! address_operand (op, mode)) 3979 return 0; 3980 3981 if (! ix86_decompose_address (op, &parts)) 3982 abort (); 3983 3984 return parts.seg == SEG_DEFAULT; 3985} 3986 3987/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS 3988 insns. */ 3989int 3990sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 3991{ 3992 enum rtx_code code = GET_CODE (op); 3993 switch (code) 3994 { 3995 /* Operations supported directly. 
*/ 3996 case EQ: 3997 case LT: 3998 case LE: 3999 case UNORDERED: 4000 case NE: 4001 case UNGE: 4002 case UNGT: 4003 case ORDERED: 4004 return 1; 4005 /* These are equivalent to ones above in non-IEEE comparisons. */ 4006 case UNEQ: 4007 case UNLT: 4008 case UNLE: 4009 case LTGT: 4010 case GE: 4011 case GT: 4012 return !TARGET_IEEE_FP; 4013 default: 4014 return 0; 4015 } 4016} 4017/* Return 1 if OP is a valid comparison operator in valid mode. */ 4018int 4019ix86_comparison_operator (rtx op, enum machine_mode mode) 4020{ 4021 enum machine_mode inmode; 4022 enum rtx_code code = GET_CODE (op); 4023 if (mode != VOIDmode && GET_MODE (op) != mode) 4024 return 0; 4025 if (GET_RTX_CLASS (code) != '<') 4026 return 0; 4027 inmode = GET_MODE (XEXP (op, 0)); 4028 4029 if (inmode == CCFPmode || inmode == CCFPUmode) 4030 { 4031 enum rtx_code second_code, bypass_code; 4032 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 4033 return (bypass_code == NIL && second_code == NIL); 4034 } 4035 switch (code) 4036 { 4037 case EQ: case NE: 4038 return 1; 4039 case LT: case GE: 4040 if (inmode == CCmode || inmode == CCGCmode 4041 || inmode == CCGOCmode || inmode == CCNOmode) 4042 return 1; 4043 return 0; 4044 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU: 4045 if (inmode == CCmode) 4046 return 1; 4047 return 0; 4048 case GT: case LE: 4049 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode) 4050 return 1; 4051 return 0; 4052 default: 4053 return 0; 4054 } 4055} 4056 4057/* Return 1 if OP is a valid comparison operator testing carry flag 4058 to be set. 
*/ 4059int 4060ix86_carry_flag_operator (rtx op, enum machine_mode mode) 4061{ 4062 enum machine_mode inmode; 4063 enum rtx_code code = GET_CODE (op); 4064 4065 if (mode != VOIDmode && GET_MODE (op) != mode) 4066 return 0; 4067 if (GET_RTX_CLASS (code) != '<') 4068 return 0; 4069 inmode = GET_MODE (XEXP (op, 0)); 4070 if (GET_CODE (XEXP (op, 0)) != REG 4071 || REGNO (XEXP (op, 0)) != 17 4072 || XEXP (op, 1) != const0_rtx) 4073 return 0; 4074 4075 if (inmode == CCFPmode || inmode == CCFPUmode) 4076 { 4077 enum rtx_code second_code, bypass_code; 4078 4079 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 4080 if (bypass_code != NIL || second_code != NIL) 4081 return 0; 4082 code = ix86_fp_compare_code_to_integer (code); 4083 } 4084 else if (inmode != CCmode) 4085 return 0; 4086 return code == LTU; 4087} 4088 4089/* Return 1 if OP is a comparison operator that can be issued by fcmov. */ 4090 4091int 4092fcmov_comparison_operator (rtx op, enum machine_mode mode) 4093{ 4094 enum machine_mode inmode; 4095 enum rtx_code code = GET_CODE (op); 4096 4097 if (mode != VOIDmode && GET_MODE (op) != mode) 4098 return 0; 4099 if (GET_RTX_CLASS (code) != '<') 4100 return 0; 4101 inmode = GET_MODE (XEXP (op, 0)); 4102 if (inmode == CCFPmode || inmode == CCFPUmode) 4103 { 4104 enum rtx_code second_code, bypass_code; 4105 4106 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 4107 if (bypass_code != NIL || second_code != NIL) 4108 return 0; 4109 code = ix86_fp_compare_code_to_integer (code); 4110 } 4111 /* i387 supports just limited amount of conditional codes. */ 4112 switch (code) 4113 { 4114 case LTU: case GTU: case LEU: case GEU: 4115 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode) 4116 return 1; 4117 return 0; 4118 case ORDERED: case UNORDERED: 4119 case EQ: case NE: 4120 return 1; 4121 default: 4122 return 0; 4123 } 4124} 4125 4126/* Return 1 if OP is a binary operator that can be promoted to wider mode. 
*/ 4127 4128int 4129promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 4130{ 4131 switch (GET_CODE (op)) 4132 { 4133 case MULT: 4134 /* Modern CPUs have same latency for HImode and SImode multiply, 4135 but 386 and 486 do HImode multiply faster. */ 4136 return ix86_tune > PROCESSOR_I486; 4137 case PLUS: 4138 case AND: 4139 case IOR: 4140 case XOR: 4141 case ASHIFT: 4142 return 1; 4143 default: 4144 return 0; 4145 } 4146} 4147 4148/* Nearly general operand, but accept any const_double, since we wish 4149 to be able to drop them into memory rather than have them get pulled 4150 into registers. */ 4151 4152int 4153cmp_fp_expander_operand (rtx op, enum machine_mode mode) 4154{ 4155 if (mode != VOIDmode && mode != GET_MODE (op)) 4156 return 0; 4157 if (GET_CODE (op) == CONST_DOUBLE) 4158 return 1; 4159 return general_operand (op, mode); 4160} 4161 4162/* Match an SI or HImode register for a zero_extract. */ 4163 4164int 4165ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 4166{ 4167 int regno; 4168 if ((!TARGET_64BIT || GET_MODE (op) != DImode) 4169 && GET_MODE (op) != SImode && GET_MODE (op) != HImode) 4170 return 0; 4171 4172 if (!register_operand (op, VOIDmode)) 4173 return 0; 4174 4175 /* Be careful to accept only registers having upper parts. */ 4176 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op)); 4177 return (regno > LAST_VIRTUAL_REGISTER || regno < 4); 4178} 4179 4180/* Return 1 if this is a valid binary floating-point operation. 4181 OP is the expression matched, and MODE is its mode. 
*/ 4182 4183int 4184binary_fp_operator (rtx op, enum machine_mode mode) 4185{ 4186 if (mode != VOIDmode && mode != GET_MODE (op)) 4187 return 0; 4188 4189 switch (GET_CODE (op)) 4190 { 4191 case PLUS: 4192 case MINUS: 4193 case MULT: 4194 case DIV: 4195 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT; 4196 4197 default: 4198 return 0; 4199 } 4200} 4201 4202int 4203mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 4204{ 4205 return GET_CODE (op) == MULT; 4206} 4207 4208int 4209div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) 4210{ 4211 return GET_CODE (op) == DIV; 4212} 4213 4214int 4215arith_or_logical_operator (rtx op, enum machine_mode mode) 4216{ 4217 return ((mode == VOIDmode || GET_MODE (op) == mode) 4218 && (GET_RTX_CLASS (GET_CODE (op)) == 'c' 4219 || GET_RTX_CLASS (GET_CODE (op)) == '2')); 4220} 4221 4222/* Returns 1 if OP is memory operand with a displacement. */ 4223 4224int 4225memory_displacement_operand (rtx op, enum machine_mode mode) 4226{ 4227 struct ix86_address parts; 4228 4229 if (! memory_operand (op, mode)) 4230 return 0; 4231 4232 if (! ix86_decompose_address (XEXP (op, 0), &parts)) 4233 abort (); 4234 4235 return parts.disp != NULL_RTX; 4236} 4237 4238/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0, 4239 re-recognize the operand to avoid a copy_to_mode_reg that will fail. 4240 4241 ??? It seems likely that this will only work because cmpsi is an 4242 expander, and no actual insns use this. 
*/ 4243 4244int 4245cmpsi_operand (rtx op, enum machine_mode mode) 4246{ 4247 if (nonimmediate_operand (op, mode)) 4248 return 1; 4249 4250 if (GET_CODE (op) == AND 4251 && GET_MODE (op) == SImode 4252 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT 4253 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT 4254 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT 4255 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8 4256 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8 4257 && GET_CODE (XEXP (op, 1)) == CONST_INT) 4258 return 1; 4259 4260 return 0; 4261} 4262 4263/* Returns 1 if OP is memory operand that can not be represented by the 4264 modRM array. */ 4265 4266int 4267long_memory_operand (rtx op, enum machine_mode mode) 4268{ 4269 if (! memory_operand (op, mode)) 4270 return 0; 4271 4272 return memory_address_length (op) != 0; 4273} 4274 4275/* Return nonzero if the rtx is known aligned. */ 4276 4277int 4278aligned_operand (rtx op, enum machine_mode mode) 4279{ 4280 struct ix86_address parts; 4281 4282 if (!general_operand (op, mode)) 4283 return 0; 4284 4285 /* Registers and immediate operands are always "aligned". */ 4286 if (GET_CODE (op) != MEM) 4287 return 1; 4288 4289 /* Don't even try to do any aligned optimizations with volatiles. */ 4290 if (MEM_VOLATILE_P (op)) 4291 return 0; 4292 4293 op = XEXP (op, 0); 4294 4295 /* Pushes and pops are only valid on the stack pointer. */ 4296 if (GET_CODE (op) == PRE_DEC 4297 || GET_CODE (op) == POST_INC) 4298 return 1; 4299 4300 /* Decode the address. */ 4301 if (! ix86_decompose_address (op, &parts)) 4302 abort (); 4303 4304 /* Look for some component that isn't known to be aligned. 
*/ 4305 if (parts.index) 4306 { 4307 if (parts.scale < 4 4308 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32) 4309 return 0; 4310 } 4311 if (parts.base) 4312 { 4313 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32) 4314 return 0; 4315 } 4316 if (parts.disp) 4317 { 4318 if (GET_CODE (parts.disp) != CONST_INT 4319 || (INTVAL (parts.disp) & 3) != 0) 4320 return 0; 4321 } 4322 4323 /* Didn't find one -- this must be an aligned address. */ 4324 return 1; 4325} 4326 4327/* Initialize the table of extra 80387 mathematical constants. */ 4328 4329static void 4330init_ext_80387_constants (void) 4331{ 4332 static const char * cst[5] = 4333 { 4334 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */ 4335 "0.6931471805599453094286904741849753009", /* 1: fldln2 */ 4336 "1.4426950408889634073876517827983434472", /* 2: fldl2e */ 4337 "3.3219280948873623478083405569094566090", /* 3: fldl2t */ 4338 "3.1415926535897932385128089594061862044", /* 4: fldpi */ 4339 }; 4340 int i; 4341 4342 for (i = 0; i < 5; i++) 4343 { 4344 real_from_string (&ext_80387_constants_table[i], cst[i]); 4345 /* Ensure each constant is rounded to XFmode precision. */ 4346 real_convert (&ext_80387_constants_table[i], 4347 XFmode, &ext_80387_constants_table[i]); 4348 } 4349 4350 ext_80387_constants_init = 1; 4351} 4352 4353/* Return true if the constant is something that can be loaded with 4354 a special instruction. */ 4355 4356int 4357standard_80387_constant_p (rtx x) 4358{ 4359 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x))) 4360 return -1; 4361 4362 if (x == CONST0_RTX (GET_MODE (x))) 4363 return 1; 4364 if (x == CONST1_RTX (GET_MODE (x))) 4365 return 2; 4366 4367 /* For XFmode constants, try to find a special 80387 instruction on 4368 those CPUs that benefit from them. */ 4369 if (GET_MODE (x) == XFmode 4370 && x86_ext_80387_constants & TUNEMASK) 4371 { 4372 REAL_VALUE_TYPE r; 4373 int i; 4374 4375 if (! 
ext_80387_constants_init) 4376 init_ext_80387_constants (); 4377 4378 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 4379 for (i = 0; i < 5; i++) 4380 if (real_identical (&r, &ext_80387_constants_table[i])) 4381 return i + 3; 4382 } 4383 4384 return 0; 4385} 4386 4387/* Return the opcode of the special instruction to be used to load 4388 the constant X. */ 4389 4390const char * 4391standard_80387_constant_opcode (rtx x) 4392{ 4393 switch (standard_80387_constant_p (x)) 4394 { 4395 case 1: 4396 return "fldz"; 4397 case 2: 4398 return "fld1"; 4399 case 3: 4400 return "fldlg2"; 4401 case 4: 4402 return "fldln2"; 4403 case 5: 4404 return "fldl2e"; 4405 case 6: 4406 return "fldl2t"; 4407 case 7: 4408 return "fldpi"; 4409 } 4410 abort (); 4411} 4412 4413/* Return the CONST_DOUBLE representing the 80387 constant that is 4414 loaded by the specified special instruction. The argument IDX 4415 matches the return value from standard_80387_constant_p. */ 4416 4417rtx 4418standard_80387_constant_rtx (int idx) 4419{ 4420 int i; 4421 4422 if (! ext_80387_constants_init) 4423 init_ext_80387_constants (); 4424 4425 switch (idx) 4426 { 4427 case 3: 4428 case 4: 4429 case 5: 4430 case 6: 4431 case 7: 4432 i = idx - 3; 4433 break; 4434 4435 default: 4436 abort (); 4437 } 4438 4439 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i], 4440 XFmode); 4441} 4442 4443/* Return 1 if X is FP constant we can load to SSE register w/o using memory. 
4444 */ 4445int 4446standard_sse_constant_p (rtx x) 4447{ 4448 if (x == const0_rtx) 4449 return 1; 4450 return (x == CONST0_RTX (GET_MODE (x))); 4451} 4452 4453/* Returns 1 if OP contains a symbol reference */ 4454 4455int 4456symbolic_reference_mentioned_p (rtx op) 4457{ 4458 const char *fmt; 4459 int i; 4460 4461 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) 4462 return 1; 4463 4464 fmt = GET_RTX_FORMAT (GET_CODE (op)); 4465 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) 4466 { 4467 if (fmt[i] == 'E') 4468 { 4469 int j; 4470 4471 for (j = XVECLEN (op, i) - 1; j >= 0; j--) 4472 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) 4473 return 1; 4474 } 4475 4476 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) 4477 return 1; 4478 } 4479 4480 return 0; 4481} 4482 4483/* Return 1 if it is appropriate to emit `ret' instructions in the 4484 body of a function. Do this only if the epilogue is simple, needing a 4485 couple of insns. Prior to reloading, we can't tell how many registers 4486 must be saved, so return 0 then. Return 0 if there is no frame 4487 marker to de-allocate. 4488 4489 If NON_SAVING_SETJMP is defined and true, then it is not possible 4490 for the epilogue to be simple, so return 0. This is a special case 4491 since NON_SAVING_SETJMP will not cause regs_ever_live to change 4492 until final, but jump_optimize may need to know sooner if a 4493 `return' is OK. */ 4494 4495int 4496ix86_can_use_return_insn_p (void) 4497{ 4498 struct ix86_frame frame; 4499 4500#ifdef NON_SAVING_SETJMP 4501 if (NON_SAVING_SETJMP && current_function_calls_setjmp) 4502 return 0; 4503#endif 4504 4505 if (! reload_completed || frame_pointer_needed) 4506 return 0; 4507 4508 /* Don't allow more than 32 pop, since that's all we can do 4509 with one instruction. 
*/ 4510 if (current_function_pops_args 4511 && current_function_args_size >= 32768) 4512 return 0; 4513 4514 ix86_compute_frame_layout (&frame); 4515 return frame.to_allocate == 0 && frame.nregs == 0; 4516} 4517 4518/* Return 1 if VALUE can be stored in the sign extended immediate field. */ 4519int 4520x86_64_sign_extended_value (rtx value) 4521{ 4522 switch (GET_CODE (value)) 4523 { 4524 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known 4525 to be at least 32 and this all acceptable constants are 4526 represented as CONST_INT. */ 4527 case CONST_INT: 4528 if (HOST_BITS_PER_WIDE_INT == 32) 4529 return 1; 4530 else 4531 { 4532 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode); 4533 return trunc_int_for_mode (val, SImode) == val; 4534 } 4535 break; 4536 4537 /* For certain code models, the symbolic references are known to fit. 4538 in CM_SMALL_PIC model we know it fits if it is local to the shared 4539 library. Don't count TLS SYMBOL_REFs here, since they should fit 4540 only if inside of UNSPEC handled below. */ 4541 case SYMBOL_REF: 4542 /* TLS symbols are not constant. */ 4543 if (tls_symbolic_operand (value, Pmode)) 4544 return false; 4545 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL); 4546 4547 /* For certain code models, the code is near as well. */ 4548 case LABEL_REF: 4549 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM 4550 || ix86_cmodel == CM_KERNEL); 4551 4552 /* We also may accept the offsetted memory references in certain special 4553 cases. 
*/ 4554 case CONST: 4555 if (GET_CODE (XEXP (value, 0)) == UNSPEC) 4556 switch (XINT (XEXP (value, 0), 1)) 4557 { 4558 case UNSPEC_GOTPCREL: 4559 case UNSPEC_DTPOFF: 4560 case UNSPEC_GOTNTPOFF: 4561 case UNSPEC_NTPOFF: 4562 return 1; 4563 default: 4564 break; 4565 } 4566 if (GET_CODE (XEXP (value, 0)) == PLUS) 4567 { 4568 rtx op1 = XEXP (XEXP (value, 0), 0); 4569 rtx op2 = XEXP (XEXP (value, 0), 1); 4570 HOST_WIDE_INT offset; 4571 4572 if (ix86_cmodel == CM_LARGE) 4573 return 0; 4574 if (GET_CODE (op2) != CONST_INT) 4575 return 0; 4576 offset = trunc_int_for_mode (INTVAL (op2), DImode); 4577 switch (GET_CODE (op1)) 4578 { 4579 case SYMBOL_REF: 4580 /* For CM_SMALL assume that latest object is 16MB before 4581 end of 31bits boundary. We may also accept pretty 4582 large negative constants knowing that all objects are 4583 in the positive half of address space. */ 4584 if (ix86_cmodel == CM_SMALL 4585 && offset < 16*1024*1024 4586 && trunc_int_for_mode (offset, SImode) == offset) 4587 return 1; 4588 /* For CM_KERNEL we know that all object resist in the 4589 negative half of 32bits address space. We may not 4590 accept negative offsets, since they may be just off 4591 and we may accept pretty large positive ones. */ 4592 if (ix86_cmodel == CM_KERNEL 4593 && offset > 0 4594 && trunc_int_for_mode (offset, SImode) == offset) 4595 return 1; 4596 break; 4597 case LABEL_REF: 4598 /* These conditions are similar to SYMBOL_REF ones, just the 4599 constraints for code models differ. 
*/ 4600 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM) 4601 && offset < 16*1024*1024 4602 && trunc_int_for_mode (offset, SImode) == offset) 4603 return 1; 4604 if (ix86_cmodel == CM_KERNEL 4605 && offset > 0 4606 && trunc_int_for_mode (offset, SImode) == offset) 4607 return 1; 4608 break; 4609 case UNSPEC: 4610 switch (XINT (op1, 1)) 4611 { 4612 case UNSPEC_DTPOFF: 4613 case UNSPEC_NTPOFF: 4614 if (offset > 0 4615 && trunc_int_for_mode (offset, SImode) == offset) 4616 return 1; 4617 } 4618 break; 4619 default: 4620 return 0; 4621 } 4622 } 4623 return 0; 4624 default: 4625 return 0; 4626 } 4627} 4628 4629/* Return 1 if VALUE can be stored in the zero extended immediate field. */ 4630int 4631x86_64_zero_extended_value (rtx value) 4632{ 4633 switch (GET_CODE (value)) 4634 { 4635 case CONST_DOUBLE: 4636 if (HOST_BITS_PER_WIDE_INT == 32) 4637 return (GET_MODE (value) == VOIDmode 4638 && !CONST_DOUBLE_HIGH (value)); 4639 else 4640 return 0; 4641 case CONST_INT: 4642 if (HOST_BITS_PER_WIDE_INT == 32) 4643 return INTVAL (value) >= 0; 4644 else 4645 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff); 4646 break; 4647 4648 /* For certain code models, the symbolic references are known to fit. */ 4649 case SYMBOL_REF: 4650 /* TLS symbols are not constant. */ 4651 if (tls_symbolic_operand (value, Pmode)) 4652 return false; 4653 return ix86_cmodel == CM_SMALL; 4654 4655 /* For certain code models, the code is near as well. */ 4656 case LABEL_REF: 4657 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM; 4658 4659 /* We also may accept the offsetted memory references in certain special 4660 cases. 
*/ 4661 case CONST: 4662 if (GET_CODE (XEXP (value, 0)) == PLUS) 4663 { 4664 rtx op1 = XEXP (XEXP (value, 0), 0); 4665 rtx op2 = XEXP (XEXP (value, 0), 1); 4666 4667 if (ix86_cmodel == CM_LARGE) 4668 return 0; 4669 switch (GET_CODE (op1)) 4670 { 4671 case SYMBOL_REF: 4672 return 0; 4673 /* For small code model we may accept pretty large positive 4674 offsets, since one bit is available for free. Negative 4675 offsets are limited by the size of NULL pointer area 4676 specified by the ABI. */ 4677 if (ix86_cmodel == CM_SMALL 4678 && GET_CODE (op2) == CONST_INT 4679 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000 4680 && (trunc_int_for_mode (INTVAL (op2), SImode) 4681 == INTVAL (op2))) 4682 return 1; 4683 /* ??? For the kernel, we may accept adjustment of 4684 -0x10000000, since we know that it will just convert 4685 negative address space to positive, but perhaps this 4686 is not worthwhile. */ 4687 break; 4688 case LABEL_REF: 4689 /* These conditions are similar to SYMBOL_REF ones, just the 4690 constraints for code models differ. */ 4691 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM) 4692 && GET_CODE (op2) == CONST_INT 4693 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000 4694 && (trunc_int_for_mode (INTVAL (op2), SImode) 4695 == INTVAL (op2))) 4696 return 1; 4697 break; 4698 default: 4699 return 0; 4700 } 4701 } 4702 return 0; 4703 default: 4704 return 0; 4705 } 4706} 4707 4708/* Value should be nonzero if functions must have frame pointers. 4709 Zero means the frame pointer need not be set up (and parms may 4710 be accessed via the stack pointer) in functions that seem suitable. */ 4711 4712int 4713ix86_frame_pointer_required (void) 4714{ 4715 /* If we accessed previous frames, then the generated code expects 4716 to be able to access the saved ebp value in our frame. */ 4717 if (cfun->machine->accesses_prev_frame) 4718 return 1; 4719 4720 /* Several x86 os'es need a frame pointer for other reasons, 4721 usually pertaining to setjmp. 
*/ 4722 if (SUBTARGET_FRAME_POINTER_REQUIRED) 4723 return 1; 4724 4725 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off 4726 the frame pointer by default. Turn it back on now if we've not 4727 got a leaf function. */ 4728 if (TARGET_OMIT_LEAF_FRAME_POINTER 4729 && (!current_function_is_leaf)) 4730 return 1; 4731 4732 if (current_function_profile) 4733 return 1; 4734 4735 return 0; 4736} 4737 4738/* Record that the current function accesses previous call frames. */ 4739 4740void 4741ix86_setup_frame_addresses (void) 4742{ 4743 cfun->machine->accesses_prev_frame = 1; 4744} 4745 4746#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY) 4747# define USE_HIDDEN_LINKONCE 1 4748#else 4749# define USE_HIDDEN_LINKONCE 0 4750#endif 4751 4752static int pic_labels_used; 4753 4754/* Fills in the label name that should be used for a pc thunk for 4755 the given register. */ 4756 4757static void 4758get_pc_thunk_name (char name[32], unsigned int regno) 4759{ 4760 if (USE_HIDDEN_LINKONCE) 4761 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]); 4762 else 4763 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno); 4764} 4765 4766 4767/* This function generates code for -fpic that loads %ebx with 4768 the return address of the caller and then returns. */ 4769 4770void 4771ix86_file_end (void) 4772{ 4773 rtx xops[2]; 4774 int regno; 4775 4776 for (regno = 0; regno < 8; ++regno) 4777 { 4778 char name[32]; 4779 4780 if (! 
((pic_labels_used >> regno) & 1)) 4781 continue; 4782 4783 get_pc_thunk_name (name, regno); 4784 4785 if (USE_HIDDEN_LINKONCE) 4786 { 4787 tree decl; 4788 4789 decl = build_decl (FUNCTION_DECL, get_identifier (name), 4790 error_mark_node); 4791 TREE_PUBLIC (decl) = 1; 4792 TREE_STATIC (decl) = 1; 4793 DECL_ONE_ONLY (decl) = 1; 4794 4795 (*targetm.asm_out.unique_section) (decl, 0); 4796 named_section (decl, NULL, 0); 4797 4798 (*targetm.asm_out.globalize_label) (asm_out_file, name); 4799 fputs ("\t.hidden\t", asm_out_file); 4800 assemble_name (asm_out_file, name); 4801 fputc ('\n', asm_out_file); 4802 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); 4803 } 4804 else 4805 { 4806 text_section (); 4807 ASM_OUTPUT_LABEL (asm_out_file, name); 4808 } 4809 4810 xops[0] = gen_rtx_REG (SImode, regno); 4811 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx); 4812 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops); 4813 output_asm_insn ("ret", xops); 4814 } 4815 4816 if (NEED_INDICATE_EXEC_STACK) 4817 file_end_indicate_exec_stack (); 4818} 4819 4820/* Emit code for the SET_GOT patterns. */ 4821 4822const char * 4823output_set_got (rtx dest) 4824{ 4825 rtx xops[3]; 4826 4827 xops[0] = dest; 4828 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); 4829 4830 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic) 4831 { 4832 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ()); 4833 4834 if (!flag_pic) 4835 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops); 4836 else 4837 output_asm_insn ("call\t%a2", xops); 4838 4839#if TARGET_MACHO 4840 /* Output the "canonical" label name ("Lxx$pb") here too. This 4841 is what will be referred to by the Mach-O PIC subsystem. 
*/ 4842 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ()); 4843#endif 4844 (*targetm.asm_out.internal_label) (asm_out_file, "L", 4845 CODE_LABEL_NUMBER (XEXP (xops[2], 0))); 4846 4847 if (flag_pic) 4848 output_asm_insn ("pop{l}\t%0", xops); 4849 } 4850 else 4851 { 4852 char name[32]; 4853 get_pc_thunk_name (name, REGNO (dest)); 4854 pic_labels_used |= 1 << REGNO (dest); 4855 4856 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); 4857 xops[2] = gen_rtx_MEM (QImode, xops[2]); 4858 output_asm_insn ("call\t%X2", xops); 4859 } 4860 4861 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION) 4862 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops); 4863 else if (!TARGET_MACHO) 4864 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops); 4865 4866 return ""; 4867} 4868 4869/* Generate an "push" pattern for input ARG. */ 4870 4871static rtx 4872gen_push (rtx arg) 4873{ 4874 return gen_rtx_SET (VOIDmode, 4875 gen_rtx_MEM (Pmode, 4876 gen_rtx_PRE_DEC (Pmode, 4877 stack_pointer_rtx)), 4878 arg); 4879} 4880 4881/* Return >= 0 if there is an unused call-clobbered register available 4882 for the entire function. */ 4883 4884static unsigned int 4885ix86_select_alt_pic_regnum (void) 4886{ 4887 if (current_function_is_leaf && !current_function_profile) 4888 { 4889 int i; 4890 for (i = 2; i >= 0; --i) 4891 if (!regs_ever_live[i]) 4892 return i; 4893 } 4894 4895 return INVALID_REGNUM; 4896} 4897 4898/* Return 1 if we need to save REGNO. 
*/ 4899static int 4900ix86_save_reg (unsigned int regno, int maybe_eh_return) 4901{ 4902 if (pic_offset_table_rtx 4903 && regno == REAL_PIC_OFFSET_TABLE_REGNUM 4904 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM] 4905 || current_function_profile 4906 || current_function_calls_eh_return 4907 || current_function_uses_const_pool)) 4908 { 4909 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM) 4910 return 0; 4911 return 1; 4912 } 4913 4914 if (current_function_calls_eh_return && maybe_eh_return) 4915 { 4916 unsigned i; 4917 for (i = 0; ; i++) 4918 { 4919 unsigned test = EH_RETURN_DATA_REGNO (i); 4920 if (test == INVALID_REGNUM) 4921 break; 4922 if (test == regno) 4923 return 1; 4924 } 4925 } 4926 4927 return (regs_ever_live[regno] 4928 && !call_used_regs[regno] 4929 && !fixed_regs[regno] 4930 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)); 4931} 4932 4933/* Return number of registers to be saved on the stack. */ 4934 4935static int 4936ix86_nsaved_regs (void) 4937{ 4938 int nregs = 0; 4939 int regno; 4940 4941 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--) 4942 if (ix86_save_reg (regno, true)) 4943 nregs++; 4944 return nregs; 4945} 4946 4947/* Return the offset between two registers, one to be eliminated, and the other 4948 its replacement, at the start of a routine. 
*/ 4949 4950HOST_WIDE_INT 4951ix86_initial_elimination_offset (int from, int to) 4952{ 4953 struct ix86_frame frame; 4954 ix86_compute_frame_layout (&frame); 4955 4956 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 4957 return frame.hard_frame_pointer_offset; 4958 else if (from == FRAME_POINTER_REGNUM 4959 && to == HARD_FRAME_POINTER_REGNUM) 4960 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; 4961 else 4962 { 4963 if (to != STACK_POINTER_REGNUM) 4964 abort (); 4965 else if (from == ARG_POINTER_REGNUM) 4966 return frame.stack_pointer_offset; 4967 else if (from != FRAME_POINTER_REGNUM) 4968 abort (); 4969 else 4970 return frame.stack_pointer_offset - frame.frame_pointer_offset; 4971 } 4972} 4973 4974/* Fill structure ix86_frame about frame of currently computed function. */ 4975 4976static void 4977ix86_compute_frame_layout (struct ix86_frame *frame) 4978{ 4979 HOST_WIDE_INT total_size; 4980 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT; 4981 HOST_WIDE_INT offset; 4982 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT; 4983 HOST_WIDE_INT size = get_frame_size (); 4984 4985 frame->nregs = ix86_nsaved_regs (); 4986 total_size = size; 4987 4988 /* During reload iteration the amount of registers saved can change. 4989 Recompute the value as needed. Do not recompute when amount of registers 4990 didn't change as reload does mutiple calls to the function and does not 4991 expect the decision to change within single iteration. */ 4992 if (!optimize_size 4993 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs) 4994 { 4995 int count = frame->nregs; 4996 4997 cfun->machine->use_fast_prologue_epilogue_nregs = count; 4998 /* The fast prologue uses move instead of push to save registers. This 4999 is significantly longer, but also executes faster as modern hardware 5000 can execute the moves in parallel, but can't do that for push/pop. 
5001 5002 Be careful about choosing what prologue to emit: When function takes 5003 many instructions to execute we may use slow version as well as in 5004 case function is known to be outside hot spot (this is known with 5005 feedback only). Weight the size of function by number of registers 5006 to save as it is cheap to use one or two push instructions but very 5007 slow to use many of them. */ 5008 if (count) 5009 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT; 5010 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL 5011 || (flag_branch_probabilities 5012 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT)) 5013 cfun->machine->use_fast_prologue_epilogue = false; 5014 else 5015 cfun->machine->use_fast_prologue_epilogue 5016 = !expensive_function_p (count); 5017 } 5018 if (TARGET_PROLOGUE_USING_MOVE 5019 && cfun->machine->use_fast_prologue_epilogue) 5020 frame->save_regs_using_mov = true; 5021 else 5022 frame->save_regs_using_mov = false; 5023 5024 5025 /* Skip return address and saved base pointer. */ 5026 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD; 5027 5028 frame->hard_frame_pointer_offset = offset; 5029 5030 /* Do some sanity checking of stack_alignment_needed and 5031 preferred_alignment, since i386 port is the only using those features 5032 that may break easily. 
*/ 5033 5034 if (size && !stack_alignment_needed) 5035 abort (); 5036 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT) 5037 abort (); 5038 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT) 5039 abort (); 5040 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT) 5041 abort (); 5042 5043 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT) 5044 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT; 5045 5046 /* Register save area */ 5047 offset += frame->nregs * UNITS_PER_WORD; 5048 5049 /* Va-arg area */ 5050 if (ix86_save_varrargs_registers) 5051 { 5052 offset += X86_64_VARARGS_SIZE; 5053 frame->va_arg_size = X86_64_VARARGS_SIZE; 5054 } 5055 else 5056 frame->va_arg_size = 0; 5057 5058 /* Align start of frame for local function. */ 5059 frame->padding1 = ((offset + stack_alignment_needed - 1) 5060 & -stack_alignment_needed) - offset; 5061 5062 offset += frame->padding1; 5063 5064 /* Frame pointer points here. */ 5065 frame->frame_pointer_offset = offset; 5066 5067 offset += size; 5068 5069 /* Add outgoing arguments area. Can be skipped if we eliminated 5070 all the function calls as dead code. 5071 Skipping is however impossible when function calls alloca. Alloca 5072 expander assumes that last current_function_outgoing_args_size 5073 of stack frame are unused. */ 5074 if (ACCUMULATE_OUTGOING_ARGS 5075 && (!current_function_is_leaf || current_function_calls_alloca)) 5076 { 5077 offset += current_function_outgoing_args_size; 5078 frame->outgoing_arguments_size = current_function_outgoing_args_size; 5079 } 5080 else 5081 frame->outgoing_arguments_size = 0; 5082 5083 /* Align stack boundary. Only needed if we're calling another function 5084 or using alloca. 
*/ 5085 if (!current_function_is_leaf || current_function_calls_alloca) 5086 frame->padding2 = ((offset + preferred_alignment - 1) 5087 & -preferred_alignment) - offset; 5088 else 5089 frame->padding2 = 0; 5090 5091 offset += frame->padding2; 5092 5093 /* We've reached end of stack frame. */ 5094 frame->stack_pointer_offset = offset; 5095 5096 /* Size prologue needs to allocate. */ 5097 frame->to_allocate = 5098 (size + frame->padding1 + frame->padding2 5099 + frame->outgoing_arguments_size + frame->va_arg_size); 5100 5101 if ((!frame->to_allocate && frame->nregs <= 1) 5102 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000)) 5103 frame->save_regs_using_mov = false; 5104 5105 if (TARGET_RED_ZONE && current_function_sp_is_unchanging 5106 && current_function_is_leaf) 5107 { 5108 frame->red_zone_size = frame->to_allocate; 5109 if (frame->save_regs_using_mov) 5110 frame->red_zone_size += frame->nregs * UNITS_PER_WORD; 5111 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) 5112 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; 5113 } 5114 else 5115 frame->red_zone_size = 0; 5116 frame->to_allocate -= frame->red_zone_size; 5117 frame->stack_pointer_offset -= frame->red_zone_size; 5118#if 0 5119 fprintf (stderr, "nregs: %i\n", frame->nregs); 5120 fprintf (stderr, "size: %i\n", size); 5121 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed); 5122 fprintf (stderr, "padding1: %i\n", frame->padding1); 5123 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size); 5124 fprintf (stderr, "padding2: %i\n", frame->padding2); 5125 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate); 5126 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size); 5127 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset); 5128 fprintf (stderr, "hard_frame_pointer_offset: %i\n", 5129 frame->hard_frame_pointer_offset); 5130 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset); 5131#endif 5132} 5133 5134/* 
Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  int regno;
  rtx insn;

  /* Push in descending register-number order; the epilogue's pop loop
     walks registers in ascending order, matching LIFO stack order.  */
  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
	/* Mark frame-related so unwind/CFI info records the save.  */
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit code to save registers using MOV insns.  First register
   is saved at POINTER + OFFSET; OFFSET advances by UNITS_PER_WORD
   for each saved register.  */
static void
ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
{
  int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
					       Pmode, offset),
			       gen_rtx_REG (Pmode, regno));
	RTX_FRAME_RELATED_P (insn) = 1;
	offset += UNITS_PER_WORD;
      }
}

/* Expand prologue or epilogue stack adjustment (DEST = SRC + OFFSET).
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
{
  rtx insn;

  if (! TARGET_64BIT)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
  else if (x86_64_immediate_operand (offset, DImode))
    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
  else
    {
      /* 64-bit with an offset too wide for an immediate: stage it
	 through %r11.  */
      rtx r11;
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  ATM indirect sibcall
	 shouldn't be used together with huge frame sizes in one
	 function because of the frame_size check in sibcall.c.  */
      if (style == 0)
	abort ();
      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
      if (style < 0)
	RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
							       offset));
    }
  if (style < 0)
    RTX_FRAME_RELATED_P (insn) = 1;
}

/* Expand the prologue into a bunch of separate insns.  Saves the frame
   pointer (if needed), saves call-saved registers (push or mov form per
   frame.save_regs_using_mov), allocates the frame, and sets up the PIC
   register when required.  */

void
ix86_expand_prologue (void)
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;

  if (!frame.save_regs_using_mov)
    ix86_emit_save_regs ();
  else
    /* Register saves will be done with movs into the allocated area,
       so the allocation must also cover the register-save space.  */
    allocate += frame.nregs * UNITS_PER_WORD;

  /* When using red zone we may start register saving before allocating
     the stack frame saving one cycle of the prologue.  */
  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
				   : stack_pointer_rtx,
				   -frame.nregs * UNITS_PER_WORD)
;
  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (-allocate), -1);
  else
    {
      /* Large allocation with stack probing: call the stack-allocation
	 worker with the size in %eax.  Only valid for Win32.  */
      rtx eax = gen_rtx_REG (SImode, 0);
      bool eax_live = ix86_eax_live_at_start_p ();

      if (TARGET_64BIT)
	abort ();

      if (eax_live)
	{
	  /* %eax carries an incoming argument; preserve it around the
	     worker call by pushing it (and shrinking the allocation by
	     the 4 bytes that push consumed).  */
	  emit_insn (gen_push (eax));
	  allocate -= 4;
	}

      insn = emit_move_insn (eax, GEN_INT (allocate));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_insn (gen_allocate_stack_worker (eax));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (eax_live)
	{
	  /* Reload the saved %eax from its slot just above the new frame.  */
	  rtx t = plus_constant (stack_pointer_rtx, allocate);
	  emit_move_insn (eax, gen_rtx_MEM (SImode, t));
	}
    }

  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
	ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
				       -frame.nregs * UNITS_PER_WORD);
    }

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
	  || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
	 deleting all references to the pic register after reload.
	 Consider if cross-jumping unifies two sides of a branch
	 controlled by a comparison vs the only read from a global.
	 In which case, allow the set_got to be deleted, though we're
	 too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}

/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  MAYBE_EH_RETURN is passed through
   to ix86_save_reg to select the eh_return register set.  */
static void
ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
				  int maybe_eh_return)
{
  int regno;
  rtx base_address = gen_rtx_MEM (Pmode, pointer);

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
	/* Ensure that adjust_address won't be forced to produce pointer
	   out of range allowed by x86-64 instruction set.  */
	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
	  {
	    rtx r11;

	    /* Fold the out-of-range offset into %r11 and address
	       relative to it from here on.  */
	    r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	    emit_move_insn (r11, GEN_INT (offset));
	    emit_insn (gen_adddi3 (r11, r11, pointer));
	    base_address = gen_rtx_MEM (Pmode, r11);
	    offset = 0;
	  }
	emit_move_insn (gen_rtx_REG (Pmode, regno),
			adjust_address (base_address, Pmode, offset));
	offset += UNITS_PER_WORD;
      }
}

/* Restore function stack, frame, and registers.  STYLE is 0 for a
   sibcall epilogue (no return insn emitted), 2 for an eh_return
   epilogue, and 1 otherwise.  */

void
ix86_expand_epilogue (int style)
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.

     The default code results in stack adjustment using add/lea instruction,
     while this code results in LEAVE instruction (or discrete equivalent),
     so it is profitable in some other cases as well.  Especially when there
     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
     and there is exactly one register to pop.  This heuristic may need some
     tuning in future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
	  && cfun->machine->use_fast_prologue_epilogue
	  && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
					  frame.to_allocate, style == 2);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
					  offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

	  if (frame_pointer_needed)
	    {
	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      tmp = plus_constant (tmp, UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
	      emit_move_insn (hard_frame_pointer_rtx, tmp);

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style);
	    }
	  else
	    {
	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      tmp = plus_constant (tmp, (frame.to_allocate
					 + frame.nregs * UNITS_PER_WORD));
	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
	    }
	}
      else if (!frame_pointer_needed)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate
					    + frame.nregs * UNITS_PER_WORD),
				   style);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size
	       || !cfun->machine->use_fast_prologue_epilogue)
	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style);
	  if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     GEN_INT (offset), style);
	}
      else if (frame.to_allocate)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (frame.to_allocate), style);

      /* Pop in ascending order — mirrors the descending-order pushes
	 done by ix86_emit_save_regs.  */
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	if (ix86_save_reg (regno, false))
	  {
	    if (TARGET_64BIT)
	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
	    else
	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
	  }
      if (frame_pointer_needed)
	{
	  /* Leave results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
	  if (TARGET_USE_LEAVE)
	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
	  else if (TARGET_64BIT)
	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
	  else
	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  /* There is no "pascal" calling convention in 64bit ABI.  */
	  if (TARGET_64BIT)
	    abort ();

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_jump_insn (gen_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}

/* Reset from the function's potential modifications.
 */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  /* Undo any PIC-register renaming done in the prologue (see
     ix86_select_alt_pic_regnum use in ix86_expand_prologue).  */
  if (pic_offset_table_rtx)
    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
}

/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction into *OUT (base, index, disp, scale, seg).
   Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.  */

static int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      /* Flatten a (possibly nested) PLUS chain into at most four
	 addends, then classify each one.  */
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case UNSPEC:
	      /* A thread-pointer addend becomes a segment override
		 (%fs on 64-bit, %gs on 32-bit).  */
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == SEG_DEFAULT)
		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
	      else
		return 0;
	      break;

	    case REG:
	    case SUBREG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
	return 0;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is not scaling.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx
	  || index == frame_pointer_rtx
	  || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
    {
      /* %esp cannot be an index — swap it into the base slot.  */
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_tune == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}

/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x)
{
  struct ix86_address parts;
  int cost = 1;

  if (!ix86_decompose_address (x, &parts))
    abort ();

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;
  if (parts.seg != SEG_DEFAULT)
    cost--;

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}

/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      /* Look through (const (plus (unspec GOTPCREL) const_int)) for the
	 underlying symbol/label.  */
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || XINT (term, 1) != UNSPEC_GOTPCREL)
	return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
	  && GET_CODE (term) != LABEL_REF)
	return x;

      return term;
    }

  term = ix86_delegitimize_address (x);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}

/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

bool
legitimate_constant_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case SYMBOL_REF:
      /* TLS symbols are not constant.  */
      if (tls_symbolic_operand (x, Pmode))
	return false;
      break;

    case CONST:
      inner = XEXP (x, 0);

      /* Offsets of TLS symbols are never valid.
	 Discourage CSE from creating them.  */
      if (GET_CODE (inner) == PLUS
	  && tls_symbolic_operand (XEXP (inner, 0), Pmode))
	return false;

      if (GET_CODE (inner) == PLUS
	  || GET_CODE (inner) == MINUS)
	{
	  if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
	    return false;
	  inner = XEXP (inner, 0);
	}

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  case UNSPEC_DTPOFF:
	    return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  default:
	    return false;
	  }
      break;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}

/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (rtx x)
{
  return !legitimate_constant_p (x);
}

/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
}

/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_TPOFF:
	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}

/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      /* TLS references should always be enclosed in UNSPEC.  */
      if (tls_symbolic_operand (disp, GET_MODE (disp)))
	return 0;
      if (GET_CODE (disp) == SYMBOL_REF
	  && ix86_cmodel == CM_SMALL_PIC
	  && SYMBOL_REF_LOCAL_P (disp))
	return 1;
      if (GET_CODE (disp) == LABEL_REF)
	return 1;
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS)
	{
	  rtx op0 = XEXP (XEXP (disp, 0), 0);
	  rtx op1 = XEXP (XEXP (disp, 0), 1);

	  /* TLS references should always be enclosed in UNSPEC.  */
	  if (tls_symbolic_operand (op0, GET_MODE (op0)))
	    return 0;
	  /* local-symbol + small offset (within +/-16MB) is OK in
	     the small PIC code model.  */
	  if (((GET_CODE (op0) == SYMBOL_REF
		&& ix86_cmodel == CM_SMALL_PIC
		&& SYMBOL_REF_LOCAL_P (op0))
	       || GET_CODE (op0) == LABEL_REF)
	      && GET_CODE (op1) == CONST_INT
	      && INTVAL (op1) < 16*1024*1024
	      && INTVAL (op1) >= -16*1024*1024)
	    return 1;
	}
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* We are unsafe to allow PLUS expressions.  This limit allowed distance
	 of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
  if (TARGET_MACHO && GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
	  || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
	  {
	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
	    if (! strcmp (sym_name, "<pic base>"))
	      return 1;
	  }
    }

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	  || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_NTPOFF:
      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    case UNSPEC_DTPOFF:
      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
    }

  return 0;
}

/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.
LEGITIMIZE_ADDRESS should 5997 convert common non-canonical forms to canonical form so that they will 5998 be recognized. */ 5999 6000int 6001legitimate_address_p (enum machine_mode mode, rtx addr, int strict) 6002{ 6003 struct ix86_address parts; 6004 rtx base, index, disp; 6005 HOST_WIDE_INT scale; 6006 const char *reason = NULL; 6007 rtx reason_rtx = NULL_RTX; 6008 6009 if (TARGET_DEBUG_ADDR) 6010 { 6011 fprintf (stderr, 6012 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n", 6013 GET_MODE_NAME (mode), strict); 6014 debug_rtx (addr); 6015 } 6016 6017 if (ix86_decompose_address (addr, &parts) <= 0) 6018 { 6019 reason = "decomposition failed"; 6020 goto report_error; 6021 } 6022 6023 base = parts.base; 6024 index = parts.index; 6025 disp = parts.disp; 6026 scale = parts.scale; 6027 6028 /* Validate base register. 6029 6030 Don't allow SUBREG's here, it can lead to spill failures when the base 6031 is one word out of a two word structure, which is represented internally 6032 as a DImode int. */ 6033 6034 if (base) 6035 { 6036 reason_rtx = base; 6037 6038 if (GET_CODE (base) != REG) 6039 { 6040 reason = "base is not a register"; 6041 goto report_error; 6042 } 6043 6044 if (GET_MODE (base) != Pmode) 6045 { 6046 reason = "base is not in Pmode"; 6047 goto report_error; 6048 } 6049 6050 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base)) 6051 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base))) 6052 { 6053 reason = "base is not valid"; 6054 goto report_error; 6055 } 6056 } 6057 6058 /* Validate index register. 6059 6060 Don't allow SUBREG's here, it can lead to spill failures when the index 6061 is one word out of a two word structure, which is represented internally 6062 as a DImode int. 
*/ 6063 6064 if (index) 6065 { 6066 reason_rtx = index; 6067 6068 if (GET_CODE (index) != REG) 6069 { 6070 reason = "index is not a register"; 6071 goto report_error; 6072 } 6073 6074 if (GET_MODE (index) != Pmode) 6075 { 6076 reason = "index is not in Pmode"; 6077 goto report_error; 6078 } 6079 6080 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index)) 6081 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index))) 6082 { 6083 reason = "index is not valid"; 6084 goto report_error; 6085 } 6086 } 6087 6088 /* Validate scale factor. */ 6089 if (scale != 1) 6090 { 6091 reason_rtx = GEN_INT (scale); 6092 if (!index) 6093 { 6094 reason = "scale without index"; 6095 goto report_error; 6096 } 6097 6098 if (scale != 2 && scale != 4 && scale != 8) 6099 { 6100 reason = "scale is not a valid multiplier"; 6101 goto report_error; 6102 } 6103 } 6104 6105 /* Validate displacement. */ 6106 if (disp) 6107 { 6108 reason_rtx = disp; 6109 6110 if (GET_CODE (disp) == CONST 6111 && GET_CODE (XEXP (disp, 0)) == UNSPEC) 6112 switch (XINT (XEXP (disp, 0), 1)) 6113 { 6114 case UNSPEC_GOT: 6115 case UNSPEC_GOTOFF: 6116 case UNSPEC_GOTPCREL: 6117 if (!flag_pic) 6118 abort (); 6119 goto is_legitimate_pic; 6120 6121 case UNSPEC_GOTTPOFF: 6122 case UNSPEC_GOTNTPOFF: 6123 case UNSPEC_INDNTPOFF: 6124 case UNSPEC_NTPOFF: 6125 case UNSPEC_DTPOFF: 6126 break; 6127 6128 default: 6129 reason = "invalid address unspec"; 6130 goto report_error; 6131 } 6132 6133 else if (flag_pic && (SYMBOLIC_CONST (disp) 6134#if TARGET_MACHO 6135 && !machopic_operand_p (disp) 6136#endif 6137 )) 6138 { 6139 is_legitimate_pic: 6140 if (TARGET_64BIT && (index || base)) 6141 { 6142 /* foo@dtpoff(%rX) is ok. 
*/ 6143 if (GET_CODE (disp) != CONST 6144 || GET_CODE (XEXP (disp, 0)) != PLUS 6145 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC 6146 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT 6147 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF 6148 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF)) 6149 { 6150 reason = "non-constant pic memory reference"; 6151 goto report_error; 6152 } 6153 } 6154 else if (! legitimate_pic_address_disp_p (disp)) 6155 { 6156 reason = "displacement is an invalid pic construct"; 6157 goto report_error; 6158 } 6159 6160 /* This code used to verify that a symbolic pic displacement 6161 includes the pic_offset_table_rtx register. 6162 6163 While this is good idea, unfortunately these constructs may 6164 be created by "adds using lea" optimization for incorrect 6165 code like: 6166 6167 int a; 6168 int foo(int i) 6169 { 6170 return *(&a+i); 6171 } 6172 6173 This code is nonsensical, but results in addressing 6174 GOT table with pic_offset_table_rtx base. We can't 6175 just refuse it easily, since it gets matched by 6176 "addsi3" pattern, that later gets split to lea in the 6177 case output register differs from input. While this 6178 can be handled by separate addsi pattern for this case 6179 that never results in lea, this seems to be easier and 6180 correct fix for crash to disable this test. */ 6181 } 6182 else if (GET_CODE (disp) != LABEL_REF 6183 && GET_CODE (disp) != CONST_INT 6184 && (GET_CODE (disp) != CONST 6185 || !legitimate_constant_p (disp)) 6186 && (GET_CODE (disp) != SYMBOL_REF 6187 || !legitimate_constant_p (disp))) 6188 { 6189 reason = "displacement is not constant"; 6190 goto report_error; 6191 } 6192 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp)) 6193 { 6194 reason = "displacement is out of range"; 6195 goto report_error; 6196 } 6197 } 6198 6199 /* Everything looks valid. 
 */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}

/* Return an unique alias set for the GOT.  Allocated lazily on first
   use; every GOT load shares this set so the alias oracle knows GOT
   entries never conflict with user data.  */

static HOST_WIDE_INT
ix86_GOT_alias_set (void)
{
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  /* NOTE: `new' is a plain identifier here (pre-C++ GCC sources).  */
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (reg == 0)
    reg = gen_reg_rtx (Pmode);
  /* Use the generic Mach-O PIC machinery.  */
  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new = addr;
  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      /* During reload no new pseudos may be created, so flag the PIC
	 register as live directly.  */
      if (reload_in_progress)
	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
	  new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
	}
      else
	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
	{
	  /* 64-bit: RIP-relative load from the GOT (@GOTPCREL).  */
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use directly gen_movsi, otherwise the address is loaded
	     into register for CSE.  We don't want to CSE this addresses,
	     instead we CSE addresses from the GOT table, so skip this.  */
	  emit_insn (gen_movsi (reg, new));
	  new = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
	  new = gen_rtx_MEM (Pmode, new);
	  RTX_UNCHANGING_P (new) = 1;
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else
    {
      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  if (GET_CODE (addr) != PLUS)
	    abort ();
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (local_symbolic_operand (op0, Pmode)
	      && GET_CODE (op1) == CONST_INT)
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					UNSPEC_GOTOFF);
		  new = gen_rtx_PLUS (Pmode, new, op1);
		  new = gen_rtx_CONST (Pmode, new);
		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new);
		      new = reg;
		    }
		}
	      else
		{
		  /* 64-bit: offsets beyond +/-16MB cannot be folded into
		     a 32-bit sign-extended displacement; force OP1 into a
		     register instead.  */
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
		}
	    }
	  else
	    {
	      /* Legitimize each half of the PLUS separately and
		 recombine, keeping any constant part outermost.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new  = legitimize_pic_address (XEXP (addr, 1),
					     base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}

/* Load the thread pointer.  If TO_REG is true, force it into a register.
 */

static rtx
get_thread_pointer (int to_reg)
{
  rtx tp, reg, insn;

  /* The thread pointer is represented as (unspec [const0] UNSPEC_TP);
     the move patterns know how to materialize it (%fs/%gs based).  */
  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  if (!to_reg)
    return tp;

  reg = gen_reg_rtx (Pmode);
  insn = gen_rtx_SET (VOIDmode, reg, tp);
  insn = emit_insn (insn);

  return reg;
}

/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.

   X is the TLS symbol; MODEL selects which of the four ELF TLS access
   models to emit.  Returns the legitimized address rtx.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
{
  rtx dest, base, off, pic;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  /* The 64-bit GD sequence is a call returning the address in
	     %rax (hard reg 0); wrap it as a libcall block so it can be
	     CSEd against X.  */
	  rtx rax = gen_rtx_REG (Pmode, 0), insns;

	  start_sequence ();
	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
	  insns = get_insns ();
	  end_sequence ();

	  emit_libcall_block (insns, dest, rax, x);
	}
      else
	emit_insn (gen_tls_global_dynamic_32 (dest, x));
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;

	  start_sequence ();
	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
	  insns = get_insns ();
	  end_sequence ();

	  /* Equate the block with a call to __tls_get_addr so equal
	     bases can be CSEd.  */
	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
	  emit_libcall_block (insns, base, rax, note);
	}
      else
	emit_insn (gen_tls_local_dynamic_base_32 (base));

      /* LD result = module base + @DTPOFF displacement; returns
	 directly rather than falling out via DEST.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      return gen_rtx_PLUS (Pmode, base, off);

    case TLS_MODEL_INITIAL_EXEC:
      /* Pick the GOT-relative relocation and, for 32-bit, the base
	 register holding the GOT address.  */
      if (TARGET_64BIT)
	{
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  pic = pic_offset_table_rtx;
	  type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
	off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_rtx_MEM (Pmode, off);
      RTX_UNCHANGING_P (off) = 1;
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (Pmode, off);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  /* Sun-style TLS: address = thread pointer - offset.  */
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    default:
      abort ();
    }

  return dest;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  /* LOG is reused here to hold the TLS model (nonzero iff X is a TLS
     symbol), not a shift count.  */
  log = tls_symbolic_operand (x, mode);
  if (log)
    return legitimize_tls_address (x, log, false);

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
	{
	  changed = 1;
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Break multiplies out of the address into registers.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Last resort: force one operand of the PLUS into a register.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}

/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      if (flag_pic)
	putc ('.', file);
      else
	abort ();
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.
 */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      else
	abort ();
      break;

    case MINUS:
      /* Wrap the difference in brackets/parens per assembler dialect.  */
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      /* Our PIC/TLS unspecs carry exactly one operand; print it followed
	 by the matching relocation suffix.  */
      if (XVECLEN (x, 0) != 1)
	abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs ("@GOTPCREL(%rip)", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@GOTTPOFF", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@TPOFF", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@TPOFF", file);
	  else
	    fputs ("@NTPOFF", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@DTPOFF", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@GOTTPOFF(%rip)", file);
	  else
	    fputs ("@GOTNTPOFF", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@INDNTPOFF", file);
	  break;
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}

/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
   We need to handle our special PIC relocations.  */

void
i386_dwarf_output_addr_const (FILE *file, rtx x)
{
#ifdef ASM_QUAD
  fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
#else
  if (TARGET_64BIT)
    abort ();
  fprintf (file, "%s", ASM_LONG);
#endif
  if (flag_pic)
    output_pic_addr_const (file, x, '\0');
  else
    output_addr_const (file, x);
  fputc ('\n', file);
}

/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  SIZE is 4 or 8 bytes;
   the 8-byte form pads the upper half with ", 0".  */

void
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@DTPOFF", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      abort ();
    }
}

/* In the name of slightly smaller debug output, and to cater to
   general assembler losage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.
 */

static rtx
ix86_delegitimize_address (rtx orig_x)
{
  rtx x = orig_x, y;

  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      /* 64-bit: only (mem (const (unspec ... GOTPCREL))) is recognized;
	 strip it down to the bare symbol.  */
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	  || GET_CODE (orig_x) != MEM)
	return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    y = NULL;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      y = XEXP (x, 0);
      if (GET_CODE (XEXP (y, 0)) == REG
	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 1);
      else if (GET_CODE (XEXP (y, 1)) == REG
	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
	y = XEXP (y, 0);
      else
	return orig_x;
      if (GET_CODE (y) != REG
	  && GET_CODE (y) != MULT
	  && GET_CODE (y) != ASHIFT)
	return orig_x;
    }
  else
    return orig_x;

  x = XEXP (XEXP (x, 1), 0);
  /* @GOT references are meaningful only inside a MEM (a load from the
     GOT); @GOTOFF references only outside one.  */
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    {
      if (y)
	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
      return XVECEXP (x, 0, 0);
    }

  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
	      && GET_CODE (orig_x) != MEM)))
    {
      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
      if (y)
	return gen_rtx_PLUS (Pmode, y, x);
      return x;
    }

  return orig_x;
}

/* Write to FILE the instruction-suffix string for comparison CODE in
   condition-code mode MODE.  REVERSE nonzero means print the suffix of
   the reversed condition; FP nonzero selects the fcmov-style spellings
   ("nbe"/"nb"/"u"/"nu") where they differ from the integer ones.
   FP-mode comparisons are first mapped to integer condition codes.  */

static void
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
		    int fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      /* Callers must only pass comparisons expressible as one insn.  */
      if (bypass_code != NIL || second_code != NIL)
	abort ();
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
	abort ();
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
	 Those same assemblers have the same but opposite losage on cmov.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "l";
      else
	abort ();
      break;
    case LTU:
      if (mode != CCmode)
	abort ();
      suffix = "b";
      break;
    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode)
	suffix = "ge";
      else
	abort ();
      break;
    case GEU:
      /* ??? As above.  */
      if (mode != CCmode)
	abort ();
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
	abort ();
      suffix = "le";
      break;
    case LEU:
      if (mode != CCmode)
	abort ();
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      abort ();
    }
  fputs (suffix, file);
}

/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'h', pretend the reg is the `high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */

void
print_reg (rtx x, int code, FILE *file)
{
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  /* Translate the print code into an operand size in bytes (3 is the
     special 'y' st(0) case, 0 the 'h' high-byte case).  */
  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers.
 */
  if (REX_INT_REG_P (x))
    {
      if (!TARGET_64BIT)
	abort ();
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  fputs ("st(0)", file);
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      /* 4/8/12-byte non-FP regs get an 'e' (or 'r' in 64-bit) prefix
	 before the 16-bit name from hi_reg_name.  */
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      /* Regs without a QImode name fall back to the word name.  */
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}

/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  Caches the result in cfun->machine->some_ld_name; aborts
   if no such symbol is found in the insn stream.  */

static const char *
get_some_local_dynamic_name (void)
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  abort ();
}

/* for_each_rtx callback: record the first local-dynamic SYMBOL_REF
   seen and stop the walk by returning 1.  */

static int
get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && local_dynamic_symbolic_operand (x, Pmode))
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}

/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
	otherwise nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w --  likewise, print the HImode name of the register.
   k --  likewise, print the SImode name of the register.
   q --  likewise, print the DImode name of the register.
   h --  print the QImode name for a "high" register, either ah, bh, ch or dh.
   y --  print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
7221 X -- don't print any sort of PIC '@' suffix for a symbol. 7222 & -- print some in-use local-dynamic symbol name. 7223 */ 7224 7225void 7226print_operand (FILE *file, rtx x, int code) 7227{ 7228 if (code) 7229 { 7230 switch (code) 7231 { 7232 case '*': 7233 if (ASSEMBLER_DIALECT == ASM_ATT) 7234 putc ('*', file); 7235 return; 7236 7237 case '&': 7238 assemble_name (file, get_some_local_dynamic_name ()); 7239 return; 7240 7241 case 'A': 7242 if (ASSEMBLER_DIALECT == ASM_ATT) 7243 putc ('*', file); 7244 else if (ASSEMBLER_DIALECT == ASM_INTEL) 7245 { 7246 /* Intel syntax. For absolute addresses, registers should not 7247 be surrounded by braces. */ 7248 if (GET_CODE (x) != REG) 7249 { 7250 putc ('[', file); 7251 PRINT_OPERAND (file, x, 0); 7252 putc (']', file); 7253 return; 7254 } 7255 } 7256 else 7257 abort (); 7258 7259 PRINT_OPERAND (file, x, 0); 7260 return; 7261 7262 7263 case 'L': 7264 if (ASSEMBLER_DIALECT == ASM_ATT) 7265 putc ('l', file); 7266 return; 7267 7268 case 'W': 7269 if (ASSEMBLER_DIALECT == ASM_ATT) 7270 putc ('w', file); 7271 return; 7272 7273 case 'B': 7274 if (ASSEMBLER_DIALECT == ASM_ATT) 7275 putc ('b', file); 7276 return; 7277 7278 case 'Q': 7279 if (ASSEMBLER_DIALECT == ASM_ATT) 7280 putc ('l', file); 7281 return; 7282 7283 case 'S': 7284 if (ASSEMBLER_DIALECT == ASM_ATT) 7285 putc ('s', file); 7286 return; 7287 7288 case 'T': 7289 if (ASSEMBLER_DIALECT == ASM_ATT) 7290 putc ('t', file); 7291 return; 7292 7293 case 'z': 7294 /* 387 opcodes don't get size suffixes if the operands are 7295 registers. */ 7296 if (STACK_REG_P (x)) 7297 return; 7298 7299 /* Likewise if using Intel opcodes. */ 7300 if (ASSEMBLER_DIALECT == ASM_INTEL) 7301 return; 7302 7303 /* This is the size of op from size of operand. 
*/ 7304 switch (GET_MODE_SIZE (GET_MODE (x))) 7305 { 7306 case 2: 7307#ifdef HAVE_GAS_FILDS_FISTS 7308 putc ('s', file); 7309#endif 7310 return; 7311 7312 case 4: 7313 if (GET_MODE (x) == SFmode) 7314 { 7315 putc ('s', file); 7316 return; 7317 } 7318 else 7319 putc ('l', file); 7320 return; 7321 7322 case 12: 7323 case 16: 7324 putc ('t', file); 7325 return; 7326 7327 case 8: 7328 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) 7329 { 7330#ifdef GAS_MNEMONICS 7331 putc ('q', file); 7332#else 7333 putc ('l', file); 7334 putc ('l', file); 7335#endif 7336 } 7337 else 7338 putc ('l', file); 7339 return; 7340 7341 default: 7342 abort (); 7343 } 7344 7345 case 'b': 7346 case 'w': 7347 case 'k': 7348 case 'q': 7349 case 'h': 7350 case 'y': 7351 case 'X': 7352 case 'P': 7353 break; 7354 7355 case 's': 7356 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT) 7357 { 7358 PRINT_OPERAND (file, x, 0); 7359 putc (',', file); 7360 } 7361 return; 7362 7363 case 'D': 7364 /* Little bit of braindamage here. The SSE compare instructions 7365 does use completely different names for the comparisons that the 7366 fp conditional moves. 
*/ 7367 switch (GET_CODE (x)) 7368 { 7369 case EQ: 7370 case UNEQ: 7371 fputs ("eq", file); 7372 break; 7373 case LT: 7374 case UNLT: 7375 fputs ("lt", file); 7376 break; 7377 case LE: 7378 case UNLE: 7379 fputs ("le", file); 7380 break; 7381 case UNORDERED: 7382 fputs ("unord", file); 7383 break; 7384 case NE: 7385 case LTGT: 7386 fputs ("neq", file); 7387 break; 7388 case UNGE: 7389 case GE: 7390 fputs ("nlt", file); 7391 break; 7392 case UNGT: 7393 case GT: 7394 fputs ("nle", file); 7395 break; 7396 case ORDERED: 7397 fputs ("ord", file); 7398 break; 7399 default: 7400 abort (); 7401 break; 7402 } 7403 return; 7404 case 'O': 7405#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 7406 if (ASSEMBLER_DIALECT == ASM_ATT) 7407 { 7408 switch (GET_MODE (x)) 7409 { 7410 case HImode: putc ('w', file); break; 7411 case SImode: 7412 case SFmode: putc ('l', file); break; 7413 case DImode: 7414 case DFmode: putc ('q', file); break; 7415 default: abort (); 7416 } 7417 putc ('.', file); 7418 } 7419#endif 7420 return; 7421 case 'C': 7422 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file); 7423 return; 7424 case 'F': 7425#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 7426 if (ASSEMBLER_DIALECT == ASM_ATT) 7427 putc ('.', file); 7428#endif 7429 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file); 7430 return; 7431 7432 /* Like above, but reverse condition */ 7433 case 'c': 7434 /* Check to see if argument to %c is really a constant 7435 and not a condition code which needs to be reversed. 
*/ 7436 if (GET_RTX_CLASS (GET_CODE (x)) != '<') 7437 { 7438 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'"); 7439 return; 7440 } 7441 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file); 7442 return; 7443 case 'f': 7444#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 7445 if (ASSEMBLER_DIALECT == ASM_ATT) 7446 putc ('.', file); 7447#endif 7448 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file); 7449 return; 7450 case '+': 7451 { 7452 rtx x; 7453 7454 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS) 7455 return; 7456 7457 x = find_reg_note (current_output_insn, REG_BR_PROB, 0); 7458 if (x) 7459 { 7460 int pred_val = INTVAL (XEXP (x, 0)); 7461 7462 if (pred_val < REG_BR_PROB_BASE * 45 / 100 7463 || pred_val > REG_BR_PROB_BASE * 55 / 100) 7464 { 7465 int taken = pred_val > REG_BR_PROB_BASE / 2; 7466 int cputaken = final_forward_branch_p (current_output_insn) == 0; 7467 7468 /* Emit hints only in the case default branch prediction 7469 heuristics would fail. */ 7470 if (taken != cputaken) 7471 { 7472 /* We use 3e (DS) prefix for taken branches and 7473 2e (CS) prefix for not taken branches. */ 7474 if (taken) 7475 fputs ("ds ; ", file); 7476 else 7477 fputs ("cs ; ", file); 7478 } 7479 } 7480 } 7481 return; 7482 } 7483 default: 7484 output_operand_lossage ("invalid operand code `%c'", code); 7485 } 7486 } 7487 7488 if (GET_CODE (x) == REG) 7489 print_reg (x, code, file); 7490 7491 else if (GET_CODE (x) == MEM) 7492 { 7493 /* No `byte ptr' prefix for call instructions. 
*/ 7494 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P') 7495 { 7496 const char * size; 7497 switch (GET_MODE_SIZE (GET_MODE (x))) 7498 { 7499 case 1: size = "BYTE"; break; 7500 case 2: size = "WORD"; break; 7501 case 4: size = "DWORD"; break; 7502 case 8: size = "QWORD"; break; 7503 case 12: size = "XWORD"; break; 7504 case 16: size = "XMMWORD"; break; 7505 default: 7506 abort (); 7507 } 7508 7509 /* Check for explicit size override (codes 'b', 'w' and 'k') */ 7510 if (code == 'b') 7511 size = "BYTE"; 7512 else if (code == 'w') 7513 size = "WORD"; 7514 else if (code == 'k') 7515 size = "DWORD"; 7516 7517 fputs (size, file); 7518 fputs (" PTR ", file); 7519 } 7520 7521 x = XEXP (x, 0); 7522 /* Avoid (%rip) for call operands. */ 7523 if (CONSTANT_ADDRESS_P (x) && code == 'P' 7524 && GET_CODE (x) != CONST_INT) 7525 output_addr_const (file, x); 7526 else if (this_is_asm_operands && ! address_operand (x, VOIDmode)) 7527 output_operand_lossage ("invalid constraints for operand"); 7528 else 7529 output_address (x); 7530 } 7531 7532 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode) 7533 { 7534 REAL_VALUE_TYPE r; 7535 long l; 7536 7537 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 7538 REAL_VALUE_TO_TARGET_SINGLE (r, l); 7539 7540 if (ASSEMBLER_DIALECT == ASM_ATT) 7541 putc ('$', file); 7542 fprintf (file, "0x%08lx", l); 7543 } 7544 7545 /* These float cases don't actually occur as immediate operands. 
 */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
	   && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else
    {
      /* X is a constant of some kind.  Code 'P' suppresses the
	 immediate markers ('$' for AT&T, "OFFSET FLAT:" for Intel).  */
      if (code != 'P')
	{
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (GET_CODE (x) == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}

/* Print a memory operand whose address is ADDR.  */

void
print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  /* Break the address into base, index, displacement, scale and
     segment.  An address that does not decompose is a bug here.  */
  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Emit an explicit segment override for %fs/%gs based addresses.  */
  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (USER_LABEL_PREFIX[0] == 0)
	putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      abort ();
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.
 */

      if (GET_CODE (disp) == CONST_INT)
	{
	  /* A bare numeric address: Intel syntax gets an explicit
	     "ds:" marker (unless another segment was printed above).  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
	    {
	      if (USER_LABEL_PREFIX[0] == 0)
		putc ('%', file);
	      fputs ("ds:", file);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);

      /* Use one byte shorter RIP relative addressing for 64bit mode.
	 Only plain (possibly offset) symbol or label references
	 qualify; TLS symbols are excluded.  */
      if (TARGET_64BIT
	  && ((GET_CODE (disp) == SYMBOL_REF
	       && ! tls_symbolic_operand (disp, GET_MODE (disp)))
	      || GET_CODE (disp) == LABEL_REF
	      || (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
		      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
	fputs ("(%rip)", file);
    }
  else
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  /* AT&T syntax: disp(base,index,scale).  */
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.
 */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (GET_CODE (disp) == CONST_INT)
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  /* Intel syntax: [base+index*scale+offset].  */
	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, 0, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}

/* Print the TLS UNSPEC wrappers (@GOTTPOFF, @TPOFF, ...) for X.
   Return false if X is not an UNSPEC this function knows how to
   print, so the caller can fall back to the default handling.  */

bool
output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.
 */
      fputs ("@GOTTPOFF", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@TPOFF", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      /* 64-bit uses the @TPOFF spelling for the negated offset too.  */
      if (TARGET_64BIT)
	fputs ("@TPOFF", file);
      else
	fputs ("@NTPOFF", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@DTPOFF", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@GOTTPOFF(%rip)", file);
      else
	fputs ("@GOTNTPOFF", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@INDNTPOFF", file);
      break;

    default:
      return false;
    }

  return true;
}

/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  lo_half[num] = adjust_address (op, SImode, 0);
	  hi_half[num] = adjust_address (op, SImode, 4);
	}
      else
	{
	  /* Constants have VOIDmode; treat them as DImode.  */
	  lo_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (SImode, op,
					      GET_MODE (op) == VOIDmode
					      ? DImode : GET_MODE (op), 4);
	}
    }
}
/* Split one or more TImode RTL references into pairs of DImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.
   "operands" is a pointer to an array of TImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses, but we
	 still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  lo_half[num] = adjust_address (op, DImode, 0);
	  hi_half[num] = adjust_address (op, DImode, 8);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
	}
    }
}

/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  /* NOTE: buf is static because the caller keeps using the returned
     template after this function returns.  */
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.
 */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else if (!is_sse)
    abort ();
#endif

  /* Pick the mnemonic stems: P is the 387 form (the "fi" variants
     handle an integer memory operand), SSEP the scalar SSE form.  */
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "div";
      break;

    default:
      abort ();
    }

  /* SSE case is complete here: ss/sd suffix by mode, two-operand form.  */
  if (is_sse)
    {
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
	strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
	strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative: canonicalize so the register matching operands[0]
	 is operands[1].  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].
 */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative: a memory operand on the left needs the
	 reversed ("r") instruction form.  */
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).
 */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      abort ();
    }

  /* Append the selected operand template to the mnemonic stem.  */
  strcat (buf, p);
  return buf;
}

/* Output code to initialize control word copies used by
   trunc?f?i patterns.  NORMAL is set to current control word, while ROUND_DOWN
   is set to control word rounding downwards.
 */
void
emit_i387_cw_initialization (rtx normal, rtx round_down)
{
  rtx reg = gen_reg_rtx (HImode);

  /* Capture the current control word into NORMAL and a pseudo.  */
  emit_insn (gen_x86_fnstcw_1 (normal));
  emit_move_insn (reg, normal);
  if (!TARGET_PARTIAL_REG_STALL && !optimize_size
      && !TARGET_64BIT)
    /* NOTE(review): presumably the insv pattern writes 0xc into the
       RC bit positions, equivalent to or-ing 0xc00 below but without a
       partial-register stall — confirm against movsi_insv_1.  */
    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
  else
    /* Set both x87 rounding-control bits (0xc00).  */
    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
  emit_move_insn (round_down, reg);
}

/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx insn, rtx *operands)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (!STACK_TOP_P (operands[1]))
    abort ();

  if (GET_CODE (operands[0]) != MEM)
    abort ();

  /* Switch to the truncating control word (operand 3), store, then
     restore the original control word (operand 2).  */
  output_asm_insn ("fldcw\t%3", operands);
  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z0\t%0", operands);
  else
    output_asm_insn ("fist%z0\t%0", operands);
  output_asm_insn ("fldcw\t%2", operands);

  return "";
}

/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.
 */

const char *
output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];
  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);

  /* For the fnstsw form operand 0 is the status-word destination, so
     the values being compared shift up by one.  */
  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }
  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "ucomiss\t{%1, %0|%0, %1}";
	else
	  return "comiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "ucomisd\t{%1, %0|%0, %1}";
	else
	  return "comisd\t{%1, %0|%0, %1}";
    }

  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p == 1)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return "fstp\t%y0";
	}
      else
	{
	  if (eflags_p == 2)
	    {
	      if (unordered_p)
		return "fucompp\n\tfnstsw\t%0";
	      else
		return "fcompp\n\tfnstsw\t%0";
	    }
	  else
	    {
	      if (unordered_p)
		return "fucompp";
	      else
		return "fcompp";
	    }
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.
 */

      /* Template table indexed by the 4-bit mask built below:
	 bit 3 = eflags_p (1 = fcomi forms, 2..3 = fnstsw forms),
	 bit 2 = integer-mode operand, bit 1 = unordered, bit 0 = pop.  */
      static const char * const alt[24] =
      {
	"fcom%z1\t%y1",
	"fcomp%z1\t%y1",
	"fucom%z1\t%y1",
	"fucomp%z1\t%y1",

	"ficom%z1\t%y1",
	"ficomp%z1\t%y1",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL,

	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 24)
	abort ();
      ret = alt[mask];
      if (ret == NULL)
	abort ();

      return ret;
    }
}

/* Output a jump-table element: a pointer-sized reference to the
   local label numbered VALUE.  */

void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

  if (TARGET_64BIT)
    {
#ifdef ASM_QUAD
      directive = ASM_QUAD;
#else
      abort ();
#endif
    }

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

/* Output a PIC jump-table element: label VALUE expressed relative to
   label REL, the GOT, or the Mach-O function base, as appropriate.  */

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  if (TARGET_64BIT)
    fprintf (file, "%s%s%d-%s%d\n",
	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
      machopic_output_function_base_name (file);
      fprintf(file, "\n");
    }
#endif
  else
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}

/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  if (!reload_completed)
    abort ();

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.
     NOTE(review): hard register 17 here is presumably FLAGS_REG, which
     the xor form clobbers — consider the macro for clarity.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}

/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

static rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}

/* Expand a move of OPERANDS[1] into OPERANDS[0] in MODE, legitimizing
   TLS and PIC symbol references along the way.  */

void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  int strict = (reload_in_progress || reload_completed);
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  /* TLS symbols need their access sequence expanded first.  */
  model = tls_symbolic_operand (op1, Pmode);
  if (model)
    {
      op1 = legitimize_tls_address (op1, model, true);
      op1 = force_operand (op1, op0);
      if (op1 == op0)
	return;
    }

  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
#if TARGET_MACHO
      if (MACHOPIC_PURE)
	{
	  rtx temp = ((reload_in_progress
		       || ((op0 && GET_CODE (op0) == REG)
			   && mode == Pmode))
		      ? op0 : gen_reg_rtx (Pmode));
	  op1 = machopic_indirect_data_reference (op1, temp);
	  op1 = machopic_legitimize_pic_address (op1, mode,
						 temp == op1 ?
						 0 : temp);
	}
      else if (MACHOPIC_INDIRECT)
	op1 = machopic_indirect_data_reference (op1, 0);
      if (op0 == op1)
	return;
#else
      if (GET_CODE (op0) == MEM)
	op1 = force_reg (Pmode, op1);
      else
	{
	  rtx temp = op0;
	  if (GET_CODE (temp) != REG)
	    temp = gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (op1, temp);
	  if (temp == op0)
	    return;
	  op1 = temp;
	}
#endif /* TARGET_MACHO */
    }
  else
    {
      /* mem->mem moves (other than pushes) need the source in a reg.  */
      if (GET_CODE (op0) == MEM
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && GET_CODE (op1) == MEM)
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (TARGET_64BIT && mode == DImode
	  && immediate_operand (op1, mode)
	  && !x86_64_zero_extended_value (op1)
	  && !register_operand (op0, mode)
	  && optimize && !reload_completed && !reload_in_progress)
	op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  if (strict)
	    ;
	  else if (GET_CODE (op1) == CONST_DOUBLE)
	    {
	      op1 = validize_mem (force_const_mem (mode, op1));
	      if (!register_operand (op0, mode))
		{
		  rtx temp = gen_reg_rtx (mode);
		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
		  emit_move_insn (op0, temp);
		  return;
		}
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}

/* Expand a vector-mode move of OPERANDS[1] into OPERANDS[0].  */

void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  /* Force constants other than zero into memory.
     We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (operands[0], mode)
      && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
    operands[1] = validize_mem (force_const_mem (mode, operands[1]));

  /* Make operand1 a register if it isn't already.  */
  if (!no_new_pseudos
      && !register_operand (operands[0], mode)
      && !register_operand (operands[1], mode))
    {
      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
      emit_move_insn (operands[0], temp);
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
}

/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators;
     swap the sources so the operand matching DST comes first.  */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
	  || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.
 */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
	matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
	       && rtx_equal_p (dst, src2))
	matching_memory = 2;
      else
	dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  Keep whichever one
     matches the destination; force the other into a register.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
	src2 = force_reg (mode, src2);
      else
	src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
	src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
	src2 = force_reg (mode, src2);
    }

  /* Emit the instruction, with a flags clobber except during reload.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.
 */

int
ix86_binary_operator_ok (enum rtx_code code,
			 enum machine_mode mode ATTRIBUTE_UNUSED,
			 rtx operands[3])
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutable, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
	    || (GET_RTX_CLASS (code) == 'c'
		&& rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutable and the source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != 'c'
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;
  return 1;
}

/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && !
      no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
	dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
	src = force_reg (mode, src);
    }

  /* Emit the instruction.  NOT does not touch the flags, so only it
     may be emitted without the flags clobber.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  */
      if (code != NOT)
	abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}

/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.
*/ 8639 8640int 8641ix86_match_ccmode (rtx insn, enum machine_mode req_mode) 8642{ 8643 rtx set; 8644 enum machine_mode set_mode; 8645 8646 set = PATTERN (insn); 8647 if (GET_CODE (set) == PARALLEL) 8648 set = XVECEXP (set, 0, 0); 8649 if (GET_CODE (set) != SET) 8650 abort (); 8651 if (GET_CODE (SET_SRC (set)) != COMPARE) 8652 abort (); 8653 8654 set_mode = GET_MODE (SET_DEST (set)); 8655 switch (set_mode) 8656 { 8657 case CCNOmode: 8658 if (req_mode != CCNOmode 8659 && (req_mode != CCmode 8660 || XEXP (SET_SRC (set), 1) != const0_rtx)) 8661 return 0; 8662 break; 8663 case CCmode: 8664 if (req_mode == CCGCmode) 8665 return 0; 8666 /* FALLTHRU */ 8667 case CCGCmode: 8668 if (req_mode == CCGOCmode || req_mode == CCNOmode) 8669 return 0; 8670 /* FALLTHRU */ 8671 case CCGOCmode: 8672 if (req_mode == CCZmode) 8673 return 0; 8674 /* FALLTHRU */ 8675 case CCZmode: 8676 break; 8677 8678 default: 8679 abort (); 8680 } 8681 8682 return (GET_MODE (SET_SRC (set)) == set_mode); 8683} 8684 8685/* Generate insn patterns to do an integer compare of OPERANDS. */ 8686 8687static rtx 8688ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1) 8689{ 8690 enum machine_mode cmpmode; 8691 rtx tmp, flags; 8692 8693 cmpmode = SELECT_CC_MODE (code, op0, op1); 8694 flags = gen_rtx_REG (cmpmode, FLAGS_REG); 8695 8696 /* This is very simple, but making the interface the same as in the 8697 FP case makes the rest of the code easier. */ 8698 tmp = gen_rtx_COMPARE (cmpmode, op0, op1); 8699 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp)); 8700 8701 /* Return the test that should be put into the flags user, i.e. 8702 the bcc, scc, or cmov instruction. */ 8703 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx); 8704} 8705 8706/* Figure out whether to use ordered or unordered fp comparisons. 8707 Return the appropriate mode to use. */ 8708 8709enum machine_mode 8710ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED) 8711{ 8712 /* ??? 
In order to make all comparisons reversible, we do all comparisons 8713 non-trapping when compiling for IEEE. Once gcc is able to distinguish 8714 all forms trapping and nontrapping comparisons, we can make inequality 8715 comparisons trapping again, since it results in better code when using 8716 FCOM based compares. */ 8717 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode; 8718} 8719 8720enum machine_mode 8721ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1) 8722{ 8723 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT) 8724 return ix86_fp_compare_mode (code); 8725 switch (code) 8726 { 8727 /* Only zero flag is needed. */ 8728 case EQ: /* ZF=0 */ 8729 case NE: /* ZF!=0 */ 8730 return CCZmode; 8731 /* Codes needing carry flag. */ 8732 case GEU: /* CF=0 */ 8733 case GTU: /* CF=0 & ZF=0 */ 8734 case LTU: /* CF=1 */ 8735 case LEU: /* CF=1 | ZF=1 */ 8736 return CCmode; 8737 /* Codes possibly doable only with sign flag when 8738 comparing against zero. */ 8739 case GE: /* SF=OF or SF=0 */ 8740 case LT: /* SF<>OF or SF=1 */ 8741 if (op1 == const0_rtx) 8742 return CCGOCmode; 8743 else 8744 /* For other cases Carry flag is not required. */ 8745 return CCGCmode; 8746 /* Codes doable only with sign flag when comparing 8747 against zero, but we miss jump instruction for it 8748 so we need to use relational tests against overflow 8749 that thus needs to be zero. */ 8750 case GT: /* ZF=0 & SF=OF */ 8751 case LE: /* ZF=1 | SF<>OF */ 8752 if (op1 == const0_rtx) 8753 return CCNOmode; 8754 else 8755 return CCGCmode; 8756 /* strcmp pattern do (use flags) and combine may ask us for proper 8757 mode. */ 8758 case USE: 8759 return CCmode; 8760 default: 8761 abort (); 8762 } 8763} 8764 8765/* Return the fixed registers used for condition codes. 
*/

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  /* P1 receives the integer flags register, P2 the FP status word.  */
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  /* CCGC and CCGOC differ only in whether the carry flag is valid, and
     CCGC is the stricter of the two.  */
  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  switch (m1)
    {
    default:
      abort ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCZmode:
      /* Any pair of integer CC modes collapses to full CCmode.  */
      switch (m2)
	{
	default:
	  return VOIDmode;

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCZmode:
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}

/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
{
  enum rtx_code swapped_code = swap_condition (code);
  /* FCOMI wins when it is no more expensive than the best strategy for
     either the comparison or its swapped form.  */
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
	  || (ix86_fp_comparison_cost (swapped_code)
	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
}

/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || op_mode == XFmode
	  || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      /* Note the double-negation: standard_80387_constant_p () == 0 means
	 op0 is NOT a special 80387 constant, i.e. it could go in memory.  */
      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  /* Constants loadable by fld1/fldz etc. go in a register;
	     everything else goes through the constant pool.  */
	  if (standard_80387_constant_p (op1))
	    op1 = force_reg (op_mode, op1);
	  else
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}

/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */
static enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  /* After FCOMI/SAHF the FP ordering lands in CF/ZF, so the FP codes
     map onto the unsigned integer codes.  */
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
      break;
    case UNEQ:
      return EQ;
      break;
    case UNLT:
      return LTU;
      break;
    case UNLE:
      return LEU;
      break;
    case LTGT:
      return NE;
      break;
    default:
      return UNKNOWN;
    }
}

/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is comparison code for branch that will
   branch around FIRST_CODE and SECOND_CODE.  If some of branches
   is not required, set value to NIL.
   We never require more than two branches.
*/ 8941static void 8942ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code, 8943 enum rtx_code *first_code, 8944 enum rtx_code *second_code) 8945{ 8946 *first_code = code; 8947 *bypass_code = NIL; 8948 *second_code = NIL; 8949 8950 /* The fcomi comparison sets flags as follows: 8951 8952 cmp ZF PF CF 8953 > 0 0 0 8954 < 0 0 1 8955 = 1 0 0 8956 un 1 1 1 */ 8957 8958 switch (code) 8959 { 8960 case GT: /* GTU - CF=0 & ZF=0 */ 8961 case GE: /* GEU - CF=0 */ 8962 case ORDERED: /* PF=0 */ 8963 case UNORDERED: /* PF=1 */ 8964 case UNEQ: /* EQ - ZF=1 */ 8965 case UNLT: /* LTU - CF=1 */ 8966 case UNLE: /* LEU - CF=1 | ZF=1 */ 8967 case LTGT: /* EQ - ZF=0 */ 8968 break; 8969 case LT: /* LTU - CF=1 - fails on unordered */ 8970 *first_code = UNLT; 8971 *bypass_code = UNORDERED; 8972 break; 8973 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */ 8974 *first_code = UNLE; 8975 *bypass_code = UNORDERED; 8976 break; 8977 case EQ: /* EQ - ZF=1 - fails on unordered */ 8978 *first_code = UNEQ; 8979 *bypass_code = UNORDERED; 8980 break; 8981 case NE: /* NE - ZF=0 - fails on unordered */ 8982 *first_code = LTGT; 8983 *second_code = UNORDERED; 8984 break; 8985 case UNGE: /* GEU - CF=0 - fails on unordered */ 8986 *first_code = GE; 8987 *second_code = UNORDERED; 8988 break; 8989 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */ 8990 *first_code = GT; 8991 *second_code = UNORDERED; 8992 break; 8993 default: 8994 abort (); 8995 } 8996 if (!TARGET_IEEE_FP) 8997 { 8998 *second_code = NIL; 8999 *bypass_code = NIL; 9000 } 9001} 9002 9003/* Return cost of comparison done fcom + arithmetics operations on AX. 9004 All following functions do use number of instructions as a cost metrics. 9005 In future this should be tweaked to compute bytes for optimize_size and 9006 take into account performance of various instructions on various CPUs. 
*/ 9007static int 9008ix86_fp_comparison_arithmetics_cost (enum rtx_code code) 9009{ 9010 if (!TARGET_IEEE_FP) 9011 return 4; 9012 /* The cost of code output by ix86_expand_fp_compare. */ 9013 switch (code) 9014 { 9015 case UNLE: 9016 case UNLT: 9017 case LTGT: 9018 case GT: 9019 case GE: 9020 case UNORDERED: 9021 case ORDERED: 9022 case UNEQ: 9023 return 4; 9024 break; 9025 case LT: 9026 case NE: 9027 case EQ: 9028 case UNGE: 9029 return 5; 9030 break; 9031 case LE: 9032 case UNGT: 9033 return 6; 9034 break; 9035 default: 9036 abort (); 9037 } 9038} 9039 9040/* Return cost of comparison done using fcomi operation. 9041 See ix86_fp_comparison_arithmetics_cost for the metrics. */ 9042static int 9043ix86_fp_comparison_fcomi_cost (enum rtx_code code) 9044{ 9045 enum rtx_code bypass_code, first_code, second_code; 9046 /* Return arbitrarily high cost when instruction is not supported - this 9047 prevents gcc from using it. */ 9048 if (!TARGET_CMOVE) 9049 return 1024; 9050 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 9051 return (bypass_code != NIL || second_code != NIL) + 2; 9052} 9053 9054/* Return cost of comparison done using sahf operation. 9055 See ix86_fp_comparison_arithmetics_cost for the metrics. */ 9056static int 9057ix86_fp_comparison_sahf_cost (enum rtx_code code) 9058{ 9059 enum rtx_code bypass_code, first_code, second_code; 9060 /* Return arbitrarily high cost when instruction is not preferred - this 9061 avoids gcc from using it. */ 9062 if (!TARGET_USE_SAHF && !optimize_size) 9063 return 1024; 9064 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 9065 return (bypass_code != NIL || second_code != NIL) + 3; 9066} 9067 9068/* Compute cost of the comparison done using any method. 9069 See ix86_fp_comparison_arithmetics_cost for the metrics. 
*/ 9070static int 9071ix86_fp_comparison_cost (enum rtx_code code) 9072{ 9073 int fcomi_cost, sahf_cost, arithmetics_cost = 1024; 9074 int min; 9075 9076 fcomi_cost = ix86_fp_comparison_fcomi_cost (code); 9077 sahf_cost = ix86_fp_comparison_sahf_cost (code); 9078 9079 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code); 9080 if (min > sahf_cost) 9081 min = sahf_cost; 9082 if (min > fcomi_cost) 9083 min = fcomi_cost; 9084 return min; 9085} 9086 9087/* Generate insn patterns to do a floating point compare of OPERANDS. */ 9088 9089static rtx 9090ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch, 9091 rtx *second_test, rtx *bypass_test) 9092{ 9093 enum machine_mode fpcmp_mode, intcmp_mode; 9094 rtx tmp, tmp2; 9095 int cost = ix86_fp_comparison_cost (code); 9096 enum rtx_code bypass_code, first_code, second_code; 9097 9098 fpcmp_mode = ix86_fp_compare_mode (code); 9099 code = ix86_prepare_fp_compare_args (code, &op0, &op1); 9100 9101 if (second_test) 9102 *second_test = NULL_RTX; 9103 if (bypass_test) 9104 *bypass_test = NULL_RTX; 9105 9106 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 9107 9108 /* Do fcomi/sahf based test when profitable. */ 9109 if ((bypass_code == NIL || bypass_test) 9110 && (second_code == NIL || second_test) 9111 && ix86_fp_comparison_arithmetics_cost (code) > cost) 9112 { 9113 if (TARGET_CMOVE) 9114 { 9115 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); 9116 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), 9117 tmp); 9118 emit_insn (tmp); 9119 } 9120 else 9121 { 9122 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); 9123 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); 9124 if (!scratch) 9125 scratch = gen_reg_rtx (HImode); 9126 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2)); 9127 emit_insn (gen_x86_sahf_1 (scratch)); 9128 } 9129 9130 /* The FP codes work out to act like unsigned. 
*/ 9131 intcmp_mode = fpcmp_mode; 9132 code = first_code; 9133 if (bypass_code != NIL) 9134 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode, 9135 gen_rtx_REG (intcmp_mode, FLAGS_REG), 9136 const0_rtx); 9137 if (second_code != NIL) 9138 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode, 9139 gen_rtx_REG (intcmp_mode, FLAGS_REG), 9140 const0_rtx); 9141 } 9142 else 9143 { 9144 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */ 9145 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); 9146 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); 9147 if (!scratch) 9148 scratch = gen_reg_rtx (HImode); 9149 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2)); 9150 9151 /* In the unordered case, we have to check C2 for NaN's, which 9152 doesn't happen to work out to anything nice combination-wise. 9153 So do some bit twiddling on the value we've got in AH to come 9154 up with an appropriate set of condition codes. */ 9155 9156 intcmp_mode = CCNOmode; 9157 switch (code) 9158 { 9159 case GT: 9160 case UNGT: 9161 if (code == GT || !TARGET_IEEE_FP) 9162 { 9163 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); 9164 code = EQ; 9165 } 9166 else 9167 { 9168 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 9169 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); 9170 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44))); 9171 intcmp_mode = CCmode; 9172 code = GEU; 9173 } 9174 break; 9175 case LT: 9176 case UNLT: 9177 if (code == LT && TARGET_IEEE_FP) 9178 { 9179 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 9180 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01))); 9181 intcmp_mode = CCmode; 9182 code = EQ; 9183 } 9184 else 9185 { 9186 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01))); 9187 code = NE; 9188 } 9189 break; 9190 case GE: 9191 case UNGE: 9192 if (code == GE || !TARGET_IEEE_FP) 9193 { 9194 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05))); 9195 code = EQ; 9196 } 9197 
else 9198 { 9199 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 9200 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, 9201 GEN_INT (0x01))); 9202 code = NE; 9203 } 9204 break; 9205 case LE: 9206 case UNLE: 9207 if (code == LE && TARGET_IEEE_FP) 9208 { 9209 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 9210 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); 9211 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); 9212 intcmp_mode = CCmode; 9213 code = LTU; 9214 } 9215 else 9216 { 9217 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); 9218 code = NE; 9219 } 9220 break; 9221 case EQ: 9222 case UNEQ: 9223 if (code == EQ && TARGET_IEEE_FP) 9224 { 9225 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 9226 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); 9227 intcmp_mode = CCmode; 9228 code = EQ; 9229 } 9230 else 9231 { 9232 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); 9233 code = NE; 9234 break; 9235 } 9236 break; 9237 case NE: 9238 case LTGT: 9239 if (code == NE && TARGET_IEEE_FP) 9240 { 9241 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 9242 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, 9243 GEN_INT (0x40))); 9244 code = NE; 9245 } 9246 else 9247 { 9248 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); 9249 code = EQ; 9250 } 9251 break; 9252 9253 case UNORDERED: 9254 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); 9255 code = NE; 9256 break; 9257 case ORDERED: 9258 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); 9259 code = EQ; 9260 break; 9261 9262 default: 9263 abort (); 9264 } 9265 } 9266 9267 /* Return the test that should be put into the flags user, i.e. 9268 the bcc, scc, or cmov instruction. 
*/ 9269 return gen_rtx_fmt_ee (code, VOIDmode, 9270 gen_rtx_REG (intcmp_mode, FLAGS_REG), 9271 const0_rtx); 9272} 9273 9274rtx 9275ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test) 9276{ 9277 rtx op0, op1, ret; 9278 op0 = ix86_compare_op0; 9279 op1 = ix86_compare_op1; 9280 9281 if (second_test) 9282 *second_test = NULL_RTX; 9283 if (bypass_test) 9284 *bypass_test = NULL_RTX; 9285 9286 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT) 9287 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX, 9288 second_test, bypass_test); 9289 else 9290 ret = ix86_expand_int_compare (code, op0, op1); 9291 9292 return ret; 9293} 9294 9295/* Return true if the CODE will result in nontrivial jump sequence. */ 9296bool 9297ix86_fp_jump_nontrivial_p (enum rtx_code code) 9298{ 9299 enum rtx_code bypass_code, first_code, second_code; 9300 if (!TARGET_CMOVE) 9301 return true; 9302 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 9303 return bypass_code != NIL || second_code != NIL; 9304} 9305 9306void 9307ix86_expand_branch (enum rtx_code code, rtx label) 9308{ 9309 rtx tmp; 9310 9311 switch (GET_MODE (ix86_compare_op0)) 9312 { 9313 case QImode: 9314 case HImode: 9315 case SImode: 9316 simple: 9317 tmp = ix86_expand_compare (code, NULL, NULL); 9318 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 9319 gen_rtx_LABEL_REF (VOIDmode, label), 9320 pc_rtx); 9321 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); 9322 return; 9323 9324 case SFmode: 9325 case DFmode: 9326 case XFmode: 9327 { 9328 rtvec vec; 9329 int use_fcomi; 9330 enum rtx_code bypass_code, first_code, second_code; 9331 9332 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0, 9333 &ix86_compare_op1); 9334 9335 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 9336 9337 /* Check whether we will use the natural sequence with one jump. If 9338 so, we can expand jump early. 
Otherwise delay expansion by 9339 creating compound insn to not confuse optimizers. */ 9340 if (bypass_code == NIL && second_code == NIL 9341 && TARGET_CMOVE) 9342 { 9343 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1, 9344 gen_rtx_LABEL_REF (VOIDmode, label), 9345 pc_rtx, NULL_RTX); 9346 } 9347 else 9348 { 9349 tmp = gen_rtx_fmt_ee (code, VOIDmode, 9350 ix86_compare_op0, ix86_compare_op1); 9351 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 9352 gen_rtx_LABEL_REF (VOIDmode, label), 9353 pc_rtx); 9354 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp); 9355 9356 use_fcomi = ix86_use_fcomi_compare (code); 9357 vec = rtvec_alloc (3 + !use_fcomi); 9358 RTVEC_ELT (vec, 0) = tmp; 9359 RTVEC_ELT (vec, 1) 9360 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18)); 9361 RTVEC_ELT (vec, 2) 9362 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17)); 9363 if (! use_fcomi) 9364 RTVEC_ELT (vec, 3) 9365 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode)); 9366 9367 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec)); 9368 } 9369 return; 9370 } 9371 9372 case DImode: 9373 if (TARGET_64BIT) 9374 goto simple; 9375 /* Expand DImode branch into multiple compare+branch. */ 9376 { 9377 rtx lo[2], hi[2], label2; 9378 enum rtx_code code1, code2, code3; 9379 9380 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1)) 9381 { 9382 tmp = ix86_compare_op0; 9383 ix86_compare_op0 = ix86_compare_op1; 9384 ix86_compare_op1 = tmp; 9385 code = swap_condition (code); 9386 } 9387 split_di (&ix86_compare_op0, 1, lo+0, hi+0); 9388 split_di (&ix86_compare_op1, 1, lo+1, hi+1); 9389 9390 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to 9391 avoid two branches. This costs one extra insn, so disable when 9392 optimizing for size. 
*/ 9393 9394 if ((code == EQ || code == NE) 9395 && (!optimize_size 9396 || hi[1] == const0_rtx || lo[1] == const0_rtx)) 9397 { 9398 rtx xor0, xor1; 9399 9400 xor1 = hi[0]; 9401 if (hi[1] != const0_rtx) 9402 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1], 9403 NULL_RTX, 0, OPTAB_WIDEN); 9404 9405 xor0 = lo[0]; 9406 if (lo[1] != const0_rtx) 9407 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1], 9408 NULL_RTX, 0, OPTAB_WIDEN); 9409 9410 tmp = expand_binop (SImode, ior_optab, xor1, xor0, 9411 NULL_RTX, 0, OPTAB_WIDEN); 9412 9413 ix86_compare_op0 = tmp; 9414 ix86_compare_op1 = const0_rtx; 9415 ix86_expand_branch (code, label); 9416 return; 9417 } 9418 9419 /* Otherwise, if we are doing less-than or greater-or-equal-than, 9420 op1 is a constant and the low word is zero, then we can just 9421 examine the high word. */ 9422 9423 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx) 9424 switch (code) 9425 { 9426 case LT: case LTU: case GE: case GEU: 9427 ix86_compare_op0 = hi[0]; 9428 ix86_compare_op1 = hi[1]; 9429 ix86_expand_branch (code, label); 9430 return; 9431 default: 9432 break; 9433 } 9434 9435 /* Otherwise, we need two or three jumps. 
*/ 9436 9437 label2 = gen_label_rtx (); 9438 9439 code1 = code; 9440 code2 = swap_condition (code); 9441 code3 = unsigned_condition (code); 9442 9443 switch (code) 9444 { 9445 case LT: case GT: case LTU: case GTU: 9446 break; 9447 9448 case LE: code1 = LT; code2 = GT; break; 9449 case GE: code1 = GT; code2 = LT; break; 9450 case LEU: code1 = LTU; code2 = GTU; break; 9451 case GEU: code1 = GTU; code2 = LTU; break; 9452 9453 case EQ: code1 = NIL; code2 = NE; break; 9454 case NE: code2 = NIL; break; 9455 9456 default: 9457 abort (); 9458 } 9459 9460 /* 9461 * a < b => 9462 * if (hi(a) < hi(b)) goto true; 9463 * if (hi(a) > hi(b)) goto false; 9464 * if (lo(a) < lo(b)) goto true; 9465 * false: 9466 */ 9467 9468 ix86_compare_op0 = hi[0]; 9469 ix86_compare_op1 = hi[1]; 9470 9471 if (code1 != NIL) 9472 ix86_expand_branch (code1, label); 9473 if (code2 != NIL) 9474 ix86_expand_branch (code2, label2); 9475 9476 ix86_compare_op0 = lo[0]; 9477 ix86_compare_op1 = lo[1]; 9478 ix86_expand_branch (code3, label); 9479 9480 if (code2 != NIL) 9481 emit_label (label2); 9482 return; 9483 } 9484 9485 default: 9486 abort (); 9487 } 9488} 9489 9490/* Split branch based on floating point condition. */ 9491void 9492ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2, 9493 rtx target1, rtx target2, rtx tmp) 9494{ 9495 rtx second, bypass; 9496 rtx label = NULL_RTX; 9497 rtx condition; 9498 int bypass_probability = -1, second_probability = -1, probability = -1; 9499 rtx i; 9500 9501 if (target2 != pc_rtx) 9502 { 9503 rtx tmp = target2; 9504 code = reverse_condition_maybe_unordered (code); 9505 target2 = target1; 9506 target1 = tmp; 9507 } 9508 9509 condition = ix86_expand_fp_compare (code, op1, op2, 9510 tmp, &second, &bypass); 9511 9512 if (split_branch_probability >= 0) 9513 { 9514 /* Distribute the probabilities across the jumps. 9515 Assume the BYPASS and SECOND to be always test 9516 for UNORDERED. 
*/ 9517 probability = split_branch_probability; 9518 9519 /* Value of 1 is low enough to make no need for probability 9520 to be updated. Later we may run some experiments and see 9521 if unordered values are more frequent in practice. */ 9522 if (bypass) 9523 bypass_probability = 1; 9524 if (second) 9525 second_probability = 1; 9526 } 9527 if (bypass != NULL_RTX) 9528 { 9529 label = gen_label_rtx (); 9530 i = emit_jump_insn (gen_rtx_SET 9531 (VOIDmode, pc_rtx, 9532 gen_rtx_IF_THEN_ELSE (VOIDmode, 9533 bypass, 9534 gen_rtx_LABEL_REF (VOIDmode, 9535 label), 9536 pc_rtx))); 9537 if (bypass_probability >= 0) 9538 REG_NOTES (i) 9539 = gen_rtx_EXPR_LIST (REG_BR_PROB, 9540 GEN_INT (bypass_probability), 9541 REG_NOTES (i)); 9542 } 9543 i = emit_jump_insn (gen_rtx_SET 9544 (VOIDmode, pc_rtx, 9545 gen_rtx_IF_THEN_ELSE (VOIDmode, 9546 condition, target1, target2))); 9547 if (probability >= 0) 9548 REG_NOTES (i) 9549 = gen_rtx_EXPR_LIST (REG_BR_PROB, 9550 GEN_INT (probability), 9551 REG_NOTES (i)); 9552 if (second != NULL_RTX) 9553 { 9554 i = emit_jump_insn (gen_rtx_SET 9555 (VOIDmode, pc_rtx, 9556 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, 9557 target2))); 9558 if (second_probability >= 0) 9559 REG_NOTES (i) 9560 = gen_rtx_EXPR_LIST (REG_BR_PROB, 9561 GEN_INT (second_probability), 9562 REG_NOTES (i)); 9563 } 9564 if (label != NULL_RTX) 9565 emit_label (label); 9566} 9567 9568int 9569ix86_expand_setcc (enum rtx_code code, rtx dest) 9570{ 9571 rtx ret, tmp, tmpreg, equiv; 9572 rtx second_test, bypass_test; 9573 9574 if (GET_MODE (ix86_compare_op0) == DImode 9575 && !TARGET_64BIT) 9576 return 0; /* FAIL */ 9577 9578 if (GET_MODE (dest) != QImode) 9579 abort (); 9580 9581 ret = ix86_expand_compare (code, &second_test, &bypass_test); 9582 PUT_MODE (ret, QImode); 9583 9584 tmp = dest; 9585 tmpreg = dest; 9586 9587 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret)); 9588 if (bypass_test || second_test) 9589 { 9590 rtx test = second_test; 9591 int bypass = 0; 9592 rtx tmp2 = 
gen_reg_rtx (QImode); 9593 if (bypass_test) 9594 { 9595 if (second_test) 9596 abort (); 9597 test = bypass_test; 9598 bypass = 1; 9599 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test))); 9600 } 9601 PUT_MODE (test, QImode); 9602 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test)); 9603 9604 if (bypass) 9605 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2)); 9606 else 9607 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2)); 9608 } 9609 9610 /* Attach a REG_EQUAL note describing the comparison result. */ 9611 equiv = simplify_gen_relational (code, QImode, 9612 GET_MODE (ix86_compare_op0), 9613 ix86_compare_op0, ix86_compare_op1); 9614 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv); 9615 9616 return 1; /* DONE */ 9617} 9618 9619/* Expand comparison setting or clearing carry flag. Return true when 9620 successful and set pop for the operation. */ 9621static bool 9622ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) 9623{ 9624 enum machine_mode mode = 9625 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); 9626 9627 /* Do not handle DImode compares that go trought special path. Also we can't 9628 deal with FP compares yet. This is possible to add. */ 9629 if ((mode == DImode && !TARGET_64BIT)) 9630 return false; 9631 if (FLOAT_MODE_P (mode)) 9632 { 9633 rtx second_test = NULL, bypass_test = NULL; 9634 rtx compare_op, compare_seq; 9635 9636 /* Shortcut: following common codes never translate into carry flag compares. */ 9637 if (code == EQ || code == NE || code == UNEQ || code == LTGT 9638 || code == ORDERED || code == UNORDERED) 9639 return false; 9640 9641 /* These comparisons require zero flag; swap operands so they won't. 
*/ 9642 if ((code == GT || code == UNLE || code == LE || code == UNGT) 9643 && !TARGET_IEEE_FP) 9644 { 9645 rtx tmp = op0; 9646 op0 = op1; 9647 op1 = tmp; 9648 code = swap_condition (code); 9649 } 9650 9651 /* Try to expand the comparison and verify that we end up with carry flag 9652 based comparison. This is fails to be true only when we decide to expand 9653 comparison using arithmetic that is not too common scenario. */ 9654 start_sequence (); 9655 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX, 9656 &second_test, &bypass_test); 9657 compare_seq = get_insns (); 9658 end_sequence (); 9659 9660 if (second_test || bypass_test) 9661 return false; 9662 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode 9663 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) 9664 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op)); 9665 else 9666 code = GET_CODE (compare_op); 9667 if (code != LTU && code != GEU) 9668 return false; 9669 emit_insn (compare_seq); 9670 *pop = compare_op; 9671 return true; 9672 } 9673 if (!INTEGRAL_MODE_P (mode)) 9674 return false; 9675 switch (code) 9676 { 9677 case LTU: 9678 case GEU: 9679 break; 9680 9681 /* Convert a==0 into (unsigned)a<1. */ 9682 case EQ: 9683 case NE: 9684 if (op1 != const0_rtx) 9685 return false; 9686 op1 = const1_rtx; 9687 code = (code == EQ ? LTU : GEU); 9688 break; 9689 9690 /* Convert a>b into b<a or a>=b-1. */ 9691 case GTU: 9692 case LEU: 9693 if (GET_CODE (op1) == CONST_INT) 9694 { 9695 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0)); 9696 /* Bail out on overflow. We still can swap operands but that 9697 would force loading of the constant into register. */ 9698 if (op1 == const0_rtx 9699 || !x86_64_immediate_operand (op1, GET_MODE (op1))) 9700 return false; 9701 code = (code == GTU ? GEU : LTU); 9702 } 9703 else 9704 { 9705 rtx tmp = op1; 9706 op1 = op0; 9707 op0 = tmp; 9708 code = (code == GTU ? LTU : GEU); 9709 } 9710 break; 9711 9712 /* Convert a>=0 into (unsigned)a<0x80000000. 
*/ 9713 case LT: 9714 case GE: 9715 if (mode == DImode || op1 != const0_rtx) 9716 return false; 9717 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); 9718 code = (code == LT ? GEU : LTU); 9719 break; 9720 case LE: 9721 case GT: 9722 if (mode == DImode || op1 != constm1_rtx) 9723 return false; 9724 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); 9725 code = (code == LE ? GEU : LTU); 9726 break; 9727 9728 default: 9729 return false; 9730 } 9731 /* Swapping operands may cause constant to appear as first operand. */ 9732 if (!nonimmediate_operand (op0, VOIDmode)) 9733 { 9734 if (no_new_pseudos) 9735 return false; 9736 op0 = force_reg (mode, op0); 9737 } 9738 ix86_compare_op0 = op0; 9739 ix86_compare_op1 = op1; 9740 *pop = ix86_expand_compare (code, NULL, NULL); 9741 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU) 9742 abort (); 9743 return true; 9744} 9745 9746int 9747ix86_expand_int_movcc (rtx operands[]) 9748{ 9749 enum rtx_code code = GET_CODE (operands[1]), compare_code; 9750 rtx compare_seq, compare_op; 9751 rtx second_test, bypass_test; 9752 enum machine_mode mode = GET_MODE (operands[0]); 9753 bool sign_bit_compare_p = false;; 9754 9755 start_sequence (); 9756 compare_op = ix86_expand_compare (code, &second_test, &bypass_test); 9757 compare_seq = get_insns (); 9758 end_sequence (); 9759 9760 compare_code = GET_CODE (compare_op); 9761 9762 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT)) 9763 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE))) 9764 sign_bit_compare_p = true; 9765 9766 /* Don't attempt mode expansion here -- if we had to expand 5 or 6 9767 HImode insns, we'd be swallowed in word prefix ops. 
*/ 9768 9769 if ((mode != HImode || TARGET_FAST_PREFIX) 9770 && (mode != DImode || TARGET_64BIT) 9771 && GET_CODE (operands[2]) == CONST_INT 9772 && GET_CODE (operands[3]) == CONST_INT) 9773 { 9774 rtx out = operands[0]; 9775 HOST_WIDE_INT ct = INTVAL (operands[2]); 9776 HOST_WIDE_INT cf = INTVAL (operands[3]); 9777 HOST_WIDE_INT diff; 9778 9779 diff = ct - cf; 9780 /* Sign bit compares are better done using shifts than we do by using 9781 sbb. */ 9782 if (sign_bit_compare_p 9783 || ix86_expand_carry_flag_compare (code, ix86_compare_op0, 9784 ix86_compare_op1, &compare_op)) 9785 { 9786 /* Detect overlap between destination and compare sources. */ 9787 rtx tmp = out; 9788 9789 if (!sign_bit_compare_p) 9790 { 9791 bool fpcmp = false; 9792 9793 compare_code = GET_CODE (compare_op); 9794 9795 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode 9796 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) 9797 { 9798 fpcmp = true; 9799 compare_code = ix86_fp_compare_code_to_integer (compare_code); 9800 } 9801 9802 /* To simplify rest of code, restrict to the GEU case. 
*/ 9803 if (compare_code == LTU) 9804 { 9805 HOST_WIDE_INT tmp = ct; 9806 ct = cf; 9807 cf = tmp; 9808 compare_code = reverse_condition (compare_code); 9809 code = reverse_condition (code); 9810 } 9811 else 9812 { 9813 if (fpcmp) 9814 PUT_CODE (compare_op, 9815 reverse_condition_maybe_unordered 9816 (GET_CODE (compare_op))); 9817 else 9818 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); 9819 } 9820 diff = ct - cf; 9821 9822 if (reg_overlap_mentioned_p (out, ix86_compare_op0) 9823 || reg_overlap_mentioned_p (out, ix86_compare_op1)) 9824 tmp = gen_reg_rtx (mode); 9825 9826 if (mode == DImode) 9827 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op)); 9828 else 9829 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op)); 9830 } 9831 else 9832 { 9833 if (code == GT || code == GE) 9834 code = reverse_condition (code); 9835 else 9836 { 9837 HOST_WIDE_INT tmp = ct; 9838 ct = cf; 9839 cf = tmp; 9840 diff = ct - cf; 9841 } 9842 tmp = emit_store_flag (tmp, code, ix86_compare_op0, 9843 ix86_compare_op1, VOIDmode, 0, -1); 9844 } 9845 9846 if (diff == 1) 9847 { 9848 /* 9849 * cmpl op0,op1 9850 * sbbl dest,dest 9851 * [addl dest, ct] 9852 * 9853 * Size 5 - 8. 9854 */ 9855 if (ct) 9856 tmp = expand_simple_binop (mode, PLUS, 9857 tmp, GEN_INT (ct), 9858 copy_rtx (tmp), 1, OPTAB_DIRECT); 9859 } 9860 else if (cf == -1) 9861 { 9862 /* 9863 * cmpl op0,op1 9864 * sbbl dest,dest 9865 * orl $ct, dest 9866 * 9867 * Size 8. 9868 */ 9869 tmp = expand_simple_binop (mode, IOR, 9870 tmp, GEN_INT (ct), 9871 copy_rtx (tmp), 1, OPTAB_DIRECT); 9872 } 9873 else if (diff == -1 && ct) 9874 { 9875 /* 9876 * cmpl op0,op1 9877 * sbbl dest,dest 9878 * notl dest 9879 * [addl dest, cf] 9880 * 9881 * Size 8 - 11. 
9882 */ 9883 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); 9884 if (cf) 9885 tmp = expand_simple_binop (mode, PLUS, 9886 copy_rtx (tmp), GEN_INT (cf), 9887 copy_rtx (tmp), 1, OPTAB_DIRECT); 9888 } 9889 else 9890 { 9891 /* 9892 * cmpl op0,op1 9893 * sbbl dest,dest 9894 * [notl dest] 9895 * andl cf - ct, dest 9896 * [addl dest, ct] 9897 * 9898 * Size 8 - 11. 9899 */ 9900 9901 if (cf == 0) 9902 { 9903 cf = ct; 9904 ct = 0; 9905 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); 9906 } 9907 9908 tmp = expand_simple_binop (mode, AND, 9909 copy_rtx (tmp), 9910 gen_int_mode (cf - ct, mode), 9911 copy_rtx (tmp), 1, OPTAB_DIRECT); 9912 if (ct) 9913 tmp = expand_simple_binop (mode, PLUS, 9914 copy_rtx (tmp), GEN_INT (ct), 9915 copy_rtx (tmp), 1, OPTAB_DIRECT); 9916 } 9917 9918 if (!rtx_equal_p (tmp, out)) 9919 emit_move_insn (copy_rtx (out), copy_rtx (tmp)); 9920 9921 return 1; /* DONE */ 9922 } 9923 9924 if (diff < 0) 9925 { 9926 HOST_WIDE_INT tmp; 9927 tmp = ct, ct = cf, cf = tmp; 9928 diff = -diff; 9929 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0))) 9930 { 9931 /* We may be reversing unordered compare to normal compare, that 9932 is not valid in general (we may convert non-trapping condition 9933 to trapping one), however on i386 we currently emit all 9934 comparisons unordered. 
*/ 9935 compare_code = reverse_condition_maybe_unordered (compare_code); 9936 code = reverse_condition_maybe_unordered (code); 9937 } 9938 else 9939 { 9940 compare_code = reverse_condition (compare_code); 9941 code = reverse_condition (code); 9942 } 9943 } 9944 9945 compare_code = NIL; 9946 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT 9947 && GET_CODE (ix86_compare_op1) == CONST_INT) 9948 { 9949 if (ix86_compare_op1 == const0_rtx 9950 && (code == LT || code == GE)) 9951 compare_code = code; 9952 else if (ix86_compare_op1 == constm1_rtx) 9953 { 9954 if (code == LE) 9955 compare_code = LT; 9956 else if (code == GT) 9957 compare_code = GE; 9958 } 9959 } 9960 9961 /* Optimize dest = (op0 < 0) ? -1 : cf. */ 9962 if (compare_code != NIL 9963 && GET_MODE (ix86_compare_op0) == GET_MODE (out) 9964 && (cf == -1 || ct == -1)) 9965 { 9966 /* If lea code below could be used, only optimize 9967 if it results in a 2 insn sequence. */ 9968 9969 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8 9970 || diff == 3 || diff == 5 || diff == 9) 9971 || (compare_code == LT && ct == -1) 9972 || (compare_code == GE && cf == -1)) 9973 { 9974 /* 9975 * notl op1 (if necessary) 9976 * sarl $31, op1 9977 * orl cf, op1 9978 */ 9979 if (ct != -1) 9980 { 9981 cf = ct; 9982 ct = -1; 9983 code = reverse_condition (code); 9984 } 9985 9986 out = emit_store_flag (out, code, ix86_compare_op0, 9987 ix86_compare_op1, VOIDmode, 0, -1); 9988 9989 out = expand_simple_binop (mode, IOR, 9990 out, GEN_INT (cf), 9991 out, 1, OPTAB_DIRECT); 9992 if (out != operands[0]) 9993 emit_move_insn (operands[0], out); 9994 9995 return 1; /* DONE */ 9996 } 9997 } 9998 9999 10000 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8 10001 || diff == 3 || diff == 5 || diff == 9) 10002 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL) 10003 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf)))) 10004 { 10005 /* 10006 * xorl dest,dest 10007 * cmpl op1,op2 10008 * setcc dest 
10009 * lea cf(dest*(ct-cf)),dest 10010 * 10011 * Size 14. 10012 * 10013 * This also catches the degenerate setcc-only case. 10014 */ 10015 10016 rtx tmp; 10017 int nops; 10018 10019 out = emit_store_flag (out, code, ix86_compare_op0, 10020 ix86_compare_op1, VOIDmode, 0, 1); 10021 10022 nops = 0; 10023 /* On x86_64 the lea instruction operates on Pmode, so we need 10024 to get arithmetics done in proper mode to match. */ 10025 if (diff == 1) 10026 tmp = copy_rtx (out); 10027 else 10028 { 10029 rtx out1; 10030 out1 = copy_rtx (out); 10031 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1)); 10032 nops++; 10033 if (diff & 1) 10034 { 10035 tmp = gen_rtx_PLUS (mode, tmp, out1); 10036 nops++; 10037 } 10038 } 10039 if (cf != 0) 10040 { 10041 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf)); 10042 nops++; 10043 } 10044 if (!rtx_equal_p (tmp, out)) 10045 { 10046 if (nops == 1) 10047 out = force_operand (tmp, copy_rtx (out)); 10048 else 10049 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp))); 10050 } 10051 if (!rtx_equal_p (out, operands[0])) 10052 emit_move_insn (operands[0], copy_rtx (out)); 10053 10054 return 1; /* DONE */ 10055 } 10056 10057 /* 10058 * General case: Jumpful: 10059 * xorl dest,dest cmpl op1, op2 10060 * cmpl op1, op2 movl ct, dest 10061 * setcc dest jcc 1f 10062 * decl dest movl cf, dest 10063 * andl (cf-ct),dest 1: 10064 * addl ct,dest 10065 * 10066 * Size 20. Size 14. 10067 * 10068 * This is reasonably steep, but branch mispredict costs are 10069 * high on modern cpus, so consider failing only if optimizing 10070 * for space. 
10071 */ 10072 10073 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) 10074 && BRANCH_COST >= 2) 10075 { 10076 if (cf == 0) 10077 { 10078 cf = ct; 10079 ct = 0; 10080 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0))) 10081 /* We may be reversing unordered compare to normal compare, 10082 that is not valid in general (we may convert non-trapping 10083 condition to trapping one), however on i386 we currently 10084 emit all comparisons unordered. */ 10085 code = reverse_condition_maybe_unordered (code); 10086 else 10087 { 10088 code = reverse_condition (code); 10089 if (compare_code != NIL) 10090 compare_code = reverse_condition (compare_code); 10091 } 10092 } 10093 10094 if (compare_code != NIL) 10095 { 10096 /* notl op1 (if needed) 10097 sarl $31, op1 10098 andl (cf-ct), op1 10099 addl ct, op1 10100 10101 For x < 0 (resp. x <= -1) there will be no notl, 10102 so if possible swap the constants to get rid of the 10103 complement. 10104 True/false will be -1/0 while code below (store flag 10105 followed by decrement) is 0/-1, so the constants need 10106 to be exchanged once more. 
*/ 10107 10108 if (compare_code == GE || !cf) 10109 { 10110 code = reverse_condition (code); 10111 compare_code = LT; 10112 } 10113 else 10114 { 10115 HOST_WIDE_INT tmp = cf; 10116 cf = ct; 10117 ct = tmp; 10118 } 10119 10120 out = emit_store_flag (out, code, ix86_compare_op0, 10121 ix86_compare_op1, VOIDmode, 0, -1); 10122 } 10123 else 10124 { 10125 out = emit_store_flag (out, code, ix86_compare_op0, 10126 ix86_compare_op1, VOIDmode, 0, 1); 10127 10128 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx, 10129 copy_rtx (out), 1, OPTAB_DIRECT); 10130 } 10131 10132 out = expand_simple_binop (mode, AND, copy_rtx (out), 10133 gen_int_mode (cf - ct, mode), 10134 copy_rtx (out), 1, OPTAB_DIRECT); 10135 if (ct) 10136 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct), 10137 copy_rtx (out), 1, OPTAB_DIRECT); 10138 if (!rtx_equal_p (out, operands[0])) 10139 emit_move_insn (operands[0], copy_rtx (out)); 10140 10141 return 1; /* DONE */ 10142 } 10143 } 10144 10145 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) 10146 { 10147 /* Try a few things more with specific constants and a variable. */ 10148 10149 optab op; 10150 rtx var, orig_out, out, tmp; 10151 10152 if (BRANCH_COST <= 2) 10153 return 0; /* FAIL */ 10154 10155 /* If one of the two operands is an interesting constant, load a 10156 constant with the above and mask it in with a logical operation. 
*/ 10157 10158 if (GET_CODE (operands[2]) == CONST_INT) 10159 { 10160 var = operands[3]; 10161 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx) 10162 operands[3] = constm1_rtx, op = and_optab; 10163 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx) 10164 operands[3] = const0_rtx, op = ior_optab; 10165 else 10166 return 0; /* FAIL */ 10167 } 10168 else if (GET_CODE (operands[3]) == CONST_INT) 10169 { 10170 var = operands[2]; 10171 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx) 10172 operands[2] = constm1_rtx, op = and_optab; 10173 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx) 10174 operands[2] = const0_rtx, op = ior_optab; 10175 else 10176 return 0; /* FAIL */ 10177 } 10178 else 10179 return 0; /* FAIL */ 10180 10181 orig_out = operands[0]; 10182 tmp = gen_reg_rtx (mode); 10183 operands[0] = tmp; 10184 10185 /* Recurse to get the constant loaded. */ 10186 if (ix86_expand_int_movcc (operands) == 0) 10187 return 0; /* FAIL */ 10188 10189 /* Mask in the interesting variable. */ 10190 out = expand_binop (mode, op, var, tmp, orig_out, 0, 10191 OPTAB_WIDEN); 10192 if (!rtx_equal_p (out, orig_out)) 10193 emit_move_insn (copy_rtx (orig_out), copy_rtx (out)); 10194 10195 return 1; /* DONE */ 10196 } 10197 10198 /* 10199 * For comparison with above, 10200 * 10201 * movl cf,dest 10202 * movl ct,tmp 10203 * cmpl op1,op2 10204 * cmovcc tmp,dest 10205 * 10206 * Size 15. 10207 */ 10208 10209 if (! nonimmediate_operand (operands[2], mode)) 10210 operands[2] = force_reg (mode, operands[2]); 10211 if (! 
nonimmediate_operand (operands[3], mode)) 10212 operands[3] = force_reg (mode, operands[3]); 10213 10214 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3])) 10215 { 10216 rtx tmp = gen_reg_rtx (mode); 10217 emit_move_insn (tmp, operands[3]); 10218 operands[3] = tmp; 10219 } 10220 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2])) 10221 { 10222 rtx tmp = gen_reg_rtx (mode); 10223 emit_move_insn (tmp, operands[2]); 10224 operands[2] = tmp; 10225 } 10226 10227 if (! register_operand (operands[2], VOIDmode) 10228 && (mode == QImode 10229 || ! register_operand (operands[3], VOIDmode))) 10230 operands[2] = force_reg (mode, operands[2]); 10231 10232 if (mode == QImode 10233 && ! register_operand (operands[3], VOIDmode)) 10234 operands[3] = force_reg (mode, operands[3]); 10235 10236 emit_insn (compare_seq); 10237 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 10238 gen_rtx_IF_THEN_ELSE (mode, 10239 compare_op, operands[2], 10240 operands[3]))); 10241 if (bypass_test) 10242 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), 10243 gen_rtx_IF_THEN_ELSE (mode, 10244 bypass_test, 10245 copy_rtx (operands[3]), 10246 copy_rtx (operands[0])))); 10247 if (second_test) 10248 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), 10249 gen_rtx_IF_THEN_ELSE (mode, 10250 second_test, 10251 copy_rtx (operands[2]), 10252 copy_rtx (operands[0])))); 10253 10254 return 1; /* DONE */ 10255} 10256 10257int 10258ix86_expand_fp_movcc (rtx operands[]) 10259{ 10260 enum rtx_code code; 10261 rtx tmp; 10262 rtx compare_op, second_test, bypass_test; 10263 10264 /* For SF/DFmode conditional moves based on comparisons 10265 in same mode, we may want to use SSE min/max instructions. */ 10266 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode) 10267 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode)) 10268 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0]) 10269 /* The SSE comparisons does not support the LTGT/UNEQ pair. 
 */
      && (!TARGET_IEEE_FP
	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
      /* We may be called from the post-reload splitter.  */
      && (!REG_P (operands[0])
	  || SSE_REG_P (operands[0])
	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
    {
      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
      code = GET_CODE (operands[1]);

      /* See if we have (cross) match between comparison operands and
         conditional move operands.  */
      if (rtx_equal_p (operands[2], op1))
	{
	  /* Swap the compare so the "true" arm matches op0.  */
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = reverse_condition_maybe_unordered (code);
	}
      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
	{
	  /* Check for min operation.  */
	  if (code == LT || code == UNLE)
	    {
	       if (code == UNLE)
		{
		  rtx tmp = op0;
		  op0 = op1;
		  op1 = tmp;
		}
	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	       /* minss/minsd require the first source in a register.  */
	       if (memory_operand (op0, VOIDmode))
		 op0 = force_reg (GET_MODE (operands[0]), op0);
	       if (GET_MODE (operands[0]) == SFmode)
		 emit_insn (gen_minsf3 (operands[0], op0, op1));
	       else
		 emit_insn (gen_mindf3 (operands[0], op0, op1));
	       return 1;
	    }
	  /* Check for max operation.  */
	  if (code == GT || code == UNGE)
	    {
	      if (code == UNGE)
		{
		  rtx tmp = op0;
		  op0 = op1;
		  op1 = tmp;
		}
	      operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
	      if (memory_operand (op0, VOIDmode))
		op0 = force_reg (GET_MODE (operands[0]), op0);
	      if (GET_MODE (operands[0]) == SFmode)
		emit_insn (gen_maxsf3 (operands[0], op0, op1));
	      else
		emit_insn (gen_maxdf3 (operands[0], op0, op1));
	      return 1;
	    }
	}
      /* Manage condition to be sse_comparison_operator.  In case we are
	 in non-ieee mode, try to canonicalize the destination operand
	 to be first in the comparison - this helps reload to avoid extra
	 moves.  */
      if (!sse_comparison_operator (operands[1], VOIDmode)
	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
	{
	  rtx tmp = ix86_compare_op0;
	  ix86_compare_op0 = ix86_compare_op1;
	  ix86_compare_op1 = tmp;
	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      /* Similarly try to manage result to be first operand of conditional
	 move.  We also don't support the NE comparison on SSE, so try to
	 avoid it.  */
      if ((rtx_equal_p (operands[0], operands[3])
	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
	{
	  rtx tmp = operands[2];
	  operands[2] = operands[3];
	  operands[3] = tmp;
	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
					  (GET_CODE (operands[1])),
					VOIDmode, ix86_compare_op0,
					ix86_compare_op1);
	}
      if (GET_MODE (operands[0]) == SFmode)
	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      else
	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
				    operands[2], operands[3],
				    ix86_compare_op0, ix86_compare_op1));
      return 1;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* The floating point conditional move instructions don't directly
     support signed integer comparisons.  */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      if (second_test != NULL || bypass_test != NULL)
	abort ();
      /* Reduce the condition to a setcc result compared against zero,
	 which fcmov can handle.  */
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  /* Copy source operands that overlap the destination, since the extra
     bypass/second conditional moves below would clobber them.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (GET_MODE (operands[0]));
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						compare_op,
						operands[2],
						operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  bypass_test,
						  operands[3],
						  operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
						  second_test,
						  operands[2],
						  operands[0])));

  return 1;
}

/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
int
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* Only +/- 1 can be folded into the carry of an adc/sbb.  */
  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return 0;
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
				       ix86_compare_op1, &compare_op))
     return 0;
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  /* Canonicalize to LTU (carry set); for GEU use val = -1 so that
     adding/subtracting it together with the carry gives the same result.  */
  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }
  PUT_MODE (compare_op, mode);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
	{
	  case QImode:
	    emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case HImode:
	    emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case SImode:
	    emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case DImode:
	    emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	    break;
	  default:
	    abort ();
	}
    }
  else
    {
      switch (GET_MODE (operands[0]))
	{
	  case QImode:
	    emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case HImode:
	    emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case SImode:
	    emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
	    break;
	  case DImode:
	    emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	    break;
	  default:
	    abort ();
	}
    }
  return 1; /* DONE */
}


/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and nonoffsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  /* Number of word-sized parts: 32-bit words on ia32, 64-bit on x86_64.  */
  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
    abort ();
  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.
This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
	abort ();

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Hard registers only: consecutive SImode pieces.  */
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  abort ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    abort ();
	}
    }
  else
    {
      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      if (!reload_completed)
		abort ();
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[3];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);
	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
	        parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
	        parts[0] = immed_double_const (l[0], l[1], DImode);
	      if (upper_mode == SImode)
	        parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
	        parts[1]
		  = gen_int_mode
		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		       DImode);
	      else
	        parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    abort ();
	}
    }

  return size;
}

/* Emit insns to perform a move or push of DI, DF, and XF values.
   Operands 2-4 contain the input values in the correct order;
   operands 5-7 contain the output values.  (NOTE(review): the old
   comment claimed a bool return, but this function is void.)  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
        operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
	   && ! offsettable_memref_p (operands[0]))
    abort ();

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.
     Each push moves the stack pointer, so re-base later parts on the
     address of the part pushed just before them.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  part[1][1] = replace_equiv_address (part[1][1],
				      plus_constant (base, UNITS_PER_WORD));
	  if (nparts == 3)
	    part[1][2] = replace_equiv_address (part[1][2],
					plus_constant (base, 8));
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      /* Pad the 12-byte XFmode push up to 16 bytes.  */
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
                emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these comes from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      if (GET_CODE (part[1][1]) == MEM)
		part[1][1] = adjust_address (part[1][1], DImode, 0);
	      else if (REG_P (part[1][1]))
		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
	      else
		abort ();
	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* Copy high-to-low.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      /* Copy low-to-high.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }
  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}

/* Split a DImode left shift into SImode insns (shld + shl, plus a
   conditional adjustment when the count is non-constant).  */
void
ix86_split_ashldi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  /* Whole-word shift: move low into high, zero the low word.  */
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      /* Fix up for counts >= 32: shld/shl only use the low 5 bits.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = force_reg (SImode, const0_rtx);
	  else
	    emit_move_insn (scratch, const0_rtx);

	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}

/* Split a DImode arithmetic right shift into SImode insns (shrd + sar,
   plus a conditional adjustment when the count is non-constant).  */
void
ix86_split_ashrdi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);

	  /* Fill the high word with sign bits.  Before reload we may
	     shift the fresh low copy directly; afterwards use a
	     two-insn sequence that is safe for overlapping hard regs.  */
	  if (! reload_completed)
	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
	  else
	    {
	      emit_move_insn (high[0], low[0]);
	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
	    }

	  if (count > 32)
	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
	{
	  if (! no_new_pseudos)
	    scratch = gen_reg_rtx (SImode);
	  /* scratch = sign of the original high word.  */
	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}

/* Split a DImode logical right shift into SImode insns (shrd + shr,
   plus a conditional adjustment when the count is non-constant).  */
void
ix86_split_lshrdi (rtx *operands, rtx scratch)
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], const0_rtx);

	  if (count > 32)
	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10974 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count))); 10975 } 10976 } 10977 else 10978 { 10979 if (!rtx_equal_p (operands[0], operands[1])) 10980 emit_move_insn (operands[0], operands[1]); 10981 10982 split_di (operands, 1, low, high); 10983 10984 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2])); 10985 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2])); 10986 10987 /* Heh. By reversing the arguments, we can reuse this pattern. */ 10988 if (TARGET_CMOVE && (! no_new_pseudos || scratch)) 10989 { 10990 if (! no_new_pseudos) 10991 scratch = force_reg (SImode, const0_rtx); 10992 else 10993 emit_move_insn (scratch, const0_rtx); 10994 10995 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], 10996 scratch)); 10997 } 10998 else 10999 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2])); 11000 } 11001} 11002 11003/* Helper function for the string operations below. Dest VARIABLE whether 11004 it is aligned to VALUE bytes. If true, jump to the label. */ 11005static rtx 11006ix86_expand_aligntest (rtx variable, int value) 11007{ 11008 rtx label = gen_label_rtx (); 11009 rtx tmpcount = gen_reg_rtx (GET_MODE (variable)); 11010 if (GET_MODE (variable) == DImode) 11011 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value))); 11012 else 11013 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value))); 11014 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable), 11015 1, label); 11016 return label; 11017} 11018 11019/* Adjust COUNTER by the VALUE. */ 11020static void 11021ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value) 11022{ 11023 if (GET_MODE (countreg) == DImode) 11024 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value))); 11025 else 11026 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value))); 11027} 11028 11029/* Zero extend possibly SImode EXP to Pmode register. 
 */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  rtx r;
  /* VOIDmode constants get loaded directly into a Pmode register.  */
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  /* Otherwise EXP is SImode and Pmode is DImode: zero extend.  */
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}

/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrstr contains similar code.

   DST and SRC are BLKmode MEMs, COUNT_EXP the byte count, ALIGN_EXP the
   known common alignment.  Return 1 when the copy was expanded inline,
   0 to tell the caller to fall back to a library call.  */
int
ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
{
  rtx srcreg, destreg, countreg, srcexp, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi or edi.  */
  if (global_regs[4] || global_regs[5])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
	return 0;
    }

  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  if (counter_mode != SImode && counter_mode != DImode)
    abort ();

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
  if (srcreg != XEXP (src, 0))
    src = replace_equiv_address_nv (src, srcreg);

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      emit_insn (gen_cld ());
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      /* destexp/srcexp describe the final pointer values after the
	 rep movsb, for the benefit of the rep_mov pattern.  */
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
			      destexp, srcexp));
    }

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      unsigned HOST_WIDE_INT offset = 0;
      /* Word size of the rep move: 8 bytes on 64-bit unless -Os.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      rtx srcmem, dstmem;

      emit_insn (gen_cld ());
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);

	  destexp = gen_rtx_ASHIFT (Pmode, countreg,
				    GEN_INT (size == 4 ? 2 : 3));
	  srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
	  destexp = gen_rtx_PLUS (Pmode, destexp, destreg);

	  emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
				  countreg, destexp, srcexp));
	  offset = count & ~(size - 1);
	}
      /* Copy the remaining 0..size-1 bytes with individual moves.  */
      if (size == 8 && (count & 0x04))
	{
	  srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  offset += 4;
	}
      if (count & 0x02)
	{
	  srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  offset += 2;
	}
      if (count & 0x01)
	{
	  srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
						 offset);
	  dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
						 offset);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
     allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      rtx srcmem, dstmem;
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);
      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);
      src = change_address (src, BLKmode, srcreg);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.

	 Also emit call when we know that the count is large and call overhead
	 will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align destination and to copy parts smaller
	 than 4 bytes, because gcc is able to optimize such code better (in
	 the case the destination or the count really is aligned, gcc is often
	 able to predict the branches) and also it is friendlier to the
	 hardware branch prediction.

	 Using loops is beneficial for generic case, because we can
	 handle small counts using the loops.  Many CPUs (such as Athlon)
	 have large REP prefix setup costs.

	 This is quite costly.  Maybe we can revisit this decision later or
	 add some customizability to this code.  */

      /* When the count is unknown, skip the alignment prologue for
	 counts smaller than the desired alignment.  */
      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  srcmem = change_address (src, QImode, srcreg);
	  dstmem = change_address (dst, QImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  srcmem = change_address (src, HImode, srcreg);
	  dstmem = change_address (dst, HImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  srcmem = change_address (src, SImode, srcreg);
	  dstmem = change_address (dst, SImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}
      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
	}
      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
			      countreg2, destexp, srcexp));

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      /* Epilogue: copy any remaining 4/2/1-byte tail, testing the count
	 register at run time when the count was not known.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	{
	  srcmem = change_address (src, SImode, srcreg);
	  dstmem = change_address (dst, SImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
      if ((align <= 4 || count == 0) && TARGET_64BIT)
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  srcmem = change_address (src, SImode, srcreg);
	  dstmem = change_address (dst, SImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	{
	  srcmem = change_address (src, HImode, srcreg);
	  dstmem = change_address (dst, HImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  srcmem = change_address (src, HImode, srcreg);
	  dstmem = change_address (dst, HImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	{
	  srcmem = change_address (src, QImode, srcreg);
	  dstmem = change_address (dst, QImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	}
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  srcmem = change_address (src, QImode, srcreg);
	  dstmem = change_address (dst, QImode, destreg);
	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }

  return 1;
}

/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  expand_movstr contains similar code.

   DST is a BLKmode MEM, COUNT_EXP the byte count, ALIGN_EXP the known
   alignment.  Return 1 when expanded inline, 0 to fall back to a
   library call.  */
int
ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
{
  rtx destreg, zeroreg, countreg, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi.  */
  if (global_regs[4])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
	return 0;
    }
  /* Figure out proper mode for counter.  For 32bits it is always SImode,
     for 64bits use SImode when possible, otherwise DImode.
     Set count to number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
      || x86_64_zero_extended_value (count_exp))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);

  emit_insn (gen_cld ());

  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4.  */

  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
    {
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
    }
  else if (count != 0
	   && (align >= 8
	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
	       || optimize_size || count < (unsigned int) 64))
    {
      /* Constant-count case: rep stos of full words, then explicit
	 stores for the 0..size-1 byte tail.  */
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      unsigned HOST_WIDE_INT offset = 0;

      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
	{
	  countreg = copy_to_mode_reg (counter_mode,
				       GEN_INT ((count >> (size == 4 ? 2 : 3))
						& (TARGET_64BIT ? -1 : 0x3fffffff)));
	  countreg = ix86_zero_extend_to_Pmode (countreg);
	  destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
	  destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
	  emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
	  offset = count & ~(size - 1);
	}
      if (size == 8 && (count & 0x04))
	{
	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  offset += 4;
	}
      if (count & 0x02)
	{
	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  offset += 2;
	}
      if (count & 0x01)
	{
	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
						  offset);
	  emit_insn (gen_strset (destreg, mem,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	}
    }
  else
    {
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
			       && (count == 0 || count >= (unsigned int) 260)
			       ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
	 library version, since it is usually equally fast and result in
	 shorter code.

	 Also emit call when we know that the count is large and call overhead
	 will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
	return 0;

      if (TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
      /* Get rid of MEM_OFFSET, it won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);

      /* When the count is unknown, skip the alignment prologue for
	 counts smaller than the desired alignment.  */
      if (count == 0 && align < desired_alignment)
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
				   LEU, 0, counter_mode, 1, label);
	}
      if (align <= 1)
	{
	  rtx label = ix86_expand_aligntest (destreg, 1);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 1);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 2)
	{
	  rtx label = ix86_expand_aligntest (destreg, 2);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  ix86_adjust_counter (countreg, 2);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align <= 4 && desired_alignment > 4)
	{
	  rtx label = ix86_expand_aligntest (destreg, 4);
	  emit_insn (gen_strset (destreg, dst,
				 (TARGET_64BIT
				  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
				  : zeroreg)));
	  ix86_adjust_counter (countreg, 4);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      if (label && desired_alignment > 4 && !TARGET_64BIT)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  label = NULL_RTX;
	}

      if (!TARGET_SINGLE_STRINGOP)
	emit_insn (gen_cld ());
      if (TARGET_64BIT)
	{
	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
				  GEN_INT (3)));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
	}
      else
	{
	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
	}
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));

      if (label)
	{
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}

      /* Epilogue: clear any remaining 4/2/1-byte tail.  */
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
	{
	  rtx label = ix86_expand_aligntest (countreg, 4);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 2 && count != 0 && (count & 2))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 2);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (align > 1 && count != 0 && (count & 1))
	emit_insn (gen_strset (destreg, dst,
			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
	{
	  rtx label = ix86_expand_aligntest (countreg, 1);
	  emit_insn (gen_strset (destreg, dst,
				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  return 1;
}

/* Expand strlen.  OUT receives the length, SRC is the string,
   EOSCHAR the terminator (const0_rtx for plain strlen), ALIGN the
   known alignment.  Return 1 when expanded inline, 0 to use the
   generic expander.  */
int
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid it's
     expanding unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      if (TARGET_64BIT)
	emit_insn (gen_subdi3 (out, out, addr));
      else
	emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      /* repnz; scasb variant: scan for EOSCHAR with count -1, then
	 derive the length from the remaining count.  */
      rtx unspec;
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
						 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      if (TARGET_64BIT)
	{
	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
	}
      else
	{
	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
	}
    }
  return 1;
}

/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.
 */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if is aligned to 4 - byte.  */

	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  if (TARGET_64BIT)
	    emit_insn (gen_adddi3 (out, out, const1_rtx));
	  else
	    emit_insn (gen_addsi3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
	emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  if (TARGET_CMOVE)
    {
      /* Branch-free epilogue: use conditional moves to step past the
	 first halfword when the zero byte is not in it.  */
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
			      gen_rtx_PLUS (Pmode, out, GEN_INT (2))));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						    reg2,
						    out)));

    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
	emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
      else
	emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);

    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  /* NOTE(review): 17 appears to be FLAGS_REG (cf. the gen_rtx_REG
     (CCNOmode, FLAGS_REG) uses above) — confirm against i386.h.  */
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));

  emit_label (end_0_label);
}

/* Emit a call insn.  RETVAL is the value register or NULL for a call
   with no result; FNADDR is a MEM wrapping the function address;
   CALLARG1 is the argument-bytes rtx; CALLARG2, when TARGET_64BIT and
   non-negative, is the number of SSE registers used by a varargs call
   (loaded into %al); POP is the number of bytes the callee pops (never
   on 64-bit); SIBCALL is nonzero for a sibling call.  */
void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
		  rtx callarg2 ATTRIBUTE_UNUSED,
		  rtx pop, int sibcall)
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  if (TARGET_64BIT && pop)
    abort ();

#if TARGET_MACHO
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      /* Register 0 in QImode is %al.  */
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }
#endif /* TARGET_MACHO */

  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
  /* 64-bit indirect sibcalls go through R11 so the address register
     is not one the epilogue restores.  */
  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
    {
      rtx addr;
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      /* Represent the callee's stack pop as a parallel sp adjustment.  */
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}


/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared (sizeof (struct machine_function));
  /* -1 marks the fast prologue/epilogue register count as not yet
     computed.  */
  f->use_fast_prologue_epilogue_nregs = -1;

  return f;
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.
 */

rtx
assign_386_stack_local (enum machine_mode mode, int n)
{
  struct stack_local_entry *s;

  if (n < 0 || n >= MAX_386_STACK_LOCALS)
    abort ();

  /* Reuse a previously assigned slot with the same mode and index.  */
  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->rtl;

  /* Not found: allocate a new GC'd entry and prepend it to the list.  */
  s = (struct stack_local_entry *)
    ggc_alloc (sizeof (struct stack_local_entry));
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return s->rtl;
}

/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;
rtx
ix86_tls_get_addr (void)
{
  /* Lazily created and cached across calls (GTY keeps it live over GC).
     The GNU TLS dialect uses the triple-underscore entry point on
     32-bit targets.  */
  if (!ix86_tls_symbol)
    {
      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
					    (TARGET_GNU_TLS && !TARGET_64BIT)
					    ? "___tls_get_addr"
					    : "__tls_get_addr");
    }

  return ix86_tls_symbol;
}

/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.
   Returns the number of extra bytes: SIB byte plus displacement.  */

static int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  /* Autoincrement forms (push/pop style) take no extra bytes.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
	  || addr == arg_pointer_rtx
	  || addr == frame_pointer_rtx
	  || addr == hard_frame_pointer_rtx)
	len = 1;
    }

  /* Direct Addressing: disp32 only.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* 'K' accepts signed 8-bit immediates: disp8 form.  */
	  if (GET_CODE (disp) == CONST_INT
	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
	      && base)
	    len = 1;
	  else
	    len = 4;
	}
      /* ebp always wants a displacement; a zero disp8 is emitted.  */
      else if (base == hard_frame_pointer_rtx)
	len = 1;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp, which always wants an index.  */
	  || base == stack_pointer_rtx
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx)
	len += 1;
    }

  return len;
}

/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that insn have 8bit immediate alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, int shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  /* Scan all operands; at most one immediate is expected per insn.  */
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	if (len)
	  abort ();
	/* Signed 8-bit immediates ('K') use the short encoding.  */
	if (shortform
	    && GET_CODE (recog_data.operand[i]) == CONST_INT
	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
	  len = 1;
	else
	  {
	    switch (get_attr_mode (insn))
	      {
		case MODE_QI:
		  len+=1;
		  break;
		case MODE_HI:
		  len+=2;
		  break;
		case MODE_SI:
		  len+=4;
		  break;
		/* Immediates for DImode instructions are encoded as 32bit sign extended values.
*/ 12037 case MODE_DI: 12038 len+=4; 12039 break; 12040 default: 12041 fatal_insn ("unknown insn mode", insn); 12042 } 12043 } 12044 } 12045 return len; 12046} 12047/* Compute default value for "length_address" attribute. */ 12048int 12049ix86_attr_length_address_default (rtx insn) 12050{ 12051 int i; 12052 12053 if (get_attr_type (insn) == TYPE_LEA) 12054 { 12055 rtx set = PATTERN (insn); 12056 if (GET_CODE (set) == SET) 12057 ; 12058 else if (GET_CODE (set) == PARALLEL 12059 && GET_CODE (XVECEXP (set, 0, 0)) == SET) 12060 set = XVECEXP (set, 0, 0); 12061 else 12062 { 12063#ifdef ENABLE_CHECKING 12064 abort (); 12065#endif 12066 return 0; 12067 } 12068 12069 return memory_address_length (SET_SRC (set)); 12070 } 12071 12072 extract_insn_cached (insn); 12073 for (i = recog_data.n_operands - 1; i >= 0; --i) 12074 if (GET_CODE (recog_data.operand[i]) == MEM) 12075 { 12076 return memory_address_length (XEXP (recog_data.operand[i], 0)); 12077 break; 12078 } 12079 return 0; 12080} 12081 12082/* Return the maximum number of instructions a cpu can issue. */ 12083 12084static int 12085ix86_issue_rate (void) 12086{ 12087 switch (ix86_tune) 12088 { 12089 case PROCESSOR_PENTIUM: 12090 case PROCESSOR_K6: 12091 return 2; 12092 12093 case PROCESSOR_PENTIUMPRO: 12094 case PROCESSOR_PENTIUM4: 12095 case PROCESSOR_ATHLON: 12096 case PROCESSOR_K8: 12097 return 3; 12098 12099 default: 12100 return 1; 12101 } 12102} 12103 12104/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set 12105 by DEP_INSN and nothing set by DEP_INSN. */ 12106 12107static int 12108ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type) 12109{ 12110 rtx set, set2; 12111 12112 /* Simplify the test for uninteresting insns. 
*/ 12113 if (insn_type != TYPE_SETCC 12114 && insn_type != TYPE_ICMOV 12115 && insn_type != TYPE_FCMOV 12116 && insn_type != TYPE_IBR) 12117 return 0; 12118 12119 if ((set = single_set (dep_insn)) != 0) 12120 { 12121 set = SET_DEST (set); 12122 set2 = NULL_RTX; 12123 } 12124 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL 12125 && XVECLEN (PATTERN (dep_insn), 0) == 2 12126 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET 12127 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET) 12128 { 12129 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 12130 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 12131 } 12132 else 12133 return 0; 12134 12135 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG) 12136 return 0; 12137 12138 /* This test is true if the dependent insn reads the flags but 12139 not any other potentially set register. */ 12140 if (!reg_overlap_mentioned_p (set, PATTERN (insn))) 12141 return 0; 12142 12143 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn))) 12144 return 0; 12145 12146 return 1; 12147} 12148 12149/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory 12150 address with operands set by DEP_INSN. 
 */

static int
ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx addr;

  /* On Pentium, LEA goes through the address-generation unit, so the
     address it computes is the SET_SRC of its pattern.  */
  if (insn_type == TYPE_LEA
      && TARGET_PENTIUM)
    {
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
	;
      else if (GET_CODE (addr) == PARALLEL
	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
	addr = XVECEXP (addr, 0, 0);
      else
	abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      /* Otherwise look for the first MEM operand and take its address;
	 no memory operand means no address-generation dependency.  */
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
	if (GET_CODE (recog_data.operand[i]) == MEM)
	  {
	    addr = XEXP (recog_data.operand[i], 0);
	    goto found;
	  }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}

/* Scheduler hook: adjust COST of the dependency LINK between INSN and
   DEP_INSN according to per-processor pipeline quirks.  */
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory, dep_memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
	  && dep_insn_type != TYPE_FMOV
	  && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
	cost += 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && GET_CODE (SET_DEST (set2)) == MEM)
	cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);
      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* Since we can't represent delayed latencies of load+operation,
	 increase the cost here for non-imov insns.  */
      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
	{
	  enum attr_unit unit = get_attr_unit (insn);
	  int loadcost = 3;

	  /* Because of the difference between the length of integer and
	     floating unit pipeline preparation stages, the memory operands
	     for floating point are cheaper.

	     ??? For Athlon it the difference is most probably 2.  */
	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
	    loadcost = 3;
	  else
	    loadcost = TARGET_ATHLON ? 2 : 0;

	  if (cost >= loadcost)
	    cost -= loadcost;
	  else
	    cost = 0;
	}
      /* FALLTHRU -- harmless, the default case only breaks.  */

    default:
      break;
    }

  return cost;
}

/* Per-CPU scheduling state; only the PPro member is currently used.
   Holds the insns slotted into the three PPro decoders this cycle.  */
static union
{
  struct ppro_sched_data
  {
    rtx decode[3];
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;

/* Return INSN's ppro_uops attribute, treating unrecognizable insns
   conservatively as "many uops".  */
static enum attr_ppro_uops
ix86_safe_ppro_uops (rtx insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_ppro_uops (insn);
  else
    return PPRO_UOPS_MANY;
}

/* Print the UIDs of the insns currently packed into the PPro decoders
   to the scheduling dump file.  */
static void
ix86_dump_ppro_packet (FILE *dump)
{
  if (ix86_sched_data.ppro.decode[0])
    {
      fprintf (dump, "PPRO packet: %d",
	       INSN_UID (ix86_sched_data.ppro.decode[0]));
      if (ix86_sched_data.ppro.decode[1])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
      if (ix86_sched_data.ppro.decode[2])
	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
      fputc ('\n', dump);
    }
}

/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		 int sched_verbose ATTRIBUTE_UNUSED,
		 int veclen ATTRIBUTE_UNUSED)
{
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
}

/* Shift INSN to SLOT, and shift everything else down.
 */

static void
ix86_reorder_insn (rtx *insnp, rtx *slot)
{
  /* Rotate *INSNP up to SLOT: everything between moves down one
     position and the insn formerly at INSNP lands in SLOT.  */
  if (insnp != slot)
    {
      rtx insn = *insnp;
      do
	insnp[0] = insnp[1];
      while (++insnp != slot);
      *insnp = insn;
    }
}

/* Reorder the ready queue (READY .. E_READY, highest priority at
   E_READY) so the head matches what the PPro decoders can accept:
   decoder 0 handles any insn, decoders 1 and 2 only 1-uop insns.  */
static void
ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
	{
	  decode[0] = *insnp;
	  goto ppro_done;
	}

      /* Otherwise, search for a 2-4 uop unsn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
	{
	  if (insnp == ready)
	    break;
	  cur_uops = ix86_safe_ppro_uops (*--insnp);
	}

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
	ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
	if (ready > e_ready)
	  goto ppro_done;

	insnp = e_ready;
	cur_uops = ix86_safe_ppro_uops (*insnp);
	while (cur_uops != PPRO_UOPS_ONE)
	  {
	    if (insnp == ready)
	      break;
	    cur_uops = ix86_safe_ppro_uops (*--insnp);
	  }

	/* Found one.  Move it to the head of the queue and issue it.  */
	if (cur_uops == PPRO_UOPS_ONE)
	  {
	    ix86_reorder_insn (insnp, e_ready);
	    decode[i] = *e_ready--;
	    issued_this_cycle++;
	    continue;
	  }

	/* ??? Didn't find one.  Ideally, here we would do a lazy split
	   of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}

/* We are about to being issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */
static int
ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
		    int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
		    int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
{
  int n_ready = *n_readyp;
  rtx *e_ready = ready + n_ready - 1;

  /* Make sure to go ahead and initialize key items in
     ix86_sched_data if we are not going to bother trying to
     reorder the ready queue.  */
  if (n_ready < 2)
    {
      ix86_sched_data.ppro.issued_this_cycle = 1;
      goto out;
    }

  switch (ix86_tune)
    {
    default:
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

out:
  return ix86_issue_rate ();
}

/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.
 */

static int
ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
		     int can_issue_more)
{
  int i;
  switch (ix86_tune)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

	/* A complex (many-uop) insn occupies the whole decode group:
	   dump the packet with just this insn, then clear the state.  */
	if (uops == PPRO_UOPS_MANY)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = NULL;
	  }
	/* A 2-4 uop insn starts a fresh packet in decoder 0.  */
	else if (uops == PPRO_UOPS_FEW)
	  {
	    if (sched_verbose)
	      ix86_dump_ppro_packet (dump);
	    ix86_sched_data.ppro.decode[0] = insn;
	    ix86_sched_data.ppro.decode[1] = NULL;
	    ix86_sched_data.ppro.decode[2] = NULL;
	  }
	else
	  {
	    /* A 1-uop insn fills the first free decoder slot; when the
	       third slot fills, the packet is complete and is flushed.  */
	    for (i = 0; i < 3; ++i)
	      if (ix86_sched_data.ppro.decode[i] == NULL)
		{
		  ix86_sched_data.ppro.decode[i] = insn;
		  break;
		}
	    if (i == 3)
	      abort ();
	    if (i == 2)
	      {
		if (sched_verbose)
		  ix86_dump_ppro_packet (dump);
		ix86_sched_data.ppro.decode[0] = NULL;
		ix86_sched_data.ppro.decode[1] = NULL;
		ix86_sched_data.ppro.decode[2] = NULL;
	      }
	  }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}

/* Target hook: use the DFA pipeline description only for the CPUs that
   have one (Pentium, Athlon/K8); others use the old hooks above.  */
static int
ia32_use_dfa_pipeline_interface (void)
{
  if (TARGET_PENTIUM || TARGET_ATHLON_K8)
    return 1;
  return 0;
}

/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results extra work for the scheduler.
 */

static int
ia32_multipass_dfa_lookahead (void)
{
  /* Only Pentium's two-pipe model benefits from lookahead here.  */
  if (ix86_tune == PROCESSOR_PENTIUM)
    return 2;
  else
    return 0;
}


/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  /* Word-align long string constants so block moves of them are fast;
     skipped when optimizing for size.  */
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && !TARGET_NO_ALIGN_LONG_STRINGS
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  /* Large aggregates get cache-line (256-bit) alignment.  The
     TREE_INT_CST_HIGH test catches sizes too big for the low word.  */
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      /* Keyed off the first field's mode only.  */
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}

/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

int
ix86_local_alignment (tree type, int align)
{
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.
	 32-bit layout: "movl $cxt, %ecx" (0xb9 imm32) then
	 "jmp fnaddr" (0xe9 rel32).  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
         We may want to support movq for kernel mode, but kernel does not use
         trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  /* 0x41 0xbb: movl $imm32, %r11d (zero-extends into %r11).  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* 0x49 0xbb: movabs $imm64, %r11.  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load static chain using movabs to r10 (0x49 0xba).  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump to the r11 (0x49 0xff 0xe3: rex.WB jmp *%r11).  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      if (offset > TRAMPOLINE_SIZE)
	abort ();
    }

#ifdef ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}

/* Register builtin NAME when its MASK is enabled in target_flags and,
   for 64-bit-only builtins, when compiling for 64-bit.  */
#define def_builtin(MASK, NAME, TYPE, CODE)				\
do {									\
  if ((MASK) & target_flags						\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,		\
		      NULL, NULL_TREE);					\
} while (0)

/* One table row describing an ix86 builtin: enabling target mask,
   insn pattern, user-visible name, builtin code, rtx comparison used
   by compare-style builtins, and a flag for swapped operands.  */
struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};

/* SSE/SSE2 [u]comis[sd] comparison builtins.  */
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

/* Two-operand builtins (table continues past this chunk).  */
static const struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
{ MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, 12853 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, 12854 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, 12855 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, 12856 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, 12857 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, 12858 12859 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, 12860 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, 12861 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, 12862 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 }, 12863 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 }, 12864 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, 12865 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 }, 12866 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 }, 12867 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 }, 12868 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 }, 12869 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 }, 12870 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 }, 12871 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, 12872 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, 12873 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, 
"__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, 12874 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, 12875 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 }, 12876 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 }, 12877 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 }, 12878 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 }, 12879 12880 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, 12881 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, 12882 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, 12883 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, 12884 12885 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 }, 12886 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 }, 12887 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 }, 12888 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }, 12889 12890 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, 12891 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, 12892 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, 12893 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 }, 12894 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, 12895 12896 /* MMX */ 12897 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 }, 12898 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", 
IX86_BUILTIN_PADDW, 0, 0 }, 12899 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 }, 12900 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 }, 12901 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 }, 12902 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 }, 12903 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 }, 12904 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 }, 12905 12906 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 }, 12907 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 }, 12908 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 }, 12909 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 }, 12910 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 }, 12911 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 }, 12912 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 }, 12913 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 }, 12914 12915 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 }, 12916 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 }, 12917 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, 12918 12919 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 }, 12920 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 }, 12921 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 }, 12922 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 
}, 12923 12924 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, 12925 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, 12926 12927 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 }, 12928 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 }, 12929 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 }, 12930 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 }, 12931 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 }, 12932 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 }, 12933 12934 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, 12935 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, 12936 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, 12937 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, 12938 12939 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 }, 12940 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 }, 12941 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 }, 12942 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 }, 12943 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 }, 12944 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 }, 12945 12946 /* Special. 
*/ 12947 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 }, 12948 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 }, 12949 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 }, 12950 12951 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, 12952 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, 12953 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, 12954 12955 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, 12956 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, 12957 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 }, 12958 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 }, 12959 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 }, 12960 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 }, 12961 12962 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 }, 12963 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 }, 12964 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 }, 12965 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 }, 12966 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 }, 12967 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 }, 12968 12969 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 }, 12970 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 }, 12971 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, 12972 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 }, 12973 12974 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, 12975 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }, 12976 12977 /* SSE2 */ 12978 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 }, 12979 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 }, 12980 { MASK_SSE2, 
CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 }, 12981 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 }, 12982 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 }, 12983 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 }, 12984 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 }, 12985 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 }, 12986 12987 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 }, 12988 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 }, 12989 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 }, 12990 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 }, 12991 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 }, 12992 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 }, 12993 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 }, 12994 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 }, 12995 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 }, 12996 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 }, 12997 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 }, 12998 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 }, 12999 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 }, 13000 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 }, 13001 { MASK_SSE2, 
CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 }, 13002 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 }, 13003 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 }, 13004 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 }, 13005 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 }, 13006 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 }, 13007 13008 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 }, 13009 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 }, 13010 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 }, 13011 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 }, 13012 13013 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 }, 13014 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 }, 13015 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 }, 13016 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 }, 13017 13018 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 }, 13019 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 }, 13020 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 }, 13021 13022 /* SSE2 MMX */ 13023 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 }, 13024 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 }, 13025 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 }, 
13026 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 }, 13027 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 }, 13028 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 }, 13029 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 }, 13030 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 }, 13031 13032 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 }, 13033 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 }, 13034 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 }, 13035 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 }, 13036 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 }, 13037 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 }, 13038 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 }, 13039 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 }, 13040 13041 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 }, 13042 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 }, 13043 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 }, 13044 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 }, 13045 13046 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 }, 13047 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 }, 13048 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", 
IX86_BUILTIN_POR128, 0, 0 }, 13049 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 }, 13050 13051 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 }, 13052 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 }, 13053 13054 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 }, 13055 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 }, 13056 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 }, 13057 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 }, 13058 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 }, 13059 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 }, 13060 13061 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 }, 13062 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 }, 13063 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 }, 13064 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 }, 13065 13066 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 }, 13067 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 }, 13068 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 }, 13069 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 }, 13070 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 }, 13071 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", 
IX86_BUILTIN_PUNPCKLWD128, 0, 0 }, 13072 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 }, 13073 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 }, 13074 13075 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 }, 13076 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 }, 13077 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 }, 13078 13079 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 }, 13080 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 }, 13081 13082 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 }, 13083 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 }, 13084 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 }, 13085 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 }, 13086 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 }, 13087 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 }, 13088 13089 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 }, 13090 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 }, 13091 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 }, 13092 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 }, 13093 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 }, 13094 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 }, 13095 13096 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 }, 13097 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 }, 13098 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 }, 13099 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 }, 
13100 13101 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 }, 13102 13103 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 }, 13104 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 }, 13105 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 }, 13106 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }, 13107 13108 /* SSE3 MMX */ 13109 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 }, 13110 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 }, 13111 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 }, 13112 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 }, 13113 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 }, 13114 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 } 13115}; 13116 13117static const struct builtin_description bdesc_1arg[] = 13118{ 13119 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 }, 13120 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 }, 13121 13122 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 }, 13123 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 }, 13124 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 }, 13125 13126 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, 13127 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, 13128 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 }, 13129 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, 13130 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }, 13131 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 }, 13132 13133 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 
0, 0 }, 13134 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 }, 13135 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 }, 13136 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 }, 13137 13138 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 }, 13139 13140 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 }, 13141 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 }, 13142 13143 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 }, 13144 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 }, 13145 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 }, 13146 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 }, 13147 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 }, 13148 13149 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 }, 13150 13151 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 }, 13152 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 }, 13153 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 }, 13154 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 }, 13155 13156 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 }, 13157 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 }, 13158 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }, 13159 13160 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }, 13161 13162 /* SSE3 */ 13163 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 }, 13164 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 }, 13165 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 } 13166}; 13167 13168void 13169ix86_init_builtins (void) 13170{ 13171 if (TARGET_MMX) 13172 ix86_init_mmx_sse_builtins (); 13173} 13174 13175/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX 13176 is zero. 
Otherwise, if TARGET_SSE is not set, only expand the MMX 13177 builtins. */ 13178static void 13179ix86_init_mmx_sse_builtins (void) 13180{ 13181 const struct builtin_description * d; 13182 size_t i; 13183 13184 tree pchar_type_node = build_pointer_type (char_type_node); 13185 tree pcchar_type_node = build_pointer_type ( 13186 build_type_variant (char_type_node, 1, 0)); 13187 tree pfloat_type_node = build_pointer_type (float_type_node); 13188 tree pcfloat_type_node = build_pointer_type ( 13189 build_type_variant (float_type_node, 1, 0)); 13190 tree pv2si_type_node = build_pointer_type (V2SI_type_node); 13191 tree pv2di_type_node = build_pointer_type (V2DI_type_node); 13192 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node); 13193 13194 /* Comparisons. */ 13195 tree int_ftype_v4sf_v4sf 13196 = build_function_type_list (integer_type_node, 13197 V4SF_type_node, V4SF_type_node, NULL_TREE); 13198 tree v4si_ftype_v4sf_v4sf 13199 = build_function_type_list (V4SI_type_node, 13200 V4SF_type_node, V4SF_type_node, NULL_TREE); 13201 /* MMX/SSE/integer conversions. 
*/ 13202 tree int_ftype_v4sf 13203 = build_function_type_list (integer_type_node, 13204 V4SF_type_node, NULL_TREE); 13205 tree int64_ftype_v4sf 13206 = build_function_type_list (long_long_integer_type_node, 13207 V4SF_type_node, NULL_TREE); 13208 tree int_ftype_v8qi 13209 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE); 13210 tree v4sf_ftype_v4sf_int 13211 = build_function_type_list (V4SF_type_node, 13212 V4SF_type_node, integer_type_node, NULL_TREE); 13213 tree v4sf_ftype_v4sf_int64 13214 = build_function_type_list (V4SF_type_node, 13215 V4SF_type_node, long_long_integer_type_node, 13216 NULL_TREE); 13217 tree v4sf_ftype_v4sf_v2si 13218 = build_function_type_list (V4SF_type_node, 13219 V4SF_type_node, V2SI_type_node, NULL_TREE); 13220 tree int_ftype_v4hi_int 13221 = build_function_type_list (integer_type_node, 13222 V4HI_type_node, integer_type_node, NULL_TREE); 13223 tree v4hi_ftype_v4hi_int_int 13224 = build_function_type_list (V4HI_type_node, V4HI_type_node, 13225 integer_type_node, integer_type_node, 13226 NULL_TREE); 13227 /* Miscellaneous. 
*/ 13228 tree v8qi_ftype_v4hi_v4hi 13229 = build_function_type_list (V8QI_type_node, 13230 V4HI_type_node, V4HI_type_node, NULL_TREE); 13231 tree v4hi_ftype_v2si_v2si 13232 = build_function_type_list (V4HI_type_node, 13233 V2SI_type_node, V2SI_type_node, NULL_TREE); 13234 tree v4sf_ftype_v4sf_v4sf_int 13235 = build_function_type_list (V4SF_type_node, 13236 V4SF_type_node, V4SF_type_node, 13237 integer_type_node, NULL_TREE); 13238 tree v2si_ftype_v4hi_v4hi 13239 = build_function_type_list (V2SI_type_node, 13240 V4HI_type_node, V4HI_type_node, NULL_TREE); 13241 tree v4hi_ftype_v4hi_int 13242 = build_function_type_list (V4HI_type_node, 13243 V4HI_type_node, integer_type_node, NULL_TREE); 13244 tree v4hi_ftype_v4hi_di 13245 = build_function_type_list (V4HI_type_node, 13246 V4HI_type_node, long_long_unsigned_type_node, 13247 NULL_TREE); 13248 tree v2si_ftype_v2si_di 13249 = build_function_type_list (V2SI_type_node, 13250 V2SI_type_node, long_long_unsigned_type_node, 13251 NULL_TREE); 13252 tree void_ftype_void 13253 = build_function_type (void_type_node, void_list_node); 13254 tree void_ftype_unsigned 13255 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE); 13256 tree void_ftype_unsigned_unsigned 13257 = build_function_type_list (void_type_node, unsigned_type_node, 13258 unsigned_type_node, NULL_TREE); 13259 tree void_ftype_pcvoid_unsigned_unsigned 13260 = build_function_type_list (void_type_node, const_ptr_type_node, 13261 unsigned_type_node, unsigned_type_node, 13262 NULL_TREE); 13263 tree unsigned_ftype_void 13264 = build_function_type (unsigned_type_node, void_list_node); 13265 tree di_ftype_void 13266 = build_function_type (long_long_unsigned_type_node, void_list_node); 13267 tree v4sf_ftype_void 13268 = build_function_type (V4SF_type_node, void_list_node); 13269 tree v2si_ftype_v4sf 13270 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE); 13271 /* Loads/stores. 
*/ 13272 tree void_ftype_v8qi_v8qi_pchar 13273 = build_function_type_list (void_type_node, 13274 V8QI_type_node, V8QI_type_node, 13275 pchar_type_node, NULL_TREE); 13276 tree v4sf_ftype_pcfloat 13277 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE); 13278 /* @@@ the type is bogus */ 13279 tree v4sf_ftype_v4sf_pv2si 13280 = build_function_type_list (V4SF_type_node, 13281 V4SF_type_node, pv2si_type_node, NULL_TREE); 13282 tree void_ftype_pv2si_v4sf 13283 = build_function_type_list (void_type_node, 13284 pv2si_type_node, V4SF_type_node, NULL_TREE); 13285 tree void_ftype_pfloat_v4sf 13286 = build_function_type_list (void_type_node, 13287 pfloat_type_node, V4SF_type_node, NULL_TREE); 13288 tree void_ftype_pdi_di 13289 = build_function_type_list (void_type_node, 13290 pdi_type_node, long_long_unsigned_type_node, 13291 NULL_TREE); 13292 tree void_ftype_pv2di_v2di 13293 = build_function_type_list (void_type_node, 13294 pv2di_type_node, V2DI_type_node, NULL_TREE); 13295 /* Normal vector unops. */ 13296 tree v4sf_ftype_v4sf 13297 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); 13298 13299 /* Normal vector binops. 
*/ 13300 tree v4sf_ftype_v4sf_v4sf 13301 = build_function_type_list (V4SF_type_node, 13302 V4SF_type_node, V4SF_type_node, NULL_TREE); 13303 tree v8qi_ftype_v8qi_v8qi 13304 = build_function_type_list (V8QI_type_node, 13305 V8QI_type_node, V8QI_type_node, NULL_TREE); 13306 tree v4hi_ftype_v4hi_v4hi 13307 = build_function_type_list (V4HI_type_node, 13308 V4HI_type_node, V4HI_type_node, NULL_TREE); 13309 tree v2si_ftype_v2si_v2si 13310 = build_function_type_list (V2SI_type_node, 13311 V2SI_type_node, V2SI_type_node, NULL_TREE); 13312 tree di_ftype_di_di 13313 = build_function_type_list (long_long_unsigned_type_node, 13314 long_long_unsigned_type_node, 13315 long_long_unsigned_type_node, NULL_TREE); 13316 13317 tree v2si_ftype_v2sf 13318 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE); 13319 tree v2sf_ftype_v2si 13320 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE); 13321 tree v2si_ftype_v2si 13322 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE); 13323 tree v2sf_ftype_v2sf 13324 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE); 13325 tree v2sf_ftype_v2sf_v2sf 13326 = build_function_type_list (V2SF_type_node, 13327 V2SF_type_node, V2SF_type_node, NULL_TREE); 13328 tree v2si_ftype_v2sf_v2sf 13329 = build_function_type_list (V2SI_type_node, 13330 V2SF_type_node, V2SF_type_node, NULL_TREE); 13331 tree pint_type_node = build_pointer_type (integer_type_node); 13332 tree pcint_type_node = build_pointer_type ( 13333 build_type_variant (integer_type_node, 1, 0)); 13334 tree pdouble_type_node = build_pointer_type (double_type_node); 13335 tree pcdouble_type_node = build_pointer_type ( 13336 build_type_variant (double_type_node, 1, 0)); 13337 tree int_ftype_v2df_v2df 13338 = build_function_type_list (integer_type_node, 13339 V2DF_type_node, V2DF_type_node, NULL_TREE); 13340 13341 tree ti_ftype_void 13342 = build_function_type (intTI_type_node, void_list_node); 13343 tree v2di_ftype_void 
13344 = build_function_type (V2DI_type_node, void_list_node); 13345 tree ti_ftype_ti_ti 13346 = build_function_type_list (intTI_type_node, 13347 intTI_type_node, intTI_type_node, NULL_TREE); 13348 tree void_ftype_pcvoid 13349 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE); 13350 tree v2di_ftype_di 13351 = build_function_type_list (V2DI_type_node, 13352 long_long_unsigned_type_node, NULL_TREE); 13353 tree di_ftype_v2di 13354 = build_function_type_list (long_long_unsigned_type_node, 13355 V2DI_type_node, NULL_TREE); 13356 tree v4sf_ftype_v4si 13357 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE); 13358 tree v4si_ftype_v4sf 13359 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE); 13360 tree v2df_ftype_v4si 13361 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE); 13362 tree v4si_ftype_v2df 13363 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE); 13364 tree v2si_ftype_v2df 13365 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE); 13366 tree v4sf_ftype_v2df 13367 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE); 13368 tree v2df_ftype_v2si 13369 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE); 13370 tree v2df_ftype_v4sf 13371 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE); 13372 tree int_ftype_v2df 13373 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE); 13374 tree int64_ftype_v2df 13375 = build_function_type_list (long_long_integer_type_node, 13376 V2DF_type_node, NULL_TREE); 13377 tree v2df_ftype_v2df_int 13378 = build_function_type_list (V2DF_type_node, 13379 V2DF_type_node, integer_type_node, NULL_TREE); 13380 tree v2df_ftype_v2df_int64 13381 = build_function_type_list (V2DF_type_node, 13382 V2DF_type_node, long_long_integer_type_node, 13383 NULL_TREE); 13384 tree v4sf_ftype_v4sf_v2df 13385 = build_function_type_list (V4SF_type_node, 13386 
V4SF_type_node, V2DF_type_node, NULL_TREE); 13387 tree v2df_ftype_v2df_v4sf 13388 = build_function_type_list (V2DF_type_node, 13389 V2DF_type_node, V4SF_type_node, NULL_TREE); 13390 tree v2df_ftype_v2df_v2df_int 13391 = build_function_type_list (V2DF_type_node, 13392 V2DF_type_node, V2DF_type_node, 13393 integer_type_node, 13394 NULL_TREE); 13395 tree v2df_ftype_v2df_pv2si 13396 = build_function_type_list (V2DF_type_node, 13397 V2DF_type_node, pv2si_type_node, NULL_TREE); 13398 tree void_ftype_pv2si_v2df 13399 = build_function_type_list (void_type_node, 13400 pv2si_type_node, V2DF_type_node, NULL_TREE); 13401 tree void_ftype_pdouble_v2df 13402 = build_function_type_list (void_type_node, 13403 pdouble_type_node, V2DF_type_node, NULL_TREE); 13404 tree void_ftype_pint_int 13405 = build_function_type_list (void_type_node, 13406 pint_type_node, integer_type_node, NULL_TREE); 13407 tree void_ftype_v16qi_v16qi_pchar 13408 = build_function_type_list (void_type_node, 13409 V16QI_type_node, V16QI_type_node, 13410 pchar_type_node, NULL_TREE); 13411 tree v2df_ftype_pcdouble 13412 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE); 13413 tree v2df_ftype_v2df_v2df 13414 = build_function_type_list (V2DF_type_node, 13415 V2DF_type_node, V2DF_type_node, NULL_TREE); 13416 tree v16qi_ftype_v16qi_v16qi 13417 = build_function_type_list (V16QI_type_node, 13418 V16QI_type_node, V16QI_type_node, NULL_TREE); 13419 tree v8hi_ftype_v8hi_v8hi 13420 = build_function_type_list (V8HI_type_node, 13421 V8HI_type_node, V8HI_type_node, NULL_TREE); 13422 tree v4si_ftype_v4si_v4si 13423 = build_function_type_list (V4SI_type_node, 13424 V4SI_type_node, V4SI_type_node, NULL_TREE); 13425 tree v2di_ftype_v2di_v2di 13426 = build_function_type_list (V2DI_type_node, 13427 V2DI_type_node, V2DI_type_node, NULL_TREE); 13428 tree v2di_ftype_v2df_v2df 13429 = build_function_type_list (V2DI_type_node, 13430 V2DF_type_node, V2DF_type_node, NULL_TREE); 13431 tree v2df_ftype_v2df 13432 = 
build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE); 13433 tree v2df_ftype_double 13434 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE); 13435 tree v2df_ftype_double_double 13436 = build_function_type_list (V2DF_type_node, 13437 double_type_node, double_type_node, NULL_TREE); 13438 tree int_ftype_v8hi_int 13439 = build_function_type_list (integer_type_node, 13440 V8HI_type_node, integer_type_node, NULL_TREE); 13441 tree v8hi_ftype_v8hi_int_int 13442 = build_function_type_list (V8HI_type_node, 13443 V8HI_type_node, integer_type_node, 13444 integer_type_node, NULL_TREE); 13445 tree v2di_ftype_v2di_int 13446 = build_function_type_list (V2DI_type_node, 13447 V2DI_type_node, integer_type_node, NULL_TREE); 13448 tree v4si_ftype_v4si_int 13449 = build_function_type_list (V4SI_type_node, 13450 V4SI_type_node, integer_type_node, NULL_TREE); 13451 tree v8hi_ftype_v8hi_int 13452 = build_function_type_list (V8HI_type_node, 13453 V8HI_type_node, integer_type_node, NULL_TREE); 13454 tree v8hi_ftype_v8hi_v2di 13455 = build_function_type_list (V8HI_type_node, 13456 V8HI_type_node, V2DI_type_node, NULL_TREE); 13457 tree v4si_ftype_v4si_v2di 13458 = build_function_type_list (V4SI_type_node, 13459 V4SI_type_node, V2DI_type_node, NULL_TREE); 13460 tree v4si_ftype_v8hi_v8hi 13461 = build_function_type_list (V4SI_type_node, 13462 V8HI_type_node, V8HI_type_node, NULL_TREE); 13463 tree di_ftype_v8qi_v8qi 13464 = build_function_type_list (long_long_unsigned_type_node, 13465 V8QI_type_node, V8QI_type_node, NULL_TREE); 13466 tree v2di_ftype_v16qi_v16qi 13467 = build_function_type_list (V2DI_type_node, 13468 V16QI_type_node, V16QI_type_node, NULL_TREE); 13469 tree int_ftype_v16qi 13470 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE); 13471 tree v16qi_ftype_pcchar 13472 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE); 13473 tree void_ftype_pchar_v16qi 13474 = build_function_type_list (void_type_node, 
13475 pchar_type_node, V16QI_type_node, NULL_TREE); 13476 tree v4si_ftype_pcint 13477 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE); 13478 tree void_ftype_pcint_v4si 13479 = build_function_type_list (void_type_node, 13480 pcint_type_node, V4SI_type_node, NULL_TREE); 13481 tree v2di_ftype_v2di 13482 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); 13483 13484 tree float80_type; 13485 tree float128_type; 13486 13487 /* The __float80 type. */ 13488 if (TYPE_MODE (long_double_type_node) == XFmode) 13489 (*lang_hooks.types.register_builtin_type) (long_double_type_node, 13490 "__float80"); 13491 else 13492 { 13493 /* The __float80 type. */ 13494 float80_type = make_node (REAL_TYPE); 13495 TYPE_PRECISION (float80_type) = 96; 13496 layout_type (float80_type); 13497 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80"); 13498 } 13499 13500 float128_type = make_node (REAL_TYPE); 13501 TYPE_PRECISION (float128_type) = 128; 13502 layout_type (float128_type); 13503 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128"); 13504 13505 /* Add all builtins that are more or less simple operations on two 13506 operands. */ 13507 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) 13508 { 13509 /* Use one of the operands; the target can have a different mode for 13510 mask-generating compares. 
*/ 13511 enum machine_mode mode; 13512 tree type; 13513 13514 if (d->name == 0) 13515 continue; 13516 mode = insn_data[d->icode].operand[1].mode; 13517 13518 switch (mode) 13519 { 13520 case V16QImode: 13521 type = v16qi_ftype_v16qi_v16qi; 13522 break; 13523 case V8HImode: 13524 type = v8hi_ftype_v8hi_v8hi; 13525 break; 13526 case V4SImode: 13527 type = v4si_ftype_v4si_v4si; 13528 break; 13529 case V2DImode: 13530 type = v2di_ftype_v2di_v2di; 13531 break; 13532 case V2DFmode: 13533 type = v2df_ftype_v2df_v2df; 13534 break; 13535 case TImode: 13536 type = ti_ftype_ti_ti; 13537 break; 13538 case V4SFmode: 13539 type = v4sf_ftype_v4sf_v4sf; 13540 break; 13541 case V8QImode: 13542 type = v8qi_ftype_v8qi_v8qi; 13543 break; 13544 case V4HImode: 13545 type = v4hi_ftype_v4hi_v4hi; 13546 break; 13547 case V2SImode: 13548 type = v2si_ftype_v2si_v2si; 13549 break; 13550 case DImode: 13551 type = di_ftype_di_di; 13552 break; 13553 13554 default: 13555 abort (); 13556 } 13557 13558 /* Override for comparisons. */ 13559 if (d->icode == CODE_FOR_maskcmpv4sf3 13560 || d->icode == CODE_FOR_maskncmpv4sf3 13561 || d->icode == CODE_FOR_vmmaskcmpv4sf3 13562 || d->icode == CODE_FOR_vmmaskncmpv4sf3) 13563 type = v4si_ftype_v4sf_v4sf; 13564 13565 if (d->icode == CODE_FOR_maskcmpv2df3 13566 || d->icode == CODE_FOR_maskncmpv2df3 13567 || d->icode == CODE_FOR_vmmaskcmpv2df3 13568 || d->icode == CODE_FOR_vmmaskncmpv2df3) 13569 type = v2di_ftype_v2df_v2df; 13570 13571 def_builtin (d->mask, d->name, type, d->code); 13572 } 13573 13574 /* Add the remaining MMX insns with somewhat more complicated types. 
*/ 13575 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO); 13576 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); 13577 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW); 13578 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD); 13579 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ); 13580 13581 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW); 13582 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD); 13583 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ); 13584 13585 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW); 13586 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD); 13587 13588 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW); 13589 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD); 13590 13591 /* comi/ucomi insns. 
*/ 13592 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) 13593 if (d->mask == MASK_SSE2) 13594 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code); 13595 else 13596 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code); 13597 13598 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB); 13599 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW); 13600 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB); 13601 13602 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); 13603 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); 13604 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); 13605 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); 13606 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); 13607 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS); 13608 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); 13609 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64); 13610 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); 13611 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); 13612 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64); 13613 13614 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW); 13615 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW); 13616 13617 def_builtin (MASK_SSE | MASK_3DNOW_A, 
"__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); 13618 13619 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS); 13620 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS); 13621 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS); 13622 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS); 13623 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); 13624 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS); 13625 13626 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS); 13627 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS); 13628 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS); 13629 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); 13630 13631 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS); 13632 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); 13633 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); 13634 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); 13635 13636 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); 13637 13638 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); 13639 13640 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); 13641 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); 13642 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", 
v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS); 13643 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); 13644 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS); 13645 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); 13646 13647 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); 13648 13649 /* Original 3DNow! */ 13650 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS); 13651 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB); 13652 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID); 13653 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC); 13654 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD); 13655 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ); 13656 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE); 13657 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT); 13658 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX); 13659 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN); 13660 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL); 13661 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP); 13662 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1); 13663 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2); 13664 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT); 13665 def_builtin (MASK_3DNOW, 
"__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1); 13666 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB); 13667 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR); 13668 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD); 13669 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW); 13670 13671 /* 3DNow! extension as used in the Athlon CPU. */ 13672 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW); 13673 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC); 13674 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC); 13675 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW); 13676 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF); 13677 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI); 13678 13679 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO); 13680 13681 /* SSE2 */ 13682 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128); 13683 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128); 13684 13685 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU); 13686 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ); 13687 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q); 13688 13689 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD); 13690 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD); 13691 def_builtin 
(MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD); 13692 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD); 13693 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD); 13694 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD); 13695 13696 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD); 13697 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD); 13698 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD); 13699 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD); 13700 13701 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD); 13702 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128); 13703 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI); 13704 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD); 13705 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ); 13706 13707 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD); 13708 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW); 13709 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW); 13710 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128); 13711 13712 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD); 13713 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD); 13714 13715 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", 
v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD); 13716 13717 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD); 13718 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS); 13719 13720 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ); 13721 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI); 13722 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS); 13723 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ); 13724 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI); 13725 13726 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD); 13727 13728 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI); 13729 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI); 13730 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64); 13731 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64); 13732 13733 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ); 13734 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD); 13735 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ); 13736 13737 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD); 13738 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD); 13739 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS); 13740 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", 
v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD); 13741 13742 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1); 13743 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD); 13744 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD); 13745 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1); 13746 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD); 13747 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1); 13748 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD); 13749 13750 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH); 13751 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE); 13752 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE); 13753 13754 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA); 13755 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU); 13756 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD); 13757 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA); 13758 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU); 13759 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED); 13760 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ); 13761 13762 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI); 13763 13764 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128); 13765 def_builtin (MASK_SSE2, 
"__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128); 13766 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128); 13767 13768 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128); 13769 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128); 13770 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128); 13771 13772 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128); 13773 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128); 13774 13775 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128); 13776 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128); 13777 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128); 13778 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128); 13779 13780 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128); 13781 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128); 13782 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128); 13783 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128); 13784 13785 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128); 13786 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128); 13787 13788 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128); 13789 13790 /* Prescott New Instructions. 
*/
  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
	       void_ftype_pcvoid_unsigned_unsigned,
	       IX86_BUILTIN_MONITOR);
  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
	       void_ftype_unsigned_unsigned,
	       IX86_BUILTIN_MWAIT);
  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
	       v4sf_ftype_v4sf,
	       IX86_BUILTIN_MOVSHDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
	       v4sf_ftype_v4sf,
	       IX86_BUILTIN_MOVSLDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
  def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
	       v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
	       v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.

   X is the rtx to sanitize and MODE the vector mode the caller expects.
   If X is anything other than const0_rtx it is returned unchanged;
   otherwise a fresh register of MODE is allocated, cleared with the
   appropriate vector-clear insn, and returned.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  /* MMX-class modes are cleared through a DImode view of the register
     (mmx_clrdi); every other vector mode goes through a V4SFmode view
     and the SSE clear pattern.  A SUBREG is used when the register's
     own mode differs from the mode the clear insn wants.  */
  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
			      : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
				: gen_rtx_SUBREG (V4SFmode, x, 0),
				CONST0_RTX (V4SFmode)));
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.
*/

/* ICODE is the two-operand insn to generate, ARGLIST holds the two
   argument trees, and TARGET is a suggested result register (may be
   NULL_RTX or of the wrong mode).  Emits the insn and returns the
   result rtx, or 0 if the insn pattern could not be generated.  */
static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  /* Modes the insn pattern wants for its result and two inputs.  */
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  /* Guard against const0_rtx standing in for a vector (see
     safe_vector_operand).  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* If the insn wants a TImode input but op1 is SImode, widen it by
     loading it into a V4SImode register and taking the TImode low part.  */
  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
      || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of stores.

   ICODE is the store insn, ARGLIST holds the address expression
   followed by the value to store.  Always returns 0: a store builtin
   produces no usable result.  */
static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* op0 is a pointer value; turn it into a mode0 memory reference.  */
  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.

   ICODE/ARGLIST/TARGET are as for ix86_expand_binop_builtin.  When
   DO_LOAD is nonzero the single argument is a pointer and is
   dereferenced as a mode0 MEM instead of being used directly.  */
static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
			  rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.

   These patterns take the same source operand twice (operands 1 and 2),
   so the single builtin argument is duplicated before generating the insn.  */
static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  /* Duplicate the operand; operand 2 may have a stricter predicate.  */
  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.
*/

/* D describes the builtin (icode, comparison code, swap flag), ARGLIST
   holds the two vector arguments, TARGET is a suggested result register.
   Generates the mask-producing compare insn, whose fourth operand is the
   comparison rtx itself, and returns the result (or 0 on failure).  */
static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
			 rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  op1 is first copied into a fresh register so the
     original operand rtxes are not disturbed.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* The insn pattern carries the comparison as an explicit operand.  */
  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comi insns.
*/ 14030 14031static rtx 14032ix86_expand_sse_comi (const struct builtin_description *d, tree arglist, 14033 rtx target) 14034{ 14035 rtx pat; 14036 tree arg0 = TREE_VALUE (arglist); 14037 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 14038 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); 14039 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 14040 rtx op2; 14041 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; 14042 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; 14043 enum rtx_code comparison = d->comparison; 14044 14045 if (VECTOR_MODE_P (mode0)) 14046 op0 = safe_vector_operand (op0, mode0); 14047 if (VECTOR_MODE_P (mode1)) 14048 op1 = safe_vector_operand (op1, mode1); 14049 14050 /* Swap operands if we have a comparison that isn't available in 14051 hardware. */ 14052 if (d->flag) 14053 { 14054 rtx tmp = op1; 14055 op1 = op0; 14056 op0 = tmp; 14057 } 14058 14059 target = gen_reg_rtx (SImode); 14060 emit_move_insn (target, const0_rtx); 14061 target = gen_rtx_SUBREG (QImode, target, 0); 14062 14063 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0)) 14064 op0 = copy_to_mode_reg (mode0, op0); 14065 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1)) 14066 op1 = copy_to_mode_reg (mode1, op1); 14067 14068 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); 14069 pat = GEN_FCN (d->icode) (op0, op1); 14070 if (! pat) 14071 return 0; 14072 emit_insn (pat); 14073 emit_insn (gen_rtx_SET (VOIDmode, 14074 gen_rtx_STRICT_LOW_PART (VOIDmode, target), 14075 gen_rtx_fmt_ee (comparison, QImode, 14076 SET_DEST (pat), 14077 const0_rtx))); 14078 14079 return SUBREG_REG (target); 14080} 14081 14082/* Expand an expression EXP that calls a built-in function, 14083 with result going to TARGET if that's convenient 14084 (and in mode MODE if that's convenient). 14085 SUBTARGET may be used as the target for computing one of EXP's operands. 14086 IGNORE is nonzero if the value is to be ignored. 
*/

rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  /* Special-cased builtins first; anything not handled here falls
     through to the generic 2-arg / 1-arg / comi description tables.  */
  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    /* Element extract: second argument must be an immediate selector
       accepted by the pattern's predicate (0..3 for MMX, 0..7 for
       SSE2); on error a dummy pseudo is returned so expansion can
       continue.  */
    case IX86_BUILTIN_PEXTRW:
    case IX86_BUILTIN_PEXTRW128:
      icode = (fcode == IX86_BUILTIN_PEXTRW
	       ? CODE_FOR_mmx_pextrw
	       : CODE_FOR_sse2_pextrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  error ("selector must be an integer constant in the range 0..%i",
		 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    /* Element insert: third argument must be an immediate selector
       (0..15 for MMX, 0..255 for SSE2).  */
    case IX86_BUILTIN_PINSRW:
    case IX86_BUILTIN_PINSRW128:
      icode = (fcode == IX86_BUILTIN_PINSRW
	       ? CODE_FOR_mmx_pinsrw
	       : CODE_FOR_sse2_pinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  error ("selector must be an integer constant in the range 0..%i",
		 fcode == IX86_BUILTIN_PINSRW ? 15:255);
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
	       : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
		  : CODE_FOR_sse2_maskmovdqu));
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

    /* Partial loads: the second argument is a pointer; wrap it in a
       MEM of the pattern's expected mode.  */
    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movsd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    /* Partial stores: first argument is the destination pointer; the
       MEM is used both as destination and as first source operand of
       the mov pattern.  */
    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
    case IX86_BUILTIN_STOREHPD:
    case IX86_BUILTIN_STORELPD:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movsd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    /* MXCSR accesses go through a stack slot.  */
    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    /* Shuffles: the mask operand must satisfy the pattern's immediate
       predicate.  */
    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    /* Whole-register byte shifts: the shift patterns work on TImode,
       so subregs bridge between the V2DImode builtin view and the
       TImode insn operands.  */
    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
	       : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    /* 3DNow! builtins all map directly onto unop/binop helpers.  */
    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    /* Register-clearing builtins.  */
    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    case IX86_BUILTIN_CLRTI:
      target = gen_reg_rtx (V2DImode);
      emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
      return target;


    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADAPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);

    case IX86_BUILTIN_STOREAPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_LOADSD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);

    case IX86_BUILTIN_STORESD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);

    /* Splat a scalar double into both lanes via a stack temporary and
       a shufpd with selector 0.  */
    case IX86_BUILTIN_SETPD1:
      target = assign_386_stack_local (DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, DFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
      emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
      return op0;

    /* Build a V2DF from two scalars through a stack temporary.  */
    case IX86_BUILTIN_SETPD:
      target = assign_386_stack_local (V2DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      emit_move_insn (adjust_address (target, DFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, DFmode, 8),
		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_movapd (op0, target));
      return op0;

    /* Reversed load: plain load followed by a lane swap.  */
    case IX86_BUILTIN_LOADRPD:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
					 gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
      return target;

    case IX86_BUILTIN_LOADPD1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
					 gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPD1:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STORERPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);

    case IX86_BUILTIN_CLRPD:
      target = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse_clrv2df (target));
      return target;

    case IX86_BUILTIN_MFENCE:
	emit_insn (gen_sse2_mfence ());
	return 0;
    case IX86_BUILTIN_LFENCE:
	emit_insn (gen_sse2_lfence ());
	return 0;

    case IX86_BUILTIN_CLFLUSH:
	arg0 = TREE_VALUE (arglist);
	op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
	icode = CODE_FOR_sse2_clflush;
	if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
	    op0 = copy_to_mode_reg (Pmode, op0);

	emit_insn (gen_sse2_clflush (op0));
	return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQA:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_LOADD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);

    case IX86_BUILTIN_STOREDQA:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
    case IX86_BUILTIN_STORED:
      return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);

    /* MONITOR/MWAIT take their arguments in fixed SImode registers.  */
    case IX86_BUILTIN_MONITOR:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (gen_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_mwait (op0, op1));
      return 0;

    case IX86_BUILTIN_LOADDDUP:
      return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);

    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
				       1);

    default:
      break;
    }

  /* Not special-cased: fall back to the generic builtin tables.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3
	    || d->icode == CODE_FOR_maskcmpv2df3
	    || d->icode == CODE_FOR_vmmaskcmpv2df3
	    || d->icode == CODE_FOR_maskncmpv2df3
	    || d->icode == CODE_FOR_vmmaskncmpv2df3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}

/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.
*/
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;
  /* Only valid post-reload; assign_stack_local can't be used here.  */
  if (!reload_completed)
    abort ();
  if (TARGET_RED_ZONE)
    {
      /* With a red zone, store below the stack pointer without
	 adjusting it.  */
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      /* 64-bit without red zone: push a full DImode word, widening
	 narrower modes first.  */
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (DImode,
						gen_rtx_PRE_DEC (DImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      /* 32-bit: push in SImode pieces (high half first for DImode).  */
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[1]));
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (GET_MODE (operand),
						gen_rtx_PRE_DEC (SImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}

/* Free operand from the memory.
*/
void
ix86_free_from_memory (enum machine_mode mode)
{
  /* Nothing to do with a red zone: ix86_force_to_memory never moved
     the stack pointer in that case.  */
  if (!TARGET_RED_ZONE)
    {
      int size;

      /* Mirror the push sizes used by ix86_force_to_memory.  */
      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
	size = 2;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
  /* Non-zero vector constants can't be loaded directly anywhere.  */
  if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
    return NO_REGS;
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
	return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
	{
	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
	  if (MAYBE_SSE_CLASS_P (class))
	    return (reg_class_subset_p (class, GENERAL_REGS)
		    ? GENERAL_REGS : FLOAT_REGS);
	  else
	    return class;
	}
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
	return NO_REGS;
    }
  /* MMX registers can't load constants at all.  */
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  /* QImode values must end up in a byte-addressable register.  */
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}

/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */
int
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
			      enum machine_mode mode, int strict)
{
  /* Reject (or, when non-strict, pessimistically report) mixed-unit
     classes; see the comment above.  */
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
	abort ();
      else
	return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
	  || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
	       || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
	      && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
		  || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.
*/
int
ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
			 enum reg_class class2)
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
		   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
		   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from general_purpose_register we may emit multiple
         stores followed by single load causing memory size mismatch stall.
         Count this as arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  /* Per-unit move costs from the active processor cost table.  */
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
  if (MMX_REGNO_P (regno))
    return (TARGET_MMX
	    ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
  /* We handle both integer and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care for QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.
 */
int
ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
{
  /* x87 loads/stores are indexed by scalar FP mode.  */
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
	{
	  case SFmode:
	    index = 0;
	    break;
	  case DFmode:
	    index = 1;
	    break;
	  case XFmode:
	    index = 2;
	    break;
	  default:
	    return 100;
	}
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  /* SSE and MMX loads/stores are indexed by access size in bytes.  */
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	  case 4:
	    index = 0;
	    break;
	  case 8:
	    index = 1;
	    break;
	  case 16:
	    index = 2;
	    break;
	  default:
	    return 100;
	}
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	  case 4:
	    index = 0;
	    break;
	  case 8:
	    index = 1;
	    break;
	  default:
	    return 100;
	}
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  /* Integer registers: model QImode penalties for non-Q classes.  */
  switch (GET_MODE_SIZE (mode))
    {
      case 1:
	if (in)
	  return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
		  : ix86_cost->movzbl_load);
	else
	  return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
		  : ix86_cost->int_store[0] + 4);
	break;
      case 2:
	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
      default:
	/* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
	if (mode == TFmode)
	  mode = XFmode;
	return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
		* (((int) GET_MODE_SIZE (mode)
		    + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.
*/ 15012 15013static bool 15014ix86_rtx_costs (rtx x, int code, int outer_code, int *total) 15015{ 15016 enum machine_mode mode = GET_MODE (x); 15017 15018 switch (code) 15019 { 15020 case CONST_INT: 15021 case CONST: 15022 case LABEL_REF: 15023 case SYMBOL_REF: 15024 if (TARGET_64BIT && !x86_64_sign_extended_value (x)) 15025 *total = 3; 15026 else if (TARGET_64BIT && !x86_64_zero_extended_value (x)) 15027 *total = 2; 15028 else if (flag_pic && SYMBOLIC_CONST (x) 15029 && (!TARGET_64BIT 15030 || (!GET_CODE (x) != LABEL_REF 15031 && (GET_CODE (x) != SYMBOL_REF 15032 || !SYMBOL_REF_LOCAL_P (x))))) 15033 *total = 1; 15034 else 15035 *total = 0; 15036 return true; 15037 15038 case CONST_DOUBLE: 15039 if (mode == VOIDmode) 15040 *total = 0; 15041 else 15042 switch (standard_80387_constant_p (x)) 15043 { 15044 case 1: /* 0.0 */ 15045 *total = 1; 15046 break; 15047 default: /* Other constants */ 15048 *total = 2; 15049 break; 15050 case 0: 15051 case -1: 15052 /* Start with (MEM (SYMBOL_REF)), since that's where 15053 it'll probably end up. Add a penalty for size. */ 15054 *total = (COSTS_N_INSNS (1) 15055 + (flag_pic != 0 && !TARGET_64BIT) 15056 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2)); 15057 break; 15058 } 15059 return true; 15060 15061 case ZERO_EXTEND: 15062 /* The zero extensions is often completely free on x86_64, so make 15063 it as cheap as possible. 
*/ 15064 if (TARGET_64BIT && mode == DImode 15065 && GET_MODE (XEXP (x, 0)) == SImode) 15066 *total = 1; 15067 else if (TARGET_ZERO_EXTEND_WITH_AND) 15068 *total = COSTS_N_INSNS (ix86_cost->add); 15069 else 15070 *total = COSTS_N_INSNS (ix86_cost->movzx); 15071 return false; 15072 15073 case SIGN_EXTEND: 15074 *total = COSTS_N_INSNS (ix86_cost->movsx); 15075 return false; 15076 15077 case ASHIFT: 15078 if (GET_CODE (XEXP (x, 1)) == CONST_INT 15079 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT)) 15080 { 15081 HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); 15082 if (value == 1) 15083 { 15084 *total = COSTS_N_INSNS (ix86_cost->add); 15085 return false; 15086 } 15087 if ((value == 2 || value == 3) 15088 && !TARGET_DECOMPOSE_LEA 15089 && ix86_cost->lea <= ix86_cost->shift_const) 15090 { 15091 *total = COSTS_N_INSNS (ix86_cost->lea); 15092 return false; 15093 } 15094 } 15095 /* FALLTHRU */ 15096 15097 case ROTATE: 15098 case ASHIFTRT: 15099 case LSHIFTRT: 15100 case ROTATERT: 15101 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode) 15102 { 15103 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 15104 { 15105 if (INTVAL (XEXP (x, 1)) > 32) 15106 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2); 15107 else 15108 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2); 15109 } 15110 else 15111 { 15112 if (GET_CODE (XEXP (x, 1)) == AND) 15113 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2); 15114 else 15115 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2); 15116 } 15117 } 15118 else 15119 { 15120 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 15121 *total = COSTS_N_INSNS (ix86_cost->shift_const); 15122 else 15123 *total = COSTS_N_INSNS (ix86_cost->shift_var); 15124 } 15125 return false; 15126 15127 case MULT: 15128 if (FLOAT_MODE_P (mode)) 15129 *total = COSTS_N_INSNS (ix86_cost->fmul); 15130 else if (GET_CODE (XEXP (x, 1)) == CONST_INT) 15131 { 15132 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); 15133 int nbits; 15134 15135 for (nbits = 0; value != 0; value >>= 1) 15136 
nbits++; 15137 15138 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)] 15139 + nbits * ix86_cost->mult_bit); 15140 } 15141 else 15142 { 15143 /* This is arbitrary */ 15144 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)] 15145 + 7 * ix86_cost->mult_bit); 15146 } 15147 return false; 15148 15149 case DIV: 15150 case UDIV: 15151 case MOD: 15152 case UMOD: 15153 if (FLOAT_MODE_P (mode)) 15154 *total = COSTS_N_INSNS (ix86_cost->fdiv); 15155 else 15156 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]); 15157 return false; 15158 15159 case PLUS: 15160 if (FLOAT_MODE_P (mode)) 15161 *total = COSTS_N_INSNS (ix86_cost->fadd); 15162 else if (!TARGET_DECOMPOSE_LEA 15163 && GET_MODE_CLASS (mode) == MODE_INT 15164 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode)) 15165 { 15166 if (GET_CODE (XEXP (x, 0)) == PLUS 15167 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT 15168 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT 15169 && CONSTANT_P (XEXP (x, 1))) 15170 { 15171 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); 15172 if (val == 2 || val == 4 || val == 8) 15173 { 15174 *total = COSTS_N_INSNS (ix86_cost->lea); 15175 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code); 15176 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), 15177 outer_code); 15178 *total += rtx_cost (XEXP (x, 1), outer_code); 15179 return true; 15180 } 15181 } 15182 else if (GET_CODE (XEXP (x, 0)) == MULT 15183 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT) 15184 { 15185 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); 15186 if (val == 2 || val == 4 || val == 8) 15187 { 15188 *total = COSTS_N_INSNS (ix86_cost->lea); 15189 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code); 15190 *total += rtx_cost (XEXP (x, 1), outer_code); 15191 return true; 15192 } 15193 } 15194 else if (GET_CODE (XEXP (x, 0)) == PLUS) 15195 { 15196 *total = COSTS_N_INSNS (ix86_cost->lea); 15197 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code); 15198 
*total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code); 15199 *total += rtx_cost (XEXP (x, 1), outer_code); 15200 return true; 15201 } 15202 } 15203 /* FALLTHRU */ 15204 15205 case MINUS: 15206 if (FLOAT_MODE_P (mode)) 15207 { 15208 *total = COSTS_N_INSNS (ix86_cost->fadd); 15209 return false; 15210 } 15211 /* FALLTHRU */ 15212 15213 case AND: 15214 case IOR: 15215 case XOR: 15216 if (!TARGET_64BIT && mode == DImode) 15217 { 15218 *total = (COSTS_N_INSNS (ix86_cost->add) * 2 15219 + (rtx_cost (XEXP (x, 0), outer_code) 15220 << (GET_MODE (XEXP (x, 0)) != DImode)) 15221 + (rtx_cost (XEXP (x, 1), outer_code) 15222 << (GET_MODE (XEXP (x, 1)) != DImode))); 15223 return true; 15224 } 15225 /* FALLTHRU */ 15226 15227 case NEG: 15228 if (FLOAT_MODE_P (mode)) 15229 { 15230 *total = COSTS_N_INSNS (ix86_cost->fchs); 15231 return false; 15232 } 15233 /* FALLTHRU */ 15234 15235 case NOT: 15236 if (!TARGET_64BIT && mode == DImode) 15237 *total = COSTS_N_INSNS (ix86_cost->add * 2); 15238 else 15239 *total = COSTS_N_INSNS (ix86_cost->add); 15240 return false; 15241 15242 case FLOAT_EXTEND: 15243 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode)) 15244 *total = 0; 15245 return false; 15246 15247 case ABS: 15248 if (FLOAT_MODE_P (mode)) 15249 *total = COSTS_N_INSNS (ix86_cost->fabs); 15250 return false; 15251 15252 case SQRT: 15253 if (FLOAT_MODE_P (mode)) 15254 *total = COSTS_N_INSNS (ix86_cost->fsqrt); 15255 return false; 15256 15257 case UNSPEC: 15258 if (XINT (x, 1) == UNSPEC_TP) 15259 *total = 0; 15260 return false; 15261 15262 default: 15263 return false; 15264 } 15265} 15266 15267#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION) 15268static void 15269ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED) 15270{ 15271 init_section (); 15272 fputs ("\tpushl $", asm_out_file); 15273 assemble_name (asm_out_file, XSTR (symbol, 0)); 15274 fputc ('\n', asm_out_file); 15275} 15276#endif 15277 15278#if TARGET_MACHO 15279 15280static int 
current_machopic_label_num; 15281 15282/* Given a symbol name and its associated stub, write out the 15283 definition of the stub. */ 15284 15285void 15286machopic_output_stub (FILE *file, const char *symb, const char *stub) 15287{ 15288 unsigned int length; 15289 char *binder_name, *symbol_name, lazy_ptr_name[32]; 15290 int label = ++current_machopic_label_num; 15291 15292 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */ 15293 symb = (*targetm.strip_name_encoding) (symb); 15294 15295 length = strlen (stub); 15296 binder_name = alloca (length + 32); 15297 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length); 15298 15299 length = strlen (symb); 15300 symbol_name = alloca (length + 32); 15301 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); 15302 15303 sprintf (lazy_ptr_name, "L%d$lz", label); 15304 15305 if (MACHOPIC_PURE) 15306 machopic_picsymbol_stub_section (); 15307 else 15308 machopic_symbol_stub_section (); 15309 15310 fprintf (file, "%s:\n", stub); 15311 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 15312 15313 if (MACHOPIC_PURE) 15314 { 15315 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label); 15316 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label); 15317 fprintf (file, "\tjmp %%edx\n"); 15318 } 15319 else 15320 fprintf (file, "\tjmp *%s\n", lazy_ptr_name); 15321 15322 fprintf (file, "%s:\n", binder_name); 15323 15324 if (MACHOPIC_PURE) 15325 { 15326 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label); 15327 fprintf (file, "\tpushl %%eax\n"); 15328 } 15329 else 15330 fprintf (file, "\t pushl $%s\n", lazy_ptr_name); 15331 15332 fprintf (file, "\tjmp dyld_stub_binding_helper\n"); 15333 15334 machopic_lazy_symbol_ptr_section (); 15335 fprintf (file, "%s:\n", lazy_ptr_name); 15336 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 15337 fprintf (file, "\t.long %s\n", binder_name); 15338} 15339#endif /* TARGET_MACHO */ 15340 15341/* Order the registers for 
   register allocator.  */

/* Fill reg_alloc_order with the preferred allocation order: caller-saved
   GPRs first, then callee-saved GPRs, then FP/SSE/MMX groups ordered by
   whether x87 or SSE carries the FP math.  */
void
x86_order_regs_for_local_alloc (void)
{
  int pos = 0;
  int i;

  /* First allocate the local general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* Global general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  /* SSE registers.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    reg_alloc_order [pos++] = i;
  for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
    reg_alloc_order [pos++] = i;

  /* x87 registers (after SSE when SSE does the FP math).  */
  if (TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  /* MMX registers last; they alias the x87 stack.  */
  for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Initialize the rest of array as we do not allocate some registers
     at all.  */
  while (pos < FIRST_PSEUDO_REGISTER)
    reg_alloc_order [pos++] = 0;
}

#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
#define TARGET_USE_MS_BITFIELD_LAYOUT 0
#endif

/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.
*/ 15391static tree 15392ix86_handle_struct_attribute (tree *node, tree name, 15393 tree args ATTRIBUTE_UNUSED, 15394 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 15395{ 15396 tree *type = NULL; 15397 if (DECL_P (*node)) 15398 { 15399 if (TREE_CODE (*node) == TYPE_DECL) 15400 type = &TREE_TYPE (*node); 15401 } 15402 else 15403 type = node; 15404 15405 if (!(type && (TREE_CODE (*type) == RECORD_TYPE 15406 || TREE_CODE (*type) == UNION_TYPE))) 15407 { 15408 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name)); 15409 *no_add_attrs = true; 15410 } 15411 15412 else if ((is_attribute_p ("ms_struct", name) 15413 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type))) 15414 || ((is_attribute_p ("gcc_struct", name) 15415 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))) 15416 { 15417 warning ("`%s' incompatible attribute ignored", 15418 IDENTIFIER_POINTER (name)); 15419 *no_add_attrs = true; 15420 } 15421 15422 return NULL_TREE; 15423} 15424 15425static bool 15426ix86_ms_bitfield_layout_p (tree record_type) 15427{ 15428 return (TARGET_USE_MS_BITFIELD_LAYOUT && 15429 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type))) 15430 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)); 15431} 15432 15433/* Returns an expression indicating where the this parameter is 15434 located on entry to the FUNCTION. */ 15435 15436static rtx 15437x86_this_parameter (tree function) 15438{ 15439 tree type = TREE_TYPE (function); 15440 15441 if (TARGET_64BIT) 15442 { 15443 int n = aggregate_value_p (TREE_TYPE (type), type) != 0; 15444 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]); 15445 } 15446 15447 if (ix86_function_regparm (type, function) > 0) 15448 { 15449 tree parm; 15450 15451 parm = TYPE_ARG_TYPES (type); 15452 /* Figure out whether or not the function has a variable number of 15453 arguments. 
*/ 15454 for (; parm; parm = TREE_CHAIN (parm)) 15455 if (TREE_VALUE (parm) == void_type_node) 15456 break; 15457 /* If not, the this parameter is in the first argument. */ 15458 if (parm) 15459 { 15460 int regno = 0; 15461 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))) 15462 regno = 2; 15463 return gen_rtx_REG (SImode, regno); 15464 } 15465 } 15466 15467 if (aggregate_value_p (TREE_TYPE (type), type)) 15468 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8)); 15469 else 15470 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4)); 15471} 15472 15473/* Determine whether x86_output_mi_thunk can succeed. */ 15474 15475static bool 15476x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED, 15477 HOST_WIDE_INT delta ATTRIBUTE_UNUSED, 15478 HOST_WIDE_INT vcall_offset, tree function) 15479{ 15480 /* 64-bit can handle anything. */ 15481 if (TARGET_64BIT) 15482 return true; 15483 15484 /* For 32-bit, everything's fine if we have one free register. */ 15485 if (ix86_function_regparm (TREE_TYPE (function), function) < 3) 15486 return true; 15487 15488 /* Need a free register for vcall_offset. */ 15489 if (vcall_offset) 15490 return false; 15491 15492 /* Need a free register for GOT references. */ 15493 if (flag_pic && !(*targetm.binds_local_p) (function)) 15494 return false; 15495 15496 /* Otherwise ok. */ 15497 return true; 15498} 15499 15500/* Output the assembler code for a thunk function. THUNK_DECL is the 15501 declaration for the thunk function itself, FUNCTION is the decl for 15502 the target function. DELTA is an immediate constant offset to be 15503 added to THIS. If VCALL_OFFSET is nonzero, the word at 15504 *(*this + vcall_offset) should be added to THIS. 
*/ 15505 15506static void 15507x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, 15508 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta, 15509 HOST_WIDE_INT vcall_offset, tree function) 15510{ 15511 rtx xops[3]; 15512 rtx this = x86_this_parameter (function); 15513 rtx this_reg, tmp; 15514 15515 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well 15516 pull it in now and let DELTA benefit. */ 15517 if (REG_P (this)) 15518 this_reg = this; 15519 else if (vcall_offset) 15520 { 15521 /* Put the this parameter into %eax. */ 15522 xops[0] = this; 15523 xops[1] = this_reg = gen_rtx_REG (Pmode, 0); 15524 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); 15525 } 15526 else 15527 this_reg = NULL_RTX; 15528 15529 /* Adjust the this parameter by a fixed constant. */ 15530 if (delta) 15531 { 15532 xops[0] = GEN_INT (delta); 15533 xops[1] = this_reg ? this_reg : this; 15534 if (TARGET_64BIT) 15535 { 15536 if (!x86_64_general_operand (xops[0], DImode)) 15537 { 15538 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */); 15539 xops[1] = tmp; 15540 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops); 15541 xops[0] = tmp; 15542 xops[1] = this; 15543 } 15544 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); 15545 } 15546 else 15547 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); 15548 } 15549 15550 /* Adjust the this parameter by a value stored in the vtable. 
*/ 15551 if (vcall_offset) 15552 { 15553 if (TARGET_64BIT) 15554 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */); 15555 else 15556 { 15557 int tmp_regno = 2 /* ECX */; 15558 if (lookup_attribute ("fastcall", 15559 TYPE_ATTRIBUTES (TREE_TYPE (function)))) 15560 tmp_regno = 0 /* EAX */; 15561 tmp = gen_rtx_REG (SImode, tmp_regno); 15562 } 15563 15564 xops[0] = gen_rtx_MEM (Pmode, this_reg); 15565 xops[1] = tmp; 15566 if (TARGET_64BIT) 15567 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops); 15568 else 15569 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); 15570 15571 /* Adjust the this parameter. */ 15572 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset)); 15573 if (TARGET_64BIT && !memory_operand (xops[0], Pmode)) 15574 { 15575 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */); 15576 xops[0] = GEN_INT (vcall_offset); 15577 xops[1] = tmp2; 15578 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops); 15579 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2)); 15580 } 15581 xops[1] = this_reg; 15582 if (TARGET_64BIT) 15583 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); 15584 else 15585 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); 15586 } 15587 15588 /* If necessary, drop THIS back to its stack slot. 
*/ 15589 if (this_reg && this_reg != this) 15590 { 15591 xops[0] = this_reg; 15592 xops[1] = this; 15593 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); 15594 } 15595 15596 xops[0] = XEXP (DECL_RTL (function), 0); 15597 if (TARGET_64BIT) 15598 { 15599 if (!flag_pic || (*targetm.binds_local_p) (function)) 15600 output_asm_insn ("jmp\t%P0", xops); 15601 else 15602 { 15603 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL); 15604 tmp = gen_rtx_CONST (Pmode, tmp); 15605 tmp = gen_rtx_MEM (QImode, tmp); 15606 xops[0] = tmp; 15607 output_asm_insn ("jmp\t%A0", xops); 15608 } 15609 } 15610 else 15611 { 15612 if (!flag_pic || (*targetm.binds_local_p) (function)) 15613 output_asm_insn ("jmp\t%P0", xops); 15614 else 15615#if TARGET_MACHO 15616 if (TARGET_MACHO) 15617 { 15618 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function)); 15619 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip)); 15620 tmp = gen_rtx_MEM (QImode, tmp); 15621 xops[0] = tmp; 15622 output_asm_insn ("jmp\t%0", xops); 15623 } 15624 else 15625#endif /* TARGET_MACHO */ 15626 { 15627 tmp = gen_rtx_REG (SImode, 2 /* ECX */); 15628 output_set_got (tmp); 15629 15630 xops[1] = tmp; 15631 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops); 15632 output_asm_insn ("jmp\t{*}%1", xops); 15633 } 15634 } 15635} 15636 15637static void 15638x86_file_start (void) 15639{ 15640 default_file_start (); 15641 if (X86_FILE_START_VERSION_DIRECTIVE) 15642 fputs ("\t.version\t\"01.01\"\n", asm_out_file); 15643 if (X86_FILE_START_FLTUSED) 15644 fputs ("\t.global\t__fltused\n", asm_out_file); 15645 if (ix86_asm_dialect == ASM_INTEL) 15646 fputs ("\t.intel_syntax\n", asm_out_file); 15647} 15648 15649int 15650x86_field_alignment (tree field, int computed) 15651{ 15652 enum machine_mode mode; 15653 tree type = TREE_TYPE (field); 15654 15655 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) 15656 return computed; 15657 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE 15658 ? 
get_inner_array_type (type) : type); 15659 if (mode == DFmode || mode == DCmode 15660 || GET_MODE_CLASS (mode) == MODE_INT 15661 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) 15662 return MIN (32, computed); 15663 return computed; 15664} 15665 15666/* Output assembler code to FILE to increment profiler label # LABELNO 15667 for profiling a function entry. */ 15668void 15669x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) 15670{ 15671 if (TARGET_64BIT) 15672 if (flag_pic) 15673 { 15674#ifndef NO_PROFILE_COUNTERS 15675 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno); 15676#endif 15677 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME); 15678 } 15679 else 15680 { 15681#ifndef NO_PROFILE_COUNTERS 15682 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno); 15683#endif 15684 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); 15685 } 15686 else if (flag_pic) 15687 { 15688#ifndef NO_PROFILE_COUNTERS 15689 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n", 15690 LPREFIX, labelno, PROFILE_COUNT_REGISTER); 15691#endif 15692 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME); 15693 } 15694 else 15695 { 15696#ifndef NO_PROFILE_COUNTERS 15697 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno, 15698 PROFILE_COUNT_REGISTER); 15699#endif 15700 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); 15701 } 15702} 15703 15704/* We don't have exact information about the insn sizes, but we may assume 15705 quite safely that we are informed about all 1 byte insns and memory 15706 address sizes. This is enough to eliminate unnecessary padding in 15707 99% of cases. */ 15708 15709static int 15710min_insn_size (rtx insn) 15711{ 15712 int l = 0; 15713 15714 if (!INSN_P (insn) || !active_insn_p (insn)) 15715 return 0; 15716 15717 /* Discard alignments we've emit and jump instructions. 
*/ 15718 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE 15719 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN) 15720 return 0; 15721 if (GET_CODE (insn) == JUMP_INSN 15722 && (GET_CODE (PATTERN (insn)) == ADDR_VEC 15723 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)) 15724 return 0; 15725 15726 /* Important case - calls are always 5 bytes. 15727 It is common to have many calls in the row. */ 15728 if (GET_CODE (insn) == CALL_INSN 15729 && symbolic_reference_mentioned_p (PATTERN (insn)) 15730 && !SIBLING_CALL_P (insn)) 15731 return 5; 15732 if (get_attr_length (insn) <= 1) 15733 return 1; 15734 15735 /* For normal instructions we may rely on the sizes of addresses 15736 and the presence of symbol to require 4 bytes of encoding. 15737 This is not the case for jumps where references are PC relative. */ 15738 if (GET_CODE (insn) != JUMP_INSN) 15739 { 15740 l = get_attr_length_address (insn); 15741 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn))) 15742 l = 4; 15743 } 15744 if (l) 15745 return 1+l; 15746 else 15747 return 2; 15748} 15749 15750/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte 15751 window. */ 15752 15753static void 15754k8_avoid_jump_misspredicts (void) 15755{ 15756 rtx insn, start = get_insns (); 15757 int nbytes = 0, njumps = 0; 15758 int isjump = 0; 15759 15760 /* Look for all minimal intervals of instructions containing 4 jumps. 15761 The intervals are bounded by START and INSN. NBYTES is the total 15762 size of instructions in the interval including INSN and not including 15763 START. When the NBYTES is smaller than 16 bytes, it is possible 15764 that the end of START and INSN ends up in the same 16byte page. 15765 15766 The smallest offset in the page INSN can start is the case where START 15767 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN). 15768 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN). 
15769 */ 15770 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 15771 { 15772 15773 nbytes += min_insn_size (insn); 15774 if (rtl_dump_file) 15775 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n", 15776 INSN_UID (insn), min_insn_size (insn)); 15777 if ((GET_CODE (insn) == JUMP_INSN 15778 && GET_CODE (PATTERN (insn)) != ADDR_VEC 15779 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC) 15780 || GET_CODE (insn) == CALL_INSN) 15781 njumps++; 15782 else 15783 continue; 15784 15785 while (njumps > 3) 15786 { 15787 start = NEXT_INSN (start); 15788 if ((GET_CODE (start) == JUMP_INSN 15789 && GET_CODE (PATTERN (start)) != ADDR_VEC 15790 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC) 15791 || GET_CODE (start) == CALL_INSN) 15792 njumps--, isjump = 1; 15793 else 15794 isjump = 0; 15795 nbytes -= min_insn_size (start); 15796 } 15797 if (njumps < 0) 15798 abort (); 15799 if (rtl_dump_file) 15800 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n", 15801 INSN_UID (start), INSN_UID (insn), nbytes); 15802 15803 if (njumps == 3 && isjump && nbytes < 16) 15804 { 15805 int padsize = 15 - nbytes + min_insn_size (insn); 15806 15807 if (rtl_dump_file) 15808 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize); 15809 emit_insn_before (gen_align (GEN_INT (padsize)), insn); 15810 } 15811 } 15812} 15813 15814/* Implement machine specific optimizations. 15815 At the moment we implement single transformation: AMD Athlon works faster 15816 when RET is not destination of conditional jump or directly preceded 15817 by other jump instruction. We avoid the penalty by inserting NOP just 15818 before the RET instructions in such cases. 
*/ 15819static void 15820ix86_reorg (void) 15821{ 15822 edge e; 15823 15824 if (!TARGET_ATHLON_K8 || !optimize || optimize_size) 15825 return; 15826 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next) 15827 { 15828 basic_block bb = e->src; 15829 rtx ret = BB_END (bb); 15830 rtx prev; 15831 bool replace = false; 15832 15833 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN 15834 || !maybe_hot_bb_p (bb)) 15835 continue; 15836 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev)) 15837 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL) 15838 break; 15839 if (prev && GET_CODE (prev) == CODE_LABEL) 15840 { 15841 edge e; 15842 for (e = bb->pred; e; e = e->pred_next) 15843 if (EDGE_FREQUENCY (e) && e->src->index >= 0 15844 && !(e->flags & EDGE_FALLTHRU)) 15845 replace = true; 15846 } 15847 if (!replace) 15848 { 15849 prev = prev_active_insn (ret); 15850 if (prev 15851 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev)) 15852 || GET_CODE (prev) == CALL_INSN)) 15853 replace = true; 15854 /* Empty functions get branch mispredict even when the jump destination 15855 is not visible to us. */ 15856 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED) 15857 replace = true; 15858 } 15859 if (replace) 15860 { 15861 emit_insn_before (gen_return_internal_long (), ret); 15862 delete_insn (ret); 15863 } 15864 } 15865 k8_avoid_jump_misspredicts (); 15866} 15867 15868/* Return nonzero when QImode register that must be represented via REX prefix 15869 is used. */ 15870bool 15871x86_extended_QIreg_mentioned_p (rtx insn) 15872{ 15873 int i; 15874 extract_insn_cached (insn); 15875 for (i = 0; i < recog_data.n_operands; i++) 15876 if (REG_P (recog_data.operand[i]) 15877 && REGNO (recog_data.operand[i]) >= 4) 15878 return true; 15879 return false; 15880} 15881 15882/* Return nonzero when P points to register encoded via REX prefix. 15883 Called via for_each_rtx. 
*/ 15884static int 15885extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED) 15886{ 15887 unsigned int regno; 15888 if (!REG_P (*p)) 15889 return 0; 15890 regno = REGNO (*p); 15891 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno); 15892} 15893 15894/* Return true when INSN mentions register that must be encoded using REX 15895 prefix. */ 15896bool 15897x86_extended_reg_mentioned_p (rtx insn) 15898{ 15899 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL); 15900} 15901 15902/* Generate an unsigned DImode/SImode to FP conversion. This is the same code 15903 optabs would emit if we didn't have TFmode patterns. */ 15904 15905void 15906x86_emit_floatuns (rtx operands[2]) 15907{ 15908 rtx neglab, donelab, i0, i1, f0, in, out; 15909 enum machine_mode mode, inmode; 15910 15911 inmode = GET_MODE (operands[1]); 15912 if (inmode != SImode 15913 && inmode != DImode) 15914 abort (); 15915 15916 out = operands[0]; 15917 in = force_reg (inmode, operands[1]); 15918 mode = GET_MODE (out); 15919 neglab = gen_label_rtx (); 15920 donelab = gen_label_rtx (); 15921 i1 = gen_reg_rtx (Pmode); 15922 f0 = gen_reg_rtx (mode); 15923 15924 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab); 15925 15926 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in))); 15927 emit_jump_insn (gen_jump (donelab)); 15928 emit_barrier (); 15929 15930 emit_label (neglab); 15931 15932 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT); 15933 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT); 15934 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT); 15935 expand_float (f0, i0, 0); 15936 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0))); 15937 15938 emit_label (donelab); 15939} 15940 15941/* Return if we do not know how to pass TYPE solely in registers. 
*/ 15942bool 15943ix86_must_pass_in_stack (enum machine_mode mode, tree type) 15944{ 15945 if (default_must_pass_in_stack (mode, type)) 15946 return true; 15947 return (!TARGET_64BIT && type && mode == TImode); 15948} 15949 15950/* Initialize vector TARGET via VALS. */ 15951void 15952ix86_expand_vector_init (rtx target, rtx vals) 15953{ 15954 enum machine_mode mode = GET_MODE (target); 15955 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode)); 15956 int n_elts = (GET_MODE_SIZE (mode) / elt_size); 15957 int i; 15958 15959 for (i = n_elts - 1; i >= 0; i--) 15960 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT 15961 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE) 15962 break; 15963 15964 /* Few special cases first... 15965 ... constants are best loaded from constant pool. */ 15966 if (i < 0) 15967 { 15968 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); 15969 return; 15970 } 15971 15972 /* ... values where only first field is non-constant are best loaded 15973 from the pool and overwriten via move later. */ 15974 if (!i) 15975 { 15976 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0), 15977 GET_MODE_INNER (mode), 0); 15978 15979 op = force_reg (mode, op); 15980 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode)); 15981 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); 15982 switch (GET_MODE (target)) 15983 { 15984 case V2DFmode: 15985 emit_insn (gen_sse2_movsd (target, target, op)); 15986 break; 15987 case V4SFmode: 15988 emit_insn (gen_sse_movss (target, target, op)); 15989 break; 15990 default: 15991 break; 15992 } 15993 return; 15994 } 15995 15996 /* And the busy sequence doing rotations. 
*/ 15997 switch (GET_MODE (target)) 15998 { 15999 case V2DFmode: 16000 { 16001 rtx vecop0 = 16002 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0); 16003 rtx vecop1 = 16004 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0); 16005 16006 vecop0 = force_reg (V2DFmode, vecop0); 16007 vecop1 = force_reg (V2DFmode, vecop1); 16008 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1)); 16009 } 16010 break; 16011 case V4SFmode: 16012 { 16013 rtx vecop0 = 16014 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0); 16015 rtx vecop1 = 16016 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0); 16017 rtx vecop2 = 16018 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0); 16019 rtx vecop3 = 16020 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0); 16021 rtx tmp1 = gen_reg_rtx (V4SFmode); 16022 rtx tmp2 = gen_reg_rtx (V4SFmode); 16023 16024 vecop0 = force_reg (V4SFmode, vecop0); 16025 vecop1 = force_reg (V4SFmode, vecop1); 16026 vecop2 = force_reg (V4SFmode, vecop2); 16027 vecop3 = force_reg (V4SFmode, vecop3); 16028 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3)); 16029 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2)); 16030 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1)); 16031 } 16032 break; 16033 default: 16034 abort (); 16035 } 16036} 16037 16038#include "gt-i386.h" 16039