/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/ 21233294Sstas 22233294Sstas/* $FreeBSD: head/contrib/gcc/config/i386/i386.c 171836 2007-08-14 03:04:42Z kan $ */ 23233294Sstas 24233294Sstas#include "config.h" 25233294Sstas#include "system.h" 26233294Sstas#include "coretypes.h" 27233294Sstas#include "tm.h" 28233294Sstas#include "rtl.h" 29233294Sstas#include "tree.h" 30233294Sstas#include "tm_p.h" 31233294Sstas#include "regs.h" 32178825Sdfr#include "hard-reg-set.h" 33178825Sdfr#include "real.h" 34178825Sdfr#include "insn-config.h" 35178825Sdfr#include "conditions.h" 36178825Sdfr#include "output.h" 37233294Sstas#include "insn-codes.h" 38178825Sdfr#include "insn-attr.h" 39178825Sdfr#include "flags.h" 40178825Sdfr#include "except.h" 41178825Sdfr#include "function.h" 42178825Sdfr#include "recog.h" 43178825Sdfr#include "expr.h" 44178825Sdfr#include "optabs.h" 45178825Sdfr#include "toplev.h" 46178825Sdfr#include "basic-block.h" 47178825Sdfr#include "ggc.h" 48178825Sdfr#include "target.h" 49178825Sdfr#include "target-def.h" 50178825Sdfr#include "langhooks.h" 51178825Sdfr#include "cgraph.h" 52178825Sdfr#include "tree-gimple.h" 53178825Sdfr#include "dwarf2.h" 54178825Sdfr#include "tm-constrs.h" 55178825Sdfr 56178825Sdfr#ifndef CHECK_STACK_LIMIT 57178825Sdfr#define CHECK_STACK_LIMIT (-1) 58178825Sdfr#endif 59233294Sstas 60178825Sdfr/* Return index of given mode in mult and division cost tables. */ 61178825Sdfr#define MODE_INDEX(mode) \ 62178825Sdfr ((mode) == QImode ? 0 \ 63178825Sdfr : (mode) == HImode ? 1 \ 64178825Sdfr : (mode) == SImode ? 2 \ 65178825Sdfr : (mode) == DImode ? 3 \ 66178825Sdfr : 4) 67178825Sdfr 68178825Sdfr/* Processor costs (relative to an add) */ 69178825Sdfr/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. 
*/ 70178825Sdfr#define COSTS_N_BYTES(N) ((N) * 2) 71178825Sdfr 72178825Sdfrstatic const 73178825Sdfrstruct processor_costs size_cost = { /* costs for tuning for size */ 74178825Sdfr COSTS_N_BYTES (2), /* cost of an add instruction */ 75178825Sdfr COSTS_N_BYTES (3), /* cost of a lea instruction */ 76178825Sdfr COSTS_N_BYTES (2), /* variable shift costs */ 77178825Sdfr COSTS_N_BYTES (3), /* constant shift costs */ 78178825Sdfr {COSTS_N_BYTES (3), /* cost of starting multiply for QI */ 79178825Sdfr COSTS_N_BYTES (3), /* HI */ 80178825Sdfr COSTS_N_BYTES (3), /* SI */ 81178825Sdfr COSTS_N_BYTES (3), /* DI */ 82178825Sdfr COSTS_N_BYTES (5)}, /* other */ 83178825Sdfr 0, /* cost of multiply per each bit set */ 84178825Sdfr {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */ 85178825Sdfr COSTS_N_BYTES (3), /* HI */ 86178825Sdfr COSTS_N_BYTES (3), /* SI */ 87178825Sdfr COSTS_N_BYTES (3), /* DI */ 88178825Sdfr COSTS_N_BYTES (5)}, /* other */ 89178825Sdfr COSTS_N_BYTES (3), /* cost of movsx */ 90178825Sdfr COSTS_N_BYTES (3), /* cost of movzx */ 91178825Sdfr 0, /* "large" insn */ 92178825Sdfr 2, /* MOVE_RATIO */ 93178825Sdfr 2, /* cost for loading QImode using movzbl */ 94178825Sdfr {2, 2, 2}, /* cost of loading integer registers 95178825Sdfr in QImode, HImode and SImode. 96178825Sdfr Relative to reg-reg move (2). 
*/ 97178825Sdfr {2, 2, 2}, /* cost of storing integer registers */ 98178825Sdfr 2, /* cost of reg,reg fld/fst */ 99178825Sdfr {2, 2, 2}, /* cost of loading fp registers 100178825Sdfr in SFmode, DFmode and XFmode */ 101178825Sdfr {2, 2, 2}, /* cost of storing fp registers 102178825Sdfr in SFmode, DFmode and XFmode */ 103178825Sdfr 3, /* cost of moving MMX register */ 104178825Sdfr {3, 3}, /* cost of loading MMX registers 105178825Sdfr in SImode and DImode */ 106178825Sdfr {3, 3}, /* cost of storing MMX registers 107178825Sdfr in SImode and DImode */ 108178825Sdfr 3, /* cost of moving SSE register */ 109178825Sdfr {3, 3, 3}, /* cost of loading SSE registers 110178825Sdfr in SImode, DImode and TImode */ 111233294Sstas {3, 3, 3}, /* cost of storing SSE registers 112178825Sdfr in SImode, DImode and TImode */ 113178825Sdfr 3, /* MMX or SSE register to integer */ 114178825Sdfr 0, /* size of prefetch block */ 115178825Sdfr 0, /* number of parallel prefetches */ 116178825Sdfr 2, /* Branch cost */ 117178825Sdfr COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */ 118178825Sdfr COSTS_N_BYTES (2), /* cost of FMUL instruction. */ 119178825Sdfr COSTS_N_BYTES (2), /* cost of FDIV instruction. */ 120178825Sdfr COSTS_N_BYTES (2), /* cost of FABS instruction. */ 121178825Sdfr COSTS_N_BYTES (2), /* cost of FCHS instruction. */ 122178825Sdfr COSTS_N_BYTES (2), /* cost of FSQRT instruction. 
*/ 123178825Sdfr}; 124178825Sdfr 125178825Sdfr/* Processor costs (relative to an add) */ 126178825Sdfrstatic const 127178825Sdfrstruct processor_costs i386_cost = { /* 386 specific costs */ 128178825Sdfr COSTS_N_INSNS (1), /* cost of an add instruction */ 129178825Sdfr COSTS_N_INSNS (1), /* cost of a lea instruction */ 130178825Sdfr COSTS_N_INSNS (3), /* variable shift costs */ 131178825Sdfr COSTS_N_INSNS (2), /* constant shift costs */ 132178825Sdfr {COSTS_N_INSNS (6), /* cost of starting multiply for QI */ 133178825Sdfr COSTS_N_INSNS (6), /* HI */ 134178825Sdfr COSTS_N_INSNS (6), /* SI */ 135178825Sdfr COSTS_N_INSNS (6), /* DI */ 136178825Sdfr COSTS_N_INSNS (6)}, /* other */ 137178825Sdfr COSTS_N_INSNS (1), /* cost of multiply per each bit set */ 138178825Sdfr {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */ 139178825Sdfr COSTS_N_INSNS (23), /* HI */ 140178825Sdfr COSTS_N_INSNS (23), /* SI */ 141178825Sdfr COSTS_N_INSNS (23), /* DI */ 142178825Sdfr COSTS_N_INSNS (23)}, /* other */ 143178825Sdfr COSTS_N_INSNS (3), /* cost of movsx */ 144178825Sdfr COSTS_N_INSNS (2), /* cost of movzx */ 145178825Sdfr 15, /* "large" insn */ 146178825Sdfr 3, /* MOVE_RATIO */ 147233294Sstas 4, /* cost for loading QImode using movzbl */ 148233294Sstas {2, 4, 2}, /* cost of loading integer registers 149178825Sdfr in QImode, HImode and SImode. 150178825Sdfr Relative to reg-reg move (2). 
*/ 151178825Sdfr {2, 4, 2}, /* cost of storing integer registers */ 152178825Sdfr 2, /* cost of reg,reg fld/fst */ 153178825Sdfr {8, 8, 8}, /* cost of loading fp registers 154178825Sdfr in SFmode, DFmode and XFmode */ 155178825Sdfr {8, 8, 8}, /* cost of storing fp registers 156178825Sdfr in SFmode, DFmode and XFmode */ 157178825Sdfr 2, /* cost of moving MMX register */ 158178825Sdfr {4, 8}, /* cost of loading MMX registers 159178825Sdfr in SImode and DImode */ 160178825Sdfr {4, 8}, /* cost of storing MMX registers 161178825Sdfr in SImode and DImode */ 162178825Sdfr 2, /* cost of moving SSE register */ 163178825Sdfr {4, 8, 16}, /* cost of loading SSE registers 164178825Sdfr in SImode, DImode and TImode */ 165178825Sdfr {4, 8, 16}, /* cost of storing SSE registers 166178825Sdfr in SImode, DImode and TImode */ 167178825Sdfr 3, /* MMX or SSE register to integer */ 168178825Sdfr 0, /* size of prefetch block */ 169178825Sdfr 0, /* number of parallel prefetches */ 170178825Sdfr 1, /* Branch cost */ 171 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */ 172 COSTS_N_INSNS (27), /* cost of FMUL instruction. */ 173 COSTS_N_INSNS (88), /* cost of FDIV instruction. */ 174 COSTS_N_INSNS (22), /* cost of FABS instruction. */ 175 COSTS_N_INSNS (24), /* cost of FCHS instruction. */ 176 COSTS_N_INSNS (122), /* cost of FSQRT instruction. 
*/ 177}; 178 179static const 180struct processor_costs i486_cost = { /* 486 specific costs */ 181 COSTS_N_INSNS (1), /* cost of an add instruction */ 182 COSTS_N_INSNS (1), /* cost of a lea instruction */ 183 COSTS_N_INSNS (3), /* variable shift costs */ 184 COSTS_N_INSNS (2), /* constant shift costs */ 185 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */ 186 COSTS_N_INSNS (12), /* HI */ 187 COSTS_N_INSNS (12), /* SI */ 188 COSTS_N_INSNS (12), /* DI */ 189 COSTS_N_INSNS (12)}, /* other */ 190 1, /* cost of multiply per each bit set */ 191 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */ 192 COSTS_N_INSNS (40), /* HI */ 193 COSTS_N_INSNS (40), /* SI */ 194 COSTS_N_INSNS (40), /* DI */ 195 COSTS_N_INSNS (40)}, /* other */ 196 COSTS_N_INSNS (3), /* cost of movsx */ 197 COSTS_N_INSNS (2), /* cost of movzx */ 198 15, /* "large" insn */ 199 3, /* MOVE_RATIO */ 200 4, /* cost for loading QImode using movzbl */ 201 {2, 4, 2}, /* cost of loading integer registers 202 in QImode, HImode and SImode. 203 Relative to reg-reg move (2). */ 204 {2, 4, 2}, /* cost of storing integer registers */ 205 2, /* cost of reg,reg fld/fst */ 206 {8, 8, 8}, /* cost of loading fp registers 207 in SFmode, DFmode and XFmode */ 208 {8, 8, 8}, /* cost of storing fp registers 209 in SFmode, DFmode and XFmode */ 210 2, /* cost of moving MMX register */ 211 {4, 8}, /* cost of loading MMX registers 212 in SImode and DImode */ 213 {4, 8}, /* cost of storing MMX registers 214 in SImode and DImode */ 215 2, /* cost of moving SSE register */ 216 {4, 8, 16}, /* cost of loading SSE registers 217 in SImode, DImode and TImode */ 218 {4, 8, 16}, /* cost of storing SSE registers 219 in SImode, DImode and TImode */ 220 3, /* MMX or SSE register to integer */ 221 0, /* size of prefetch block */ 222 0, /* number of parallel prefetches */ 223 1, /* Branch cost */ 224 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ 225 COSTS_N_INSNS (16), /* cost of FMUL instruction. 
*/ 226 COSTS_N_INSNS (73), /* cost of FDIV instruction. */ 227 COSTS_N_INSNS (3), /* cost of FABS instruction. */ 228 COSTS_N_INSNS (3), /* cost of FCHS instruction. */ 229 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */ 230}; 231 232static const 233struct processor_costs pentium_cost = { 234 COSTS_N_INSNS (1), /* cost of an add instruction */ 235 COSTS_N_INSNS (1), /* cost of a lea instruction */ 236 COSTS_N_INSNS (4), /* variable shift costs */ 237 COSTS_N_INSNS (1), /* constant shift costs */ 238 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */ 239 COSTS_N_INSNS (11), /* HI */ 240 COSTS_N_INSNS (11), /* SI */ 241 COSTS_N_INSNS (11), /* DI */ 242 COSTS_N_INSNS (11)}, /* other */ 243 0, /* cost of multiply per each bit set */ 244 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */ 245 COSTS_N_INSNS (25), /* HI */ 246 COSTS_N_INSNS (25), /* SI */ 247 COSTS_N_INSNS (25), /* DI */ 248 COSTS_N_INSNS (25)}, /* other */ 249 COSTS_N_INSNS (3), /* cost of movsx */ 250 COSTS_N_INSNS (2), /* cost of movzx */ 251 8, /* "large" insn */ 252 6, /* MOVE_RATIO */ 253 6, /* cost for loading QImode using movzbl */ 254 {2, 4, 2}, /* cost of loading integer registers 255 in QImode, HImode and SImode. 256 Relative to reg-reg move (2). 
*/ 257 {2, 4, 2}, /* cost of storing integer registers */ 258 2, /* cost of reg,reg fld/fst */ 259 {2, 2, 6}, /* cost of loading fp registers 260 in SFmode, DFmode and XFmode */ 261 {4, 4, 6}, /* cost of storing fp registers 262 in SFmode, DFmode and XFmode */ 263 8, /* cost of moving MMX register */ 264 {8, 8}, /* cost of loading MMX registers 265 in SImode and DImode */ 266 {8, 8}, /* cost of storing MMX registers 267 in SImode and DImode */ 268 2, /* cost of moving SSE register */ 269 {4, 8, 16}, /* cost of loading SSE registers 270 in SImode, DImode and TImode */ 271 {4, 8, 16}, /* cost of storing SSE registers 272 in SImode, DImode and TImode */ 273 3, /* MMX or SSE register to integer */ 274 0, /* size of prefetch block */ 275 0, /* number of parallel prefetches */ 276 2, /* Branch cost */ 277 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ 278 COSTS_N_INSNS (3), /* cost of FMUL instruction. */ 279 COSTS_N_INSNS (39), /* cost of FDIV instruction. */ 280 COSTS_N_INSNS (1), /* cost of FABS instruction. */ 281 COSTS_N_INSNS (1), /* cost of FCHS instruction. */ 282 COSTS_N_INSNS (70), /* cost of FSQRT instruction. 
*/ 283}; 284 285static const 286struct processor_costs pentiumpro_cost = { 287 COSTS_N_INSNS (1), /* cost of an add instruction */ 288 COSTS_N_INSNS (1), /* cost of a lea instruction */ 289 COSTS_N_INSNS (1), /* variable shift costs */ 290 COSTS_N_INSNS (1), /* constant shift costs */ 291 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ 292 COSTS_N_INSNS (4), /* HI */ 293 COSTS_N_INSNS (4), /* SI */ 294 COSTS_N_INSNS (4), /* DI */ 295 COSTS_N_INSNS (4)}, /* other */ 296 0, /* cost of multiply per each bit set */ 297 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */ 298 COSTS_N_INSNS (17), /* HI */ 299 COSTS_N_INSNS (17), /* SI */ 300 COSTS_N_INSNS (17), /* DI */ 301 COSTS_N_INSNS (17)}, /* other */ 302 COSTS_N_INSNS (1), /* cost of movsx */ 303 COSTS_N_INSNS (1), /* cost of movzx */ 304 8, /* "large" insn */ 305 6, /* MOVE_RATIO */ 306 2, /* cost for loading QImode using movzbl */ 307 {4, 4, 4}, /* cost of loading integer registers 308 in QImode, HImode and SImode. 309 Relative to reg-reg move (2). */ 310 {2, 2, 2}, /* cost of storing integer registers */ 311 2, /* cost of reg,reg fld/fst */ 312 {2, 2, 6}, /* cost of loading fp registers 313 in SFmode, DFmode and XFmode */ 314 {4, 4, 6}, /* cost of storing fp registers 315 in SFmode, DFmode and XFmode */ 316 2, /* cost of moving MMX register */ 317 {2, 2}, /* cost of loading MMX registers 318 in SImode and DImode */ 319 {2, 2}, /* cost of storing MMX registers 320 in SImode and DImode */ 321 2, /* cost of moving SSE register */ 322 {2, 2, 8}, /* cost of loading SSE registers 323 in SImode, DImode and TImode */ 324 {2, 2, 8}, /* cost of storing SSE registers 325 in SImode, DImode and TImode */ 326 3, /* MMX or SSE register to integer */ 327 32, /* size of prefetch block */ 328 6, /* number of parallel prefetches */ 329 2, /* Branch cost */ 330 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ 331 COSTS_N_INSNS (5), /* cost of FMUL instruction. 
*/ 332 COSTS_N_INSNS (56), /* cost of FDIV instruction. */ 333 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 334 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 335 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ 336}; 337 338static const 339struct processor_costs k6_cost = { 340 COSTS_N_INSNS (1), /* cost of an add instruction */ 341 COSTS_N_INSNS (2), /* cost of a lea instruction */ 342 COSTS_N_INSNS (1), /* variable shift costs */ 343 COSTS_N_INSNS (1), /* constant shift costs */ 344 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 345 COSTS_N_INSNS (3), /* HI */ 346 COSTS_N_INSNS (3), /* SI */ 347 COSTS_N_INSNS (3), /* DI */ 348 COSTS_N_INSNS (3)}, /* other */ 349 0, /* cost of multiply per each bit set */ 350 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 351 COSTS_N_INSNS (18), /* HI */ 352 COSTS_N_INSNS (18), /* SI */ 353 COSTS_N_INSNS (18), /* DI */ 354 COSTS_N_INSNS (18)}, /* other */ 355 COSTS_N_INSNS (2), /* cost of movsx */ 356 COSTS_N_INSNS (2), /* cost of movzx */ 357 8, /* "large" insn */ 358 4, /* MOVE_RATIO */ 359 3, /* cost for loading QImode using movzbl */ 360 {4, 5, 4}, /* cost of loading integer registers 361 in QImode, HImode and SImode. 362 Relative to reg-reg move (2). 
*/ 363 {2, 3, 2}, /* cost of storing integer registers */ 364 4, /* cost of reg,reg fld/fst */ 365 {6, 6, 6}, /* cost of loading fp registers 366 in SFmode, DFmode and XFmode */ 367 {4, 4, 4}, /* cost of storing fp registers 368 in SFmode, DFmode and XFmode */ 369 2, /* cost of moving MMX register */ 370 {2, 2}, /* cost of loading MMX registers 371 in SImode and DImode */ 372 {2, 2}, /* cost of storing MMX registers 373 in SImode and DImode */ 374 2, /* cost of moving SSE register */ 375 {2, 2, 8}, /* cost of loading SSE registers 376 in SImode, DImode and TImode */ 377 {2, 2, 8}, /* cost of storing SSE registers 378 in SImode, DImode and TImode */ 379 6, /* MMX or SSE register to integer */ 380 32, /* size of prefetch block */ 381 1, /* number of parallel prefetches */ 382 1, /* Branch cost */ 383 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */ 384 COSTS_N_INSNS (2), /* cost of FMUL instruction. */ 385 COSTS_N_INSNS (56), /* cost of FDIV instruction. */ 386 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 387 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 388 COSTS_N_INSNS (56), /* cost of FSQRT instruction. 
*/ 389}; 390 391static const 392struct processor_costs athlon_cost = { 393 COSTS_N_INSNS (1), /* cost of an add instruction */ 394 COSTS_N_INSNS (2), /* cost of a lea instruction */ 395 COSTS_N_INSNS (1), /* variable shift costs */ 396 COSTS_N_INSNS (1), /* constant shift costs */ 397 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */ 398 COSTS_N_INSNS (5), /* HI */ 399 COSTS_N_INSNS (5), /* SI */ 400 COSTS_N_INSNS (5), /* DI */ 401 COSTS_N_INSNS (5)}, /* other */ 402 0, /* cost of multiply per each bit set */ 403 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 404 COSTS_N_INSNS (26), /* HI */ 405 COSTS_N_INSNS (42), /* SI */ 406 COSTS_N_INSNS (74), /* DI */ 407 COSTS_N_INSNS (74)}, /* other */ 408 COSTS_N_INSNS (1), /* cost of movsx */ 409 COSTS_N_INSNS (1), /* cost of movzx */ 410 8, /* "large" insn */ 411 9, /* MOVE_RATIO */ 412 4, /* cost for loading QImode using movzbl */ 413 {3, 4, 3}, /* cost of loading integer registers 414 in QImode, HImode and SImode. 415 Relative to reg-reg move (2). */ 416 {3, 4, 3}, /* cost of storing integer registers */ 417 4, /* cost of reg,reg fld/fst */ 418 {4, 4, 12}, /* cost of loading fp registers 419 in SFmode, DFmode and XFmode */ 420 {6, 6, 8}, /* cost of storing fp registers 421 in SFmode, DFmode and XFmode */ 422 2, /* cost of moving MMX register */ 423 {4, 4}, /* cost of loading MMX registers 424 in SImode and DImode */ 425 {4, 4}, /* cost of storing MMX registers 426 in SImode and DImode */ 427 2, /* cost of moving SSE register */ 428 {4, 4, 6}, /* cost of loading SSE registers 429 in SImode, DImode and TImode */ 430 {4, 4, 5}, /* cost of storing SSE registers 431 in SImode, DImode and TImode */ 432 5, /* MMX or SSE register to integer */ 433 64, /* size of prefetch block */ 434 6, /* number of parallel prefetches */ 435 5, /* Branch cost */ 436 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ 437 COSTS_N_INSNS (4), /* cost of FMUL instruction. */ 438 COSTS_N_INSNS (24), /* cost of FDIV instruction. 
*/ 439 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 440 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 441 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ 442}; 443 444static const 445struct processor_costs k8_cost = { 446 COSTS_N_INSNS (1), /* cost of an add instruction */ 447 COSTS_N_INSNS (2), /* cost of a lea instruction */ 448 COSTS_N_INSNS (1), /* variable shift costs */ 449 COSTS_N_INSNS (1), /* constant shift costs */ 450 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 451 COSTS_N_INSNS (4), /* HI */ 452 COSTS_N_INSNS (3), /* SI */ 453 COSTS_N_INSNS (4), /* DI */ 454 COSTS_N_INSNS (5)}, /* other */ 455 0, /* cost of multiply per each bit set */ 456 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 457 COSTS_N_INSNS (26), /* HI */ 458 COSTS_N_INSNS (42), /* SI */ 459 COSTS_N_INSNS (74), /* DI */ 460 COSTS_N_INSNS (74)}, /* other */ 461 COSTS_N_INSNS (1), /* cost of movsx */ 462 COSTS_N_INSNS (1), /* cost of movzx */ 463 8, /* "large" insn */ 464 9, /* MOVE_RATIO */ 465 4, /* cost for loading QImode using movzbl */ 466 {3, 4, 3}, /* cost of loading integer registers 467 in QImode, HImode and SImode. 468 Relative to reg-reg move (2). 
*/ 469 {3, 4, 3}, /* cost of storing integer registers */ 470 4, /* cost of reg,reg fld/fst */ 471 {4, 4, 12}, /* cost of loading fp registers 472 in SFmode, DFmode and XFmode */ 473 {6, 6, 8}, /* cost of storing fp registers 474 in SFmode, DFmode and XFmode */ 475 2, /* cost of moving MMX register */ 476 {3, 3}, /* cost of loading MMX registers 477 in SImode and DImode */ 478 {4, 4}, /* cost of storing MMX registers 479 in SImode and DImode */ 480 2, /* cost of moving SSE register */ 481 {4, 3, 6}, /* cost of loading SSE registers 482 in SImode, DImode and TImode */ 483 {4, 4, 5}, /* cost of storing SSE registers 484 in SImode, DImode and TImode */ 485 5, /* MMX or SSE register to integer */ 486 64, /* size of prefetch block */ 487 6, /* number of parallel prefetches */ 488 5, /* Branch cost */ 489 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ 490 COSTS_N_INSNS (4), /* cost of FMUL instruction. */ 491 COSTS_N_INSNS (19), /* cost of FDIV instruction. */ 492 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 493 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 494 COSTS_N_INSNS (35), /* cost of FSQRT instruction. 
*/ 495}; 496 497static const 498struct processor_costs pentium4_cost = { 499 COSTS_N_INSNS (1), /* cost of an add instruction */ 500 COSTS_N_INSNS (3), /* cost of a lea instruction */ 501 COSTS_N_INSNS (4), /* variable shift costs */ 502 COSTS_N_INSNS (4), /* constant shift costs */ 503 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */ 504 COSTS_N_INSNS (15), /* HI */ 505 COSTS_N_INSNS (15), /* SI */ 506 COSTS_N_INSNS (15), /* DI */ 507 COSTS_N_INSNS (15)}, /* other */ 508 0, /* cost of multiply per each bit set */ 509 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */ 510 COSTS_N_INSNS (56), /* HI */ 511 COSTS_N_INSNS (56), /* SI */ 512 COSTS_N_INSNS (56), /* DI */ 513 COSTS_N_INSNS (56)}, /* other */ 514 COSTS_N_INSNS (1), /* cost of movsx */ 515 COSTS_N_INSNS (1), /* cost of movzx */ 516 16, /* "large" insn */ 517 6, /* MOVE_RATIO */ 518 2, /* cost for loading QImode using movzbl */ 519 {4, 5, 4}, /* cost of loading integer registers 520 in QImode, HImode and SImode. 521 Relative to reg-reg move (2). */ 522 {2, 3, 2}, /* cost of storing integer registers */ 523 2, /* cost of reg,reg fld/fst */ 524 {2, 2, 6}, /* cost of loading fp registers 525 in SFmode, DFmode and XFmode */ 526 {4, 4, 6}, /* cost of storing fp registers 527 in SFmode, DFmode and XFmode */ 528 2, /* cost of moving MMX register */ 529 {2, 2}, /* cost of loading MMX registers 530 in SImode and DImode */ 531 {2, 2}, /* cost of storing MMX registers 532 in SImode and DImode */ 533 12, /* cost of moving SSE register */ 534 {12, 12, 12}, /* cost of loading SSE registers 535 in SImode, DImode and TImode */ 536 {2, 2, 8}, /* cost of storing SSE registers 537 in SImode, DImode and TImode */ 538 10, /* MMX or SSE register to integer */ 539 64, /* size of prefetch block */ 540 6, /* number of parallel prefetches */ 541 2, /* Branch cost */ 542 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ 543 COSTS_N_INSNS (7), /* cost of FMUL instruction. 
*/ 544 COSTS_N_INSNS (43), /* cost of FDIV instruction. */ 545 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 546 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 547 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */ 548}; 549 550static const 551struct processor_costs nocona_cost = { 552 COSTS_N_INSNS (1), /* cost of an add instruction */ 553 COSTS_N_INSNS (1), /* cost of a lea instruction */ 554 COSTS_N_INSNS (1), /* variable shift costs */ 555 COSTS_N_INSNS (1), /* constant shift costs */ 556 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */ 557 COSTS_N_INSNS (10), /* HI */ 558 COSTS_N_INSNS (10), /* SI */ 559 COSTS_N_INSNS (10), /* DI */ 560 COSTS_N_INSNS (10)}, /* other */ 561 0, /* cost of multiply per each bit set */ 562 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */ 563 COSTS_N_INSNS (66), /* HI */ 564 COSTS_N_INSNS (66), /* SI */ 565 COSTS_N_INSNS (66), /* DI */ 566 COSTS_N_INSNS (66)}, /* other */ 567 COSTS_N_INSNS (1), /* cost of movsx */ 568 COSTS_N_INSNS (1), /* cost of movzx */ 569 16, /* "large" insn */ 570 17, /* MOVE_RATIO */ 571 4, /* cost for loading QImode using movzbl */ 572 {4, 4, 4}, /* cost of loading integer registers 573 in QImode, HImode and SImode. 574 Relative to reg-reg move (2). 
*/ 575 {4, 4, 4}, /* cost of storing integer registers */ 576 3, /* cost of reg,reg fld/fst */ 577 {12, 12, 12}, /* cost of loading fp registers 578 in SFmode, DFmode and XFmode */ 579 {4, 4, 4}, /* cost of storing fp registers 580 in SFmode, DFmode and XFmode */ 581 6, /* cost of moving MMX register */ 582 {12, 12}, /* cost of loading MMX registers 583 in SImode and DImode */ 584 {12, 12}, /* cost of storing MMX registers 585 in SImode and DImode */ 586 6, /* cost of moving SSE register */ 587 {12, 12, 12}, /* cost of loading SSE registers 588 in SImode, DImode and TImode */ 589 {12, 12, 12}, /* cost of storing SSE registers 590 in SImode, DImode and TImode */ 591 8, /* MMX or SSE register to integer */ 592 128, /* size of prefetch block */ 593 8, /* number of parallel prefetches */ 594 1, /* Branch cost */ 595 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ 596 COSTS_N_INSNS (8), /* cost of FMUL instruction. */ 597 COSTS_N_INSNS (40), /* cost of FDIV instruction. */ 598 COSTS_N_INSNS (3), /* cost of FABS instruction. */ 599 COSTS_N_INSNS (3), /* cost of FCHS instruction. */ 600 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */ 601}; 602 603/* Generic64 should produce code tuned for Nocona and K8. */ 604static const 605struct processor_costs generic64_cost = { 606 COSTS_N_INSNS (1), /* cost of an add instruction */ 607 /* On all chips taken into consideration lea is 2 cycles and more. With 608 this cost however our current implementation of synth_mult results in 609 use of unnecessary temporary registers causing regression on several 610 SPECfp benchmarks. 
*/ 611 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ 612 COSTS_N_INSNS (1), /* variable shift costs */ 613 COSTS_N_INSNS (1), /* constant shift costs */ 614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 615 COSTS_N_INSNS (4), /* HI */ 616 COSTS_N_INSNS (3), /* SI */ 617 COSTS_N_INSNS (4), /* DI */ 618 COSTS_N_INSNS (2)}, /* other */ 619 0, /* cost of multiply per each bit set */ 620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 621 COSTS_N_INSNS (26), /* HI */ 622 COSTS_N_INSNS (42), /* SI */ 623 COSTS_N_INSNS (74), /* DI */ 624 COSTS_N_INSNS (74)}, /* other */ 625 COSTS_N_INSNS (1), /* cost of movsx */ 626 COSTS_N_INSNS (1), /* cost of movzx */ 627 8, /* "large" insn */ 628 17, /* MOVE_RATIO */ 629 4, /* cost for loading QImode using movzbl */ 630 {4, 4, 4}, /* cost of loading integer registers 631 in QImode, HImode and SImode. 632 Relative to reg-reg move (2). */ 633 {4, 4, 4}, /* cost of storing integer registers */ 634 4, /* cost of reg,reg fld/fst */ 635 {12, 12, 12}, /* cost of loading fp registers 636 in SFmode, DFmode and XFmode */ 637 {6, 6, 8}, /* cost of storing fp registers 638 in SFmode, DFmode and XFmode */ 639 2, /* cost of moving MMX register */ 640 {8, 8}, /* cost of loading MMX registers 641 in SImode and DImode */ 642 {8, 8}, /* cost of storing MMX registers 643 in SImode and DImode */ 644 2, /* cost of moving SSE register */ 645 {8, 8, 8}, /* cost of loading SSE registers 646 in SImode, DImode and TImode */ 647 {8, 8, 8}, /* cost of storing SSE registers 648 in SImode, DImode and TImode */ 649 5, /* MMX or SSE register to integer */ 650 64, /* size of prefetch block */ 651 6, /* number of parallel prefetches */ 652 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value 653 is increased to perhaps more appropriate value of 5. */ 654 3, /* Branch cost */ 655 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ 656 COSTS_N_INSNS (8), /* cost of FMUL instruction. 
*/ 657 COSTS_N_INSNS (20), /* cost of FDIV instruction. */ 658 COSTS_N_INSNS (8), /* cost of FABS instruction. */ 659 COSTS_N_INSNS (8), /* cost of FCHS instruction. */ 660 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ 661}; 662 663/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */ 664static const 665struct processor_costs generic32_cost = { 666 COSTS_N_INSNS (1), /* cost of an add instruction */ 667 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ 668 COSTS_N_INSNS (1), /* variable shift costs */ 669 COSTS_N_INSNS (1), /* constant shift costs */ 670 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 671 COSTS_N_INSNS (4), /* HI */ 672 COSTS_N_INSNS (3), /* SI */ 673 COSTS_N_INSNS (4), /* DI */ 674 COSTS_N_INSNS (2)}, /* other */ 675 0, /* cost of multiply per each bit set */ 676 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 677 COSTS_N_INSNS (26), /* HI */ 678 COSTS_N_INSNS (42), /* SI */ 679 COSTS_N_INSNS (74), /* DI */ 680 COSTS_N_INSNS (74)}, /* other */ 681 COSTS_N_INSNS (1), /* cost of movsx */ 682 COSTS_N_INSNS (1), /* cost of movzx */ 683 8, /* "large" insn */ 684 17, /* MOVE_RATIO */ 685 4, /* cost for loading QImode using movzbl */ 686 {4, 4, 4}, /* cost of loading integer registers 687 in QImode, HImode and SImode. 688 Relative to reg-reg move (2). 
*/ 689 {4, 4, 4}, /* cost of storing integer registers */ 690 4, /* cost of reg,reg fld/fst */ 691 {12, 12, 12}, /* cost of loading fp registers 692 in SFmode, DFmode and XFmode */ 693 {6, 6, 8}, /* cost of storing fp registers 694 in SFmode, DFmode and XFmode */ 695 2, /* cost of moving MMX register */ 696 {8, 8}, /* cost of loading MMX registers 697 in SImode and DImode */ 698 {8, 8}, /* cost of storing MMX registers 699 in SImode and DImode */ 700 2, /* cost of moving SSE register */ 701 {8, 8, 8}, /* cost of loading SSE registers 702 in SImode, DImode and TImode */ 703 {8, 8, 8}, /* cost of storing SSE registers 704 in SImode, DImode and TImode */ 705 5, /* MMX or SSE register to integer */ 706 64, /* size of prefetch block */ 707 6, /* number of parallel prefetches */ 708 3, /* Branch cost */ 709 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ 710 COSTS_N_INSNS (8), /* cost of FMUL instruction. */ 711 COSTS_N_INSNS (20), /* cost of FDIV instruction. */ 712 COSTS_N_INSNS (8), /* cost of FABS instruction. */ 713 COSTS_N_INSNS (8), /* cost of FCHS instruction. */ 714 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ 715}; 716 717const struct processor_costs *ix86_cost = &pentium_cost; 718 719/* Processor feature/optimization bitmasks. */ 720#define m_386 (1<<PROCESSOR_I386) 721#define m_486 (1<<PROCESSOR_I486) 722#define m_PENT (1<<PROCESSOR_PENTIUM) 723#define m_PPRO (1<<PROCESSOR_PENTIUMPRO) 724#define m_K6 (1<<PROCESSOR_K6) 725#define m_ATHLON (1<<PROCESSOR_ATHLON) 726#define m_PENT4 (1<<PROCESSOR_PENTIUM4) 727#define m_K8 (1<<PROCESSOR_K8) 728#define m_ATHLON_K8 (m_K8 | m_ATHLON) 729#define m_NOCONA (1<<PROCESSOR_NOCONA) 730#define m_GENERIC32 (1<<PROCESSOR_GENERIC32) 731#define m_GENERIC64 (1<<PROCESSOR_GENERIC64) 732#define m_GENERIC (m_GENERIC32 | m_GENERIC64) 733 734/* Generic instruction choice should be common subset of supported CPUs 735 (PPro/PENT4/NOCONA/Athlon/K8). 
*/ 736 737/* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for 738 Generic64 seems like good code size tradeoff. We can't enable it for 32bit 739 generic because it is not working well with PPro base chips. */ 740const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64; 741const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC; 742const int x86_zero_extend_with_and = m_486 | m_PENT; 743const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */; 744const int x86_double_with_add = ~m_386; 745const int x86_use_bit_test = m_386; 746const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC; 747const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA; 748const int x86_3dnow_a = m_ATHLON_K8; 749const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC; 750/* Branch hints were put in P4 based on simulation result. But 751 after P4 was made, no performance benefit was observed with 752 branch hints. It also increases the code size. As the result, 753 icc never generates branch hints. */ 754const int x86_branch_hints = 0; 755const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */ 756/* We probably ought to watch for partial register stalls on Generic32 757 compilation setting as well. However in current implementation the 758 partial register stalls are not eliminated very well - they can 759 be introduced via subregs synthesized by combine and can happen 760 in caller/callee saving sequences. 761 Because this option pays back little on PPro based chips and is in conflict 762 with partial reg. dependencies used by Athlon/P4 based chips, it is better 763 to leave it off for generic32 for now. 
*/ 764const int x86_partial_reg_stall = m_PPRO; 765const int x86_partial_flag_reg_stall = m_GENERIC; 766const int x86_use_himode_fiop = m_386 | m_486 | m_K6; 767const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC); 768const int x86_use_mov0 = m_K6; 769const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC); 770const int x86_read_modify_write = ~m_PENT; 771const int x86_read_modify = ~(m_PENT | m_PPRO); 772const int x86_split_long_moves = m_PPRO; 773const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */ 774const int x86_fast_prefix = ~(m_PENT | m_486 | m_386); 775const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA; 776const int x86_qimode_math = ~(0); 777const int x86_promote_qi_regs = 0; 778/* On PPro this flag is meant to avoid partial register stalls. Just like 779 the x86_partial_reg_stall this option might be considered for Generic32 780 if our scheme for avoiding partial stalls was more effective. */ 781const int x86_himode_math = ~(m_PPRO); 782const int x86_promote_hi_regs = m_PPRO; 783const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC; 784const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC; 785const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC; 786const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC; 787const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC); 788const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC; 789const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC; 790const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC; 791const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC; 792const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC; 793const int x86_shift1 = 
~m_486; 794const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC; 795/* In Generic model we have an conflict here in between PPro/Pentium4 based chips 796 that thread 128bit SSE registers as single units versus K8 based chips that 797 divide SSE registers to two 64bit halves. 798 x86_sse_partial_reg_dependency promote all store destinations to be 128bit 799 to allow register renaming on 128bit SSE units, but usually results in one 800 extra microop on 64bit SSE units. Experimental results shows that disabling 801 this option on P4 brings over 20% SPECfp regression, while enabling it on 802 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling 803 of moves. */ 804const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC; 805/* Set for machines where the type and dependencies are resolved on SSE 806 register parts instead of whole registers, so we may maintain just 807 lower part of scalar values in proper format leaving the upper part 808 undefined. */ 809const int x86_sse_split_regs = m_ATHLON_K8; 810const int x86_sse_typeless_stores = m_ATHLON_K8; 811const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA; 812const int x86_use_ffreep = m_ATHLON_K8; 813const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6; 814const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC); 815 816/* ??? Allowing interunit moves makes it all too easy for the compiler to put 817 integer data in xmm registers. Which results in pretty abysmal code. */ 818const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */; 819 820const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32; 821/* Some CPU cores are not able to predict more than 4 branch instructions in 822 the 16 byte window. 
*/ 823const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC; 824const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC; 825const int x86_use_bt = m_ATHLON_K8; 826/* Compare and exchange was added for 80486. */ 827const int x86_cmpxchg = ~m_386; 828/* Compare and exchange 8 bytes was added for pentium. */ 829const int x86_cmpxchg8b = ~(m_386 | m_486); 830/* Compare and exchange 16 bytes was added for nocona. */ 831const int x86_cmpxchg16b = m_NOCONA; 832/* Exchange and add was added for 80486. */ 833const int x86_xadd = ~m_386; 834const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC; 835 836/* In case the average insn count for single function invocation is 837 lower than this constant, emit fast (but longer) prologue and 838 epilogue code. */ 839#define FAST_PROLOGUE_INSN_COUNT 20 840 841/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */ 842static const char *const qi_reg_name[] = QI_REGISTER_NAMES; 843static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; 844static const char *const hi_reg_name[] = HI_REGISTER_NAMES; 845 846/* Array of the smallest class containing reg number REGNO, indexed by 847 REGNO. Used by REGNO_REG_CLASS in i386.h. 
*/ 848 849enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] = 850{ 851 /* ax, dx, cx, bx */ 852 AREG, DREG, CREG, BREG, 853 /* si, di, bp, sp */ 854 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS, 855 /* FP registers */ 856 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS, 857 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, 858 /* arg pointer */ 859 NON_Q_REGS, 860 /* flags, fpsr, dirflag, frame */ 861 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS, 862 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 863 SSE_REGS, SSE_REGS, 864 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, 865 MMX_REGS, MMX_REGS, 866 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 867 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 868 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 869 SSE_REGS, SSE_REGS, 870}; 871 872/* The "default" register map used in 32bit mode. */ 873 874int const dbx_register_map[FIRST_PSEUDO_REGISTER] = 875{ 876 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */ 877 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */ 878 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 879 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */ 880 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */ 881 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ 882 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ 883}; 884 885static int const x86_64_int_parameter_registers[6] = 886{ 887 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/, 888 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */ 889}; 890 891static int const x86_64_int_return_registers[4] = 892{ 893 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/ 894}; 895 896/* The "default" register map used in 64bit mode. 
*/ 897int const dbx64_register_map[FIRST_PSEUDO_REGISTER] = 898{ 899 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */ 900 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */ 901 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 902 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */ 903 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */ 904 8,9,10,11,12,13,14,15, /* extended integer registers */ 905 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */ 906}; 907 908/* Define the register numbers to be used in Dwarf debugging information. 909 The SVR4 reference port C compiler uses the following register numbers 910 in its Dwarf output code: 911 0 for %eax (gcc regno = 0) 912 1 for %ecx (gcc regno = 2) 913 2 for %edx (gcc regno = 1) 914 3 for %ebx (gcc regno = 3) 915 4 for %esp (gcc regno = 7) 916 5 for %ebp (gcc regno = 6) 917 6 for %esi (gcc regno = 4) 918 7 for %edi (gcc regno = 5) 919 The following three DWARF register numbers are never generated by 920 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4 921 believes these numbers have these meanings. 922 8 for %eip (no gcc equivalent) 923 9 for %eflags (gcc regno = 17) 924 10 for %trapno (no gcc equivalent) 925 It is not at all clear how we should number the FP stack registers 926 for the x86 architecture. If the version of SDB on x86/svr4 were 927 a bit less brain dead with respect to floating-point then we would 928 have a precedent to follow with respect to DWARF register numbers 929 for x86 FP registers, but the SDB on x86/svr4 is so completely 930 broken with respect to FP registers that it is hardly worth thinking 931 of it as something to strive for compatibility with. 932 The version of x86/svr4 SDB I have at the moment does (partially) 933 seem to believe that DWARF register number 11 is associated with 934 the x86 register %st(0), but that's about all. 
Higher DWARF 935 register numbers don't seem to be associated with anything in 936 particular, and even for DWARF regno 11, SDB only seems to under- 937 stand that it should say that a variable lives in %st(0) (when 938 asked via an `=' command) if we said it was in DWARF regno 11, 939 but SDB still prints garbage when asked for the value of the 940 variable in question (via a `/' command). 941 (Also note that the labels SDB prints for various FP stack regs 942 when doing an `x' command are all wrong.) 943 Note that these problems generally don't affect the native SVR4 944 C compiler because it doesn't allow the use of -O with -g and 945 because when it is *not* optimizing, it allocates a memory 946 location for each floating-point variable, and the memory 947 location is what gets described in the DWARF AT_location 948 attribute for the variable in question. 949 Regardless of the severe mental illness of the x86/svr4 SDB, we 950 do something sensible here and we use the following DWARF 951 register numbers. Note that these are all stack-top-relative 952 numbers. 953 11 for %st(0) (gcc regno = 8) 954 12 for %st(1) (gcc regno = 9) 955 13 for %st(2) (gcc regno = 10) 956 14 for %st(3) (gcc regno = 11) 957 15 for %st(4) (gcc regno = 12) 958 16 for %st(5) (gcc regno = 13) 959 17 for %st(6) (gcc regno = 14) 960 18 for %st(7) (gcc regno = 15) 961*/ 962int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = 963{ 964 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */ 965 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */ 966 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */ 967 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */ 968 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */ 969 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ 970 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ 971}; 972 973/* Test and compare insns in i386.md store the information needed to 974 generate branch and scc insns here. 
*/ 975 976rtx ix86_compare_op0 = NULL_RTX; 977rtx ix86_compare_op1 = NULL_RTX; 978rtx ix86_compare_emitted = NULL_RTX; 979 980/* Size of the register save area. */ 981#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16) 982 983/* Define the structure for the machine field in struct function. */ 984 985struct stack_local_entry GTY(()) 986{ 987 unsigned short mode; 988 unsigned short n; 989 rtx rtl; 990 struct stack_local_entry *next; 991}; 992 993/* Structure describing stack frame layout. 994 Stack grows downward: 995 996 [arguments] 997 <- ARG_POINTER 998 saved pc 999 1000 saved frame pointer if frame_pointer_needed 1001 <- HARD_FRAME_POINTER 1002 [saved regs] 1003 1004 [padding1] \ 1005 ) 1006 [va_arg registers] ( 1007 > to_allocate <- FRAME_POINTER 1008 [frame] ( 1009 ) 1010 [padding2] / 1011 */ 1012struct ix86_frame 1013{ 1014 int nregs; 1015 int padding1; 1016 int va_arg_size; 1017 HOST_WIDE_INT frame; 1018 int padding2; 1019 int outgoing_arguments_size; 1020 int red_zone_size; 1021 1022 HOST_WIDE_INT to_allocate; 1023 /* The offsets relative to ARG_POINTER. */ 1024 HOST_WIDE_INT frame_pointer_offset; 1025 HOST_WIDE_INT hard_frame_pointer_offset; 1026 HOST_WIDE_INT stack_pointer_offset; 1027 1028 /* When save_regs_using_mov is set, emit prologue using 1029 move instead of push instructions. */ 1030 bool save_regs_using_mov; 1031}; 1032 1033/* Code model option. */ 1034enum cmodel ix86_cmodel; 1035/* Asm dialect. */ 1036enum asm_dialect ix86_asm_dialect = ASM_ATT; 1037/* TLS dialects. */ 1038enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU; 1039 1040/* Which unit we are generating floating point math for. */ 1041enum fpmath_unit ix86_fpmath; 1042 1043/* Which cpu are we scheduling for. */ 1044enum processor_type ix86_tune; 1045/* Which instruction set architecture to use. */ 1046enum processor_type ix86_arch; 1047 1048/* true if sse prefetch instruction is not NOOP. 
*/ 1049int x86_prefetch_sse; 1050 1051/* ix86_regparm_string as a number */ 1052static int ix86_regparm; 1053 1054/* -mstackrealign option */ 1055extern int ix86_force_align_arg_pointer; 1056static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer"; 1057 1058/* Preferred alignment for stack boundary in bits. */ 1059unsigned int ix86_preferred_stack_boundary; 1060 1061/* Values 1-5: see jump.c */ 1062int ix86_branch_cost; 1063 1064/* Variables which are this size or smaller are put in the data/bss 1065 or ldata/lbss sections. */ 1066 1067int ix86_section_threshold = 65536; 1068 1069/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */ 1070char internal_label_prefix[16]; 1071int internal_label_prefix_len; 1072 1073static bool ix86_handle_option (size_t, const char *, int); 1074static void output_pic_addr_const (FILE *, rtx, int); 1075static void put_condition_code (enum rtx_code, enum machine_mode, 1076 int, int, FILE *); 1077static const char *get_some_local_dynamic_name (void); 1078static int get_some_local_dynamic_name_1 (rtx *, void *); 1079static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx); 1080static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *, 1081 rtx *); 1082static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *); 1083static enum machine_mode ix86_cc_modes_compatible (enum machine_mode, 1084 enum machine_mode); 1085static rtx get_thread_pointer (int); 1086static rtx legitimize_tls_address (rtx, enum tls_model, int); 1087static void get_pc_thunk_name (char [32], unsigned int); 1088static rtx gen_push (rtx); 1089static int ix86_flags_dependent (rtx, rtx, enum attr_type); 1090static int ix86_agi_dependent (rtx, rtx, enum attr_type); 1091static struct machine_function * ix86_init_machine_status (void); 1092static int ix86_split_to_parts (rtx, rtx *, enum machine_mode); 1093static int ix86_nsaved_regs (void); 1094static void ix86_emit_save_regs (void); 1095static void 
ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT); 1096static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int); 1097static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT); 1098static HOST_WIDE_INT ix86_GOT_alias_set (void); 1099static void ix86_adjust_counter (rtx, HOST_WIDE_INT); 1100static rtx ix86_expand_aligntest (rtx, int); 1101static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx); 1102static int ix86_issue_rate (void); 1103static int ix86_adjust_cost (rtx, rtx, rtx, int); 1104static int ia32_multipass_dfa_lookahead (void); 1105static void ix86_init_mmx_sse_builtins (void); 1106static rtx x86_this_parameter (tree); 1107static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, 1108 HOST_WIDE_INT, tree); 1109static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree); 1110static void x86_file_start (void); 1111static void ix86_reorg (void); 1112static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*); 1113static tree ix86_build_builtin_va_list (void); 1114static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, 1115 tree, int *, int); 1116static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *); 1117static bool ix86_scalar_mode_supported_p (enum machine_mode); 1118static bool ix86_vector_mode_supported_p (enum machine_mode); 1119 1120static int ix86_address_cost (rtx); 1121static bool ix86_cannot_force_const_mem (rtx); 1122static rtx ix86_delegitimize_address (rtx); 1123 1124static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; 1125 1126struct builtin_description; 1127static rtx ix86_expand_sse_comi (const struct builtin_description *, 1128 tree, rtx); 1129static rtx ix86_expand_sse_compare (const struct builtin_description *, 1130 tree, rtx); 1131static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx); 1132static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int); 1133static rtx ix86_expand_binop_builtin (enum insn_code, 
tree, rtx); 1134static rtx ix86_expand_store_builtin (enum insn_code, tree); 1135static rtx safe_vector_operand (rtx, enum machine_mode); 1136static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *); 1137static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code); 1138static int ix86_fp_comparison_fcomi_cost (enum rtx_code code); 1139static int ix86_fp_comparison_sahf_cost (enum rtx_code code); 1140static int ix86_fp_comparison_cost (enum rtx_code code); 1141static unsigned int ix86_select_alt_pic_regnum (void); 1142static int ix86_save_reg (unsigned int, int); 1143static void ix86_compute_frame_layout (struct ix86_frame *); 1144static int ix86_comp_type_attributes (tree, tree); 1145static int ix86_function_regparm (tree, tree); 1146const struct attribute_spec ix86_attribute_table[]; 1147static bool ix86_function_ok_for_sibcall (tree, tree); 1148static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *); 1149static int ix86_value_regno (enum machine_mode, tree, tree); 1150static bool contains_128bit_aligned_vector_p (tree); 1151static rtx ix86_struct_value_rtx (tree, int); 1152static bool ix86_ms_bitfield_layout_p (tree); 1153static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *); 1154static int extended_reg_mentioned_1 (rtx *, void *); 1155static bool ix86_rtx_costs (rtx, int, int, int *); 1156static int min_insn_size (rtx); 1157static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers); 1158static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type); 1159static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode, 1160 tree, bool); 1161static void ix86_init_builtins (void); 1162static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int); 1163static const char *ix86_mangle_fundamental_type (tree); 1164static tree ix86_stack_protect_fail (void); 1165static rtx ix86_internal_arg_pointer (void); 1166static void ix86_dwarf_handle_frame_unspec (const 
char *, rtx, int); 1167 1168/* This function is only used on Solaris. */ 1169static void i386_solaris_elf_named_section (const char *, unsigned int, tree) 1170 ATTRIBUTE_UNUSED; 1171 1172/* Register class used for passing given 64bit part of the argument. 1173 These represent classes as documented by the PS ABI, with the exception 1174 of SSESF, SSEDF classes, that are basically SSE class, just gcc will 1175 use SF or DFmode move instead of DImode to avoid reformatting penalties. 1176 1177 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves 1178 whenever possible (upper half does contain padding). 1179 */ 1180enum x86_64_reg_class 1181 { 1182 X86_64_NO_CLASS, 1183 X86_64_INTEGER_CLASS, 1184 X86_64_INTEGERSI_CLASS, 1185 X86_64_SSE_CLASS, 1186 X86_64_SSESF_CLASS, 1187 X86_64_SSEDF_CLASS, 1188 X86_64_SSEUP_CLASS, 1189 X86_64_X87_CLASS, 1190 X86_64_X87UP_CLASS, 1191 X86_64_COMPLEX_X87_CLASS, 1192 X86_64_MEMORY_CLASS 1193 }; 1194static const char * const x86_64_reg_class_name[] = { 1195 "no", "integer", "integerSI", "sse", "sseSF", "sseDF", 1196 "sseup", "x87", "x87up", "cplx87", "no" 1197}; 1198 1199#define MAX_CLASSES 4 1200 1201/* Table of constants used by fldpi, fldln2, etc.... */ 1202static REAL_VALUE_TYPE ext_80387_constants_table [5]; 1203static bool ext_80387_constants_init = 0; 1204static void init_ext_80387_constants (void); 1205static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED; 1206static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED; 1207static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED; 1208static section *x86_64_elf_select_section (tree decl, int reloc, 1209 unsigned HOST_WIDE_INT align) 1210 ATTRIBUTE_UNUSED; 1211 1212/* Initialize the GCC target structure. 
*/ 1213#undef TARGET_ATTRIBUTE_TABLE 1214#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table 1215#if TARGET_DLLIMPORT_DECL_ATTRIBUTES 1216# undef TARGET_MERGE_DECL_ATTRIBUTES 1217# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes 1218#endif 1219 1220#undef TARGET_COMP_TYPE_ATTRIBUTES 1221#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes 1222 1223#undef TARGET_INIT_BUILTINS 1224#define TARGET_INIT_BUILTINS ix86_init_builtins 1225#undef TARGET_EXPAND_BUILTIN 1226#define TARGET_EXPAND_BUILTIN ix86_expand_builtin 1227 1228#undef TARGET_ASM_FUNCTION_EPILOGUE 1229#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue 1230 1231#undef TARGET_ENCODE_SECTION_INFO 1232#ifndef SUBTARGET_ENCODE_SECTION_INFO 1233#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info 1234#else 1235#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO 1236#endif 1237 1238#undef TARGET_ASM_OPEN_PAREN 1239#define TARGET_ASM_OPEN_PAREN "" 1240#undef TARGET_ASM_CLOSE_PAREN 1241#define TARGET_ASM_CLOSE_PAREN "" 1242 1243#undef TARGET_ASM_ALIGNED_HI_OP 1244#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT 1245#undef TARGET_ASM_ALIGNED_SI_OP 1246#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG 1247#ifdef ASM_QUAD 1248#undef TARGET_ASM_ALIGNED_DI_OP 1249#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD 1250#endif 1251 1252#undef TARGET_ASM_UNALIGNED_HI_OP 1253#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP 1254#undef TARGET_ASM_UNALIGNED_SI_OP 1255#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP 1256#undef TARGET_ASM_UNALIGNED_DI_OP 1257#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP 1258 1259#undef TARGET_SCHED_ADJUST_COST 1260#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost 1261#undef TARGET_SCHED_ISSUE_RATE 1262#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate 1263#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD 1264#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ 1265 ia32_multipass_dfa_lookahead 1266 1267#undef 
TARGET_FUNCTION_OK_FOR_SIBCALL 1268#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall 1269 1270#ifdef HAVE_AS_TLS 1271#undef TARGET_HAVE_TLS 1272#define TARGET_HAVE_TLS true 1273#endif 1274#undef TARGET_CANNOT_FORCE_CONST_MEM 1275#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem 1276#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P 1277#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true 1278 1279#undef TARGET_DELEGITIMIZE_ADDRESS 1280#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address 1281 1282#undef TARGET_MS_BITFIELD_LAYOUT_P 1283#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p 1284 1285#if TARGET_MACHO 1286#undef TARGET_BINDS_LOCAL_P 1287#define TARGET_BINDS_LOCAL_P darwin_binds_local_p 1288#endif 1289 1290#undef TARGET_ASM_OUTPUT_MI_THUNK 1291#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk 1292#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK 1293#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk 1294 1295#undef TARGET_ASM_FILE_START 1296#define TARGET_ASM_FILE_START x86_file_start 1297 1298#undef TARGET_DEFAULT_TARGET_FLAGS 1299#define TARGET_DEFAULT_TARGET_FLAGS \ 1300 (TARGET_DEFAULT \ 1301 | TARGET_64BIT_DEFAULT \ 1302 | TARGET_SUBTARGET_DEFAULT \ 1303 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT) 1304 1305#undef TARGET_HANDLE_OPTION 1306#define TARGET_HANDLE_OPTION ix86_handle_option 1307 1308#undef TARGET_RTX_COSTS 1309#define TARGET_RTX_COSTS ix86_rtx_costs 1310#undef TARGET_ADDRESS_COST 1311#define TARGET_ADDRESS_COST ix86_address_cost 1312 1313#undef TARGET_FIXED_CONDITION_CODE_REGS 1314#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs 1315#undef TARGET_CC_MODES_COMPATIBLE 1316#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible 1317 1318#undef TARGET_MACHINE_DEPENDENT_REORG 1319#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg 1320 1321#undef TARGET_BUILD_BUILTIN_VA_LIST 1322#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list 1323 1324#undef 
TARGET_MD_ASM_CLOBBERS 1325#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers 1326 1327#undef TARGET_PROMOTE_PROTOTYPES 1328#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true 1329#undef TARGET_STRUCT_VALUE_RTX 1330#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx 1331#undef TARGET_SETUP_INCOMING_VARARGS 1332#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs 1333#undef TARGET_MUST_PASS_IN_STACK 1334#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack 1335#undef TARGET_PASS_BY_REFERENCE 1336#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference 1337#undef TARGET_INTERNAL_ARG_POINTER 1338#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer 1339#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC 1340#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec 1341 1342#undef TARGET_GIMPLIFY_VA_ARG_EXPR 1343#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg 1344 1345#undef TARGET_SCALAR_MODE_SUPPORTED_P 1346#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p 1347 1348#undef TARGET_VECTOR_MODE_SUPPORTED_P 1349#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p 1350 1351#ifdef HAVE_AS_TLS 1352#undef TARGET_ASM_OUTPUT_DWARF_DTPREL 1353#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel 1354#endif 1355 1356#ifdef SUBTARGET_INSERT_ATTRIBUTES 1357#undef TARGET_INSERT_ATTRIBUTES 1358#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES 1359#endif 1360 1361#undef TARGET_MANGLE_FUNDAMENTAL_TYPE 1362#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type 1363 1364#undef TARGET_STACK_PROTECT_FAIL 1365#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail 1366 1367#undef TARGET_FUNCTION_VALUE 1368#define TARGET_FUNCTION_VALUE ix86_function_value 1369 1370struct gcc_target targetm = TARGET_INITIALIZER; 1371 1372 1373/* The svr4 ABI for the i386 says that records and unions are returned 1374 in memory. 
*/ 1375#ifndef DEFAULT_PCC_STRUCT_RETURN 1376#define DEFAULT_PCC_STRUCT_RETURN 1 1377#endif 1378 1379/* Implement TARGET_HANDLE_OPTION. */ 1380 1381static bool 1382ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) 1383{ 1384 switch (code) 1385 { 1386 case OPT_m3dnow: 1387 if (!value) 1388 { 1389 target_flags &= ~MASK_3DNOW_A; 1390 target_flags_explicit |= MASK_3DNOW_A; 1391 } 1392 return true; 1393 1394 case OPT_mmmx: 1395 if (!value) 1396 { 1397 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A); 1398 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A; 1399 } 1400 return true; 1401 1402 case OPT_msse: 1403 if (!value) 1404 { 1405 target_flags &= ~(MASK_SSE2 | MASK_SSE3); 1406 target_flags_explicit |= MASK_SSE2 | MASK_SSE3; 1407 } 1408 return true; 1409 1410 case OPT_msse2: 1411 if (!value) 1412 { 1413 target_flags &= ~MASK_SSE3; 1414 target_flags_explicit |= MASK_SSE3; 1415 } 1416 return true; 1417 1418 default: 1419 return true; 1420 } 1421} 1422 1423/* Sometimes certain combinations of command options do not make 1424 sense on a particular target machine. You can define a macro 1425 `OVERRIDE_OPTIONS' to take account of this. This macro, if 1426 defined, is executed once just after all the command options have 1427 been parsed. 1428 1429 Don't use this macro to turn on various extra optimizations for 1430 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ 1431 1432void 1433override_options (void) 1434{ 1435 int i; 1436 int ix86_tune_defaulted = 0; 1437 1438 /* Comes from final.c -- no real reason to change it. */ 1439#define MAX_CODE_ALIGN 16 1440 1441 static struct ptt 1442 { 1443 const struct processor_costs *cost; /* Processor costs */ 1444 const int target_enable; /* Target flags to enable. */ 1445 const int target_disable; /* Target flags to disable. */ 1446 const int align_loop; /* Default alignments. 
*/ 1447 const int align_loop_max_skip; 1448 const int align_jump; 1449 const int align_jump_max_skip; 1450 const int align_func; 1451 } 1452 const processor_target_table[PROCESSOR_max] = 1453 { 1454 {&i386_cost, 0, 0, 4, 3, 4, 3, 4}, 1455 {&i486_cost, 0, 0, 16, 15, 16, 15, 16}, 1456 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16}, 1457 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16}, 1458 {&k6_cost, 0, 0, 32, 7, 32, 7, 32}, 1459 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16}, 1460 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0}, 1461 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}, 1462 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}, 1463 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16}, 1464 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16} 1465 }; 1466 1467 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES; 1468 static struct pta 1469 { 1470 const char *const name; /* processor name or nickname. */ 1471 const enum processor_type processor; 1472 const enum pta_flags 1473 { 1474 PTA_SSE = 1, 1475 PTA_SSE2 = 2, 1476 PTA_SSE3 = 4, 1477 PTA_MMX = 8, 1478 PTA_PREFETCH_SSE = 16, 1479 PTA_3DNOW = 32, 1480 PTA_3DNOW_A = 64, 1481 PTA_64BIT = 128 1482 } flags; 1483 } 1484 const processor_alias_table[] = 1485 { 1486 {"i386", PROCESSOR_I386, 0}, 1487 {"i486", PROCESSOR_I486, 0}, 1488 {"i586", PROCESSOR_PENTIUM, 0}, 1489 {"pentium", PROCESSOR_PENTIUM, 0}, 1490 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX}, 1491 {"winchip-c6", PROCESSOR_I486, PTA_MMX}, 1492 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, 1493 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW}, 1494 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE}, 1495 {"i686", PROCESSOR_PENTIUMPRO, 0}, 1496 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0}, 1497 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX}, 1498 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, 1499 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE}, 1500 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2}, 1501 {"pentium4", 
PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 1502 | PTA_MMX | PTA_PREFETCH_SSE}, 1503 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 1504 | PTA_MMX | PTA_PREFETCH_SSE}, 1505 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 1506 | PTA_MMX | PTA_PREFETCH_SSE}, 1507 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT 1508 | PTA_MMX | PTA_PREFETCH_SSE}, 1509 {"k6", PROCESSOR_K6, PTA_MMX}, 1510 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 1511 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW}, 1512 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1513 | PTA_3DNOW_A}, 1514 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE 1515 | PTA_3DNOW | PTA_3DNOW_A}, 1516 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1517 | PTA_3DNOW_A | PTA_SSE}, 1518 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1519 | PTA_3DNOW_A | PTA_SSE}, 1520 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW 1521 | PTA_3DNOW_A | PTA_SSE}, 1522 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT 1523 | PTA_SSE | PTA_SSE2 }, 1524 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1525 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1526 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1527 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1528 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1529 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1530 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT 1531 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2}, 1532 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ }, 1533 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. 
*/ }, 1534 }; 1535 1536 int const pta_size = ARRAY_SIZE (processor_alias_table); 1537 1538#ifdef SUBTARGET_OVERRIDE_OPTIONS 1539 SUBTARGET_OVERRIDE_OPTIONS; 1540#endif 1541 1542#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS 1543 SUBSUBTARGET_OVERRIDE_OPTIONS; 1544#endif 1545 1546 /* -fPIC is the default for x86_64. */ 1547 if (TARGET_MACHO && TARGET_64BIT) 1548 flag_pic = 2; 1549 1550 /* Set the default values for switches whose default depends on TARGET_64BIT 1551 in case they weren't overwritten by command line options. */ 1552 if (TARGET_64BIT) 1553 { 1554 /* Mach-O doesn't support omitting the frame pointer for now. */ 1555 if (flag_omit_frame_pointer == 2) 1556 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1); 1557 if (flag_asynchronous_unwind_tables == 2) 1558 flag_asynchronous_unwind_tables = 1; 1559 if (flag_pcc_struct_return == 2) 1560 flag_pcc_struct_return = 0; 1561 } 1562 else 1563 { 1564 if (flag_omit_frame_pointer == 2) 1565 flag_omit_frame_pointer = 0; 1566 if (flag_asynchronous_unwind_tables == 2) 1567 flag_asynchronous_unwind_tables = 0; 1568 if (flag_pcc_struct_return == 2) 1569 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; 1570 } 1571 1572 /* Need to check -mtune=generic first. */ 1573 if (ix86_tune_string) 1574 { 1575 if (!strcmp (ix86_tune_string, "generic") 1576 || !strcmp (ix86_tune_string, "i686") 1577 /* As special support for cross compilers we read -mtune=native 1578 as -mtune=generic. With native compilers we won't see the 1579 -mtune=native, as it was changed by the driver. 
*/ 1580 || !strcmp (ix86_tune_string, "native")) 1581 { 1582 if (TARGET_64BIT) 1583 ix86_tune_string = "generic64"; 1584 else 1585 ix86_tune_string = "generic32"; 1586 } 1587 else if (!strncmp (ix86_tune_string, "generic", 7)) 1588 error ("bad value (%s) for -mtune= switch", ix86_tune_string); 1589 } 1590 else 1591 { 1592 if (ix86_arch_string) 1593 ix86_tune_string = ix86_arch_string; 1594 if (!ix86_tune_string) 1595 { 1596 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT]; 1597 ix86_tune_defaulted = 1; 1598 } 1599 1600 /* ix86_tune_string is set to ix86_arch_string or defaulted. We 1601 need to use a sensible tune option. */ 1602 if (!strcmp (ix86_tune_string, "generic") 1603 || !strcmp (ix86_tune_string, "x86-64") 1604 || !strcmp (ix86_tune_string, "i686")) 1605 { 1606 if (TARGET_64BIT) 1607 ix86_tune_string = "generic64"; 1608 else 1609 ix86_tune_string = "generic32"; 1610 } 1611 } 1612 if (!strcmp (ix86_tune_string, "x86-64")) 1613 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or " 1614 "-mtune=generic instead as appropriate."); 1615 1616 if (!ix86_arch_string) 1617 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386"; 1618 if (!strcmp (ix86_arch_string, "generic")) 1619 error ("generic CPU can be used only for -mtune= switch"); 1620 if (!strncmp (ix86_arch_string, "generic", 7)) 1621 error ("bad value (%s) for -march= switch", ix86_arch_string); 1622 1623 if (ix86_cmodel_string != 0) 1624 { 1625 if (!strcmp (ix86_cmodel_string, "small")) 1626 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 1627 else if (!strcmp (ix86_cmodel_string, "medium")) 1628 ix86_cmodel = flag_pic ? 
CM_MEDIUM_PIC : CM_MEDIUM; 1629 else if (flag_pic) 1630 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string); 1631 else if (!strcmp (ix86_cmodel_string, "32")) 1632 ix86_cmodel = CM_32; 1633 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic) 1634 ix86_cmodel = CM_KERNEL; 1635 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic) 1636 ix86_cmodel = CM_LARGE; 1637 else 1638 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string); 1639 } 1640 else 1641 { 1642 ix86_cmodel = CM_32; 1643 if (TARGET_64BIT) 1644 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 1645 } 1646 if (ix86_asm_string != 0) 1647 { 1648 if (! TARGET_MACHO 1649 && !strcmp (ix86_asm_string, "intel")) 1650 ix86_asm_dialect = ASM_INTEL; 1651 else if (!strcmp (ix86_asm_string, "att")) 1652 ix86_asm_dialect = ASM_ATT; 1653 else 1654 error ("bad value (%s) for -masm= switch", ix86_asm_string); 1655 } 1656 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32)) 1657 error ("code model %qs not supported in the %s bit mode", 1658 ix86_cmodel_string, TARGET_64BIT ? "64" : "32"); 1659 if (ix86_cmodel == CM_LARGE) 1660 sorry ("code model %<large%> not supported yet"); 1661 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0)) 1662 sorry ("%i-bit mode not compiled in", 1663 (target_flags & MASK_64BIT) ? 64 : 32); 1664 1665 for (i = 0; i < pta_size; i++) 1666 if (! strcmp (ix86_arch_string, processor_alias_table[i].name)) 1667 { 1668 ix86_arch = processor_alias_table[i].processor; 1669 /* Default cpu tuning to the architecture. 
*/ 1670 ix86_tune = ix86_arch; 1671 if (processor_alias_table[i].flags & PTA_MMX 1672 && !(target_flags_explicit & MASK_MMX)) 1673 target_flags |= MASK_MMX; 1674 if (processor_alias_table[i].flags & PTA_3DNOW 1675 && !(target_flags_explicit & MASK_3DNOW)) 1676 target_flags |= MASK_3DNOW; 1677 if (processor_alias_table[i].flags & PTA_3DNOW_A 1678 && !(target_flags_explicit & MASK_3DNOW_A)) 1679 target_flags |= MASK_3DNOW_A; 1680 if (processor_alias_table[i].flags & PTA_SSE 1681 && !(target_flags_explicit & MASK_SSE)) 1682 target_flags |= MASK_SSE; 1683 if (processor_alias_table[i].flags & PTA_SSE2 1684 && !(target_flags_explicit & MASK_SSE2)) 1685 target_flags |= MASK_SSE2; 1686 if (processor_alias_table[i].flags & PTA_SSE3 1687 && !(target_flags_explicit & MASK_SSE3)) 1688 target_flags |= MASK_SSE3; 1689 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE) 1690 x86_prefetch_sse = true; 1691 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) 1692 error ("CPU you selected does not support x86-64 " 1693 "instruction set"); 1694 break; 1695 } 1696 1697 if (i == pta_size) 1698 error ("bad value (%s) for -march= switch", ix86_arch_string); 1699 1700 for (i = 0; i < pta_size; i++) 1701 if (! strcmp (ix86_tune_string, processor_alias_table[i].name)) 1702 { 1703 ix86_tune = processor_alias_table[i].processor; 1704 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) 1705 { 1706 if (ix86_tune_defaulted) 1707 { 1708 ix86_tune_string = "x86-64"; 1709 for (i = 0; i < pta_size; i++) 1710 if (! strcmp (ix86_tune_string, 1711 processor_alias_table[i].name)) 1712 break; 1713 ix86_tune = processor_alias_table[i].processor; 1714 } 1715 else 1716 error ("CPU you selected does not support x86-64 " 1717 "instruction set"); 1718 } 1719 /* Intel CPUs have always interpreted SSE prefetch instructions as 1720 NOPs; so, we can enable SSE prefetch instructions even when 1721 -mtune (rather than -march) points us to a processor that has them. 
1722 However, the VIA C3 gives a SIGILL, so we only do that for i686 and 1723 higher processors. */ 1724 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE)) 1725 x86_prefetch_sse = true; 1726 break; 1727 } 1728 if (i == pta_size) 1729 error ("bad value (%s) for -mtune= switch", ix86_tune_string); 1730 1731 if (optimize_size) 1732 ix86_cost = &size_cost; 1733 else 1734 ix86_cost = processor_target_table[ix86_tune].cost; 1735 target_flags |= processor_target_table[ix86_tune].target_enable; 1736 target_flags &= ~processor_target_table[ix86_tune].target_disable; 1737 1738 /* Arrange to set up i386_stack_locals for all functions. */ 1739 init_machine_status = ix86_init_machine_status; 1740 1741 /* Validate -mregparm= value. */ 1742 if (ix86_regparm_string) 1743 { 1744 i = atoi (ix86_regparm_string); 1745 if (i < 0 || i > REGPARM_MAX) 1746 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX); 1747 else 1748 ix86_regparm = i; 1749 } 1750 else 1751 if (TARGET_64BIT) 1752 ix86_regparm = REGPARM_MAX; 1753 1754 /* If the user has provided any of the -malign-* options, 1755 warn and use that value only if -falign-* is not set. 1756 Remove this code in GCC 3.2 or later. 
*/ 1757 if (ix86_align_loops_string) 1758 { 1759 warning (0, "-malign-loops is obsolete, use -falign-loops"); 1760 if (align_loops == 0) 1761 { 1762 i = atoi (ix86_align_loops_string); 1763 if (i < 0 || i > MAX_CODE_ALIGN) 1764 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1765 else 1766 align_loops = 1 << i; 1767 } 1768 } 1769 1770 if (ix86_align_jumps_string) 1771 { 1772 warning (0, "-malign-jumps is obsolete, use -falign-jumps"); 1773 if (align_jumps == 0) 1774 { 1775 i = atoi (ix86_align_jumps_string); 1776 if (i < 0 || i > MAX_CODE_ALIGN) 1777 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1778 else 1779 align_jumps = 1 << i; 1780 } 1781 } 1782 1783 if (ix86_align_funcs_string) 1784 { 1785 warning (0, "-malign-functions is obsolete, use -falign-functions"); 1786 if (align_functions == 0) 1787 { 1788 i = atoi (ix86_align_funcs_string); 1789 if (i < 0 || i > MAX_CODE_ALIGN) 1790 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN); 1791 else 1792 align_functions = 1 << i; 1793 } 1794 } 1795 1796 /* Default align_* from the processor table. */ 1797 if (align_loops == 0) 1798 { 1799 align_loops = processor_target_table[ix86_tune].align_loop; 1800 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip; 1801 } 1802 if (align_jumps == 0) 1803 { 1804 align_jumps = processor_target_table[ix86_tune].align_jump; 1805 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip; 1806 } 1807 if (align_functions == 0) 1808 { 1809 align_functions = processor_target_table[ix86_tune].align_func; 1810 } 1811 1812 /* Validate -mbranch-cost= value, or provide default. 
*/ 1813 ix86_branch_cost = ix86_cost->branch_cost; 1814 if (ix86_branch_cost_string) 1815 { 1816 i = atoi (ix86_branch_cost_string); 1817 if (i < 0 || i > 5) 1818 error ("-mbranch-cost=%d is not between 0 and 5", i); 1819 else 1820 ix86_branch_cost = i; 1821 } 1822 if (ix86_section_threshold_string) 1823 { 1824 i = atoi (ix86_section_threshold_string); 1825 if (i < 0) 1826 error ("-mlarge-data-threshold=%d is negative", i); 1827 else 1828 ix86_section_threshold = i; 1829 } 1830 1831 if (ix86_tls_dialect_string) 1832 { 1833 if (strcmp (ix86_tls_dialect_string, "gnu") == 0) 1834 ix86_tls_dialect = TLS_DIALECT_GNU; 1835 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0) 1836 ix86_tls_dialect = TLS_DIALECT_GNU2; 1837 else if (strcmp (ix86_tls_dialect_string, "sun") == 0) 1838 ix86_tls_dialect = TLS_DIALECT_SUN; 1839 else 1840 error ("bad value (%s) for -mtls-dialect= switch", 1841 ix86_tls_dialect_string); 1842 } 1843 1844 /* Keep nonleaf frame pointers. */ 1845 if (flag_omit_frame_pointer) 1846 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER; 1847 else if (TARGET_OMIT_LEAF_FRAME_POINTER) 1848 flag_omit_frame_pointer = 1; 1849 1850 /* If we're doing fast math, we don't care about comparison order 1851 wrt NaNs. This lets us use a shorter comparison sequence. */ 1852 if (flag_finite_math_only) 1853 target_flags &= ~MASK_IEEE_FP; 1854 1855 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387, 1856 since the insns won't need emulation. */ 1857 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch)) 1858 target_flags &= ~MASK_NO_FANCY_MATH_387; 1859 1860 /* Likewise, if the target doesn't have a 387, or we've specified 1861 software floating point, don't use 387 inline intrinsics. */ 1862 if (!TARGET_80387) 1863 target_flags |= MASK_NO_FANCY_MATH_387; 1864 1865 /* Turn on SSE2 builtins for -msse3. */ 1866 if (TARGET_SSE3) 1867 target_flags |= MASK_SSE2; 1868 1869 /* Turn on SSE builtins for -msse2. 
*/ 1870 if (TARGET_SSE2) 1871 target_flags |= MASK_SSE; 1872 1873 /* Turn on MMX builtins for -msse. */ 1874 if (TARGET_SSE) 1875 { 1876 target_flags |= MASK_MMX & ~target_flags_explicit; 1877 x86_prefetch_sse = true; 1878 } 1879 1880 /* Turn on MMX builtins for 3Dnow. */ 1881 if (TARGET_3DNOW) 1882 target_flags |= MASK_MMX; 1883 1884 if (TARGET_64BIT) 1885 { 1886 if (TARGET_ALIGN_DOUBLE) 1887 error ("-malign-double makes no sense in the 64bit mode"); 1888 if (TARGET_RTD) 1889 error ("-mrtd calling convention not supported in the 64bit mode"); 1890 1891 /* Enable by default the SSE and MMX builtins. Do allow the user to 1892 explicitly disable any of these. In particular, disabling SSE and 1893 MMX for kernel code is extremely useful. */ 1894 target_flags 1895 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE) 1896 & ~target_flags_explicit); 1897 } 1898 else 1899 { 1900 /* i386 ABI does not specify red zone. It still makes sense to use it 1901 when programmer takes care to stack from being destroyed. */ 1902 if (!(target_flags_explicit & MASK_NO_RED_ZONE)) 1903 target_flags |= MASK_NO_RED_ZONE; 1904 } 1905 1906 /* Validate -mpreferred-stack-boundary= value, or provide default. 1907 The default of 128 bits is for Pentium III's SSE __m128. We can't 1908 change it because of optimize_size. Otherwise, we can't mix object 1909 files compiled with -Os and -On. */ 1910 ix86_preferred_stack_boundary = 128; 1911 if (ix86_preferred_stack_boundary_string) 1912 { 1913 i = atoi (ix86_preferred_stack_boundary_string); 1914 if (i < (TARGET_64BIT ? 4 : 2) || i > 12) 1915 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i, 1916 TARGET_64BIT ? 4 : 2); 1917 else 1918 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT; 1919 } 1920 1921 /* Accept -msseregparm only if at least SSE support is enabled. */ 1922 if (TARGET_SSEREGPARM 1923 && ! 
TARGET_SSE) 1924 error ("-msseregparm used without SSE enabled"); 1925 1926 ix86_fpmath = TARGET_FPMATH_DEFAULT; 1927 1928 if (ix86_fpmath_string != 0) 1929 { 1930 if (! strcmp (ix86_fpmath_string, "387")) 1931 ix86_fpmath = FPMATH_387; 1932 else if (! strcmp (ix86_fpmath_string, "sse")) 1933 { 1934 if (!TARGET_SSE) 1935 { 1936 warning (0, "SSE instruction set disabled, using 387 arithmetics"); 1937 ix86_fpmath = FPMATH_387; 1938 } 1939 else 1940 ix86_fpmath = FPMATH_SSE; 1941 } 1942 else if (! strcmp (ix86_fpmath_string, "387,sse") 1943 || ! strcmp (ix86_fpmath_string, "sse,387")) 1944 { 1945 if (!TARGET_SSE) 1946 { 1947 warning (0, "SSE instruction set disabled, using 387 arithmetics"); 1948 ix86_fpmath = FPMATH_387; 1949 } 1950 else if (!TARGET_80387) 1951 { 1952 warning (0, "387 instruction set disabled, using SSE arithmetics"); 1953 ix86_fpmath = FPMATH_SSE; 1954 } 1955 else 1956 ix86_fpmath = FPMATH_SSE | FPMATH_387; 1957 } 1958 else 1959 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string); 1960 } 1961 1962 /* If the i387 is disabled, then do not return values in it. */ 1963 if (!TARGET_80387) 1964 target_flags &= ~MASK_FLOAT_RETURNS; 1965 1966 if ((x86_accumulate_outgoing_args & TUNEMASK) 1967 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) 1968 && !optimize_size) 1969 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 1970 1971 /* ??? Unwind info is not correct around the CFG unless either a frame 1972 pointer is present or M_A_O_A is set. Fixing this requires rewriting 1973 unwind info generation to be aware of the CFG and propagating states 1974 around edges. 
*/ 1975 if ((flag_unwind_tables || flag_asynchronous_unwind_tables 1976 || flag_exceptions || flag_non_call_exceptions) 1977 && flag_omit_frame_pointer 1978 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) 1979 { 1980 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) 1981 warning (0, "unwind tables currently require either a frame pointer " 1982 "or -maccumulate-outgoing-args for correctness"); 1983 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 1984 } 1985 1986 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ 1987 { 1988 char *p; 1989 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); 1990 p = strchr (internal_label_prefix, 'X'); 1991 internal_label_prefix_len = p - internal_label_prefix; 1992 *p = '\0'; 1993 } 1994 1995 /* When scheduling description is not available, disable scheduler pass 1996 so it won't slow down the compilation and make x87 code slower. */ 1997 if (!TARGET_SCHEDULE) 1998 flag_schedule_insns_after_reload = flag_schedule_insns = 0; 1999} 2000 2001/* switch to the appropriate section for output of DECL. 2002 DECL is either a `VAR_DECL' node or a constant of some sort. 2003 RELOC indicates whether forming the initial value of DECL requires 2004 link-time relocations. 
*/ 2005 2006static section * 2007x86_64_elf_select_section (tree decl, int reloc, 2008 unsigned HOST_WIDE_INT align) 2009{ 2010 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) 2011 && ix86_in_large_data_p (decl)) 2012 { 2013 const char *sname = NULL; 2014 unsigned int flags = SECTION_WRITE; 2015 switch (categorize_decl_for_section (decl, reloc)) 2016 { 2017 case SECCAT_DATA: 2018 sname = ".ldata"; 2019 break; 2020 case SECCAT_DATA_REL: 2021 sname = ".ldata.rel"; 2022 break; 2023 case SECCAT_DATA_REL_LOCAL: 2024 sname = ".ldata.rel.local"; 2025 break; 2026 case SECCAT_DATA_REL_RO: 2027 sname = ".ldata.rel.ro"; 2028 break; 2029 case SECCAT_DATA_REL_RO_LOCAL: 2030 sname = ".ldata.rel.ro.local"; 2031 break; 2032 case SECCAT_BSS: 2033 sname = ".lbss"; 2034 flags |= SECTION_BSS; 2035 break; 2036 case SECCAT_RODATA: 2037 case SECCAT_RODATA_MERGE_STR: 2038 case SECCAT_RODATA_MERGE_STR_INIT: 2039 case SECCAT_RODATA_MERGE_CONST: 2040 sname = ".lrodata"; 2041 flags = 0; 2042 break; 2043 case SECCAT_SRODATA: 2044 case SECCAT_SDATA: 2045 case SECCAT_SBSS: 2046 gcc_unreachable (); 2047 case SECCAT_TEXT: 2048 case SECCAT_TDATA: 2049 case SECCAT_TBSS: 2050 /* We don't split these for medium model. Place them into 2051 default sections and hope for best. */ 2052 break; 2053 } 2054 if (sname) 2055 { 2056 /* We might get called with string constants, but get_named_section 2057 doesn't like them as they are not DECLs. Also, we need to set 2058 flags in that case. */ 2059 if (!DECL_P (decl)) 2060 return get_section (sname, flags, NULL); 2061 return get_named_section (decl, sname, reloc); 2062 } 2063 } 2064 return default_elf_select_section (decl, reloc, align); 2065} 2066 2067/* Build up a unique section name, expressed as a 2068 STRING_CST node, and assign it to DECL_SECTION_NAME (decl). 2069 RELOC indicates whether the initial value of EXP requires 2070 link-time relocations. 
*/ 2071 2072static void 2073x86_64_elf_unique_section (tree decl, int reloc) 2074{ 2075 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) 2076 && ix86_in_large_data_p (decl)) 2077 { 2078 const char *prefix = NULL; 2079 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */ 2080 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP; 2081 2082 switch (categorize_decl_for_section (decl, reloc)) 2083 { 2084 case SECCAT_DATA: 2085 case SECCAT_DATA_REL: 2086 case SECCAT_DATA_REL_LOCAL: 2087 case SECCAT_DATA_REL_RO: 2088 case SECCAT_DATA_REL_RO_LOCAL: 2089 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata."; 2090 break; 2091 case SECCAT_BSS: 2092 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss."; 2093 break; 2094 case SECCAT_RODATA: 2095 case SECCAT_RODATA_MERGE_STR: 2096 case SECCAT_RODATA_MERGE_STR_INIT: 2097 case SECCAT_RODATA_MERGE_CONST: 2098 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata."; 2099 break; 2100 case SECCAT_SRODATA: 2101 case SECCAT_SDATA: 2102 case SECCAT_SBSS: 2103 gcc_unreachable (); 2104 case SECCAT_TEXT: 2105 case SECCAT_TDATA: 2106 case SECCAT_TBSS: 2107 /* We don't split these for medium model. Place them into 2108 default sections and hope for best. */ 2109 break; 2110 } 2111 if (prefix) 2112 { 2113 const char *name; 2114 size_t nlen, plen; 2115 char *string; 2116 plen = strlen (prefix); 2117 2118 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); 2119 name = targetm.strip_name_encoding (name); 2120 nlen = strlen (name); 2121 2122 string = alloca (nlen + plen + 1); 2123 memcpy (string, prefix, plen); 2124 memcpy (string + plen, name, nlen + 1); 2125 2126 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string); 2127 return; 2128 } 2129 } 2130 default_unique_section (decl, reloc); 2131} 2132 2133#ifdef COMMON_ASM_OP 2134/* This says how to output assembler code to declare an 2135 uninitialized external linkage data object. 

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  /* Medium-model objects above -mlarge-data-threshold are emitted with
     .largecomm instead of the normal COMMON_ASM_OP directive.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fprintf (file, ".largecomm\t");
  else
    fprintf (file, "%s", COMMON_ASM_OP);
  assemble_name (file, name);
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
	   size, align / BITS_PER_UNIT);
}

/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  /* Large medium-model BSS objects go to .lbss rather than .bss, matching
     the .largecomm treatment in x86_elf_aligned_common above.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  /* Reserve at least one byte so a zero-sized object still gets a
     distinct address.  */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
#endif

/* Set defaults for optimization-dependent flags.  The sentinel value 2
   marks "not set by the user"; override_options later replaces it once
   TARGET_64BIT is known.  */
void
optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  if (TARGET_MACHO)
    /* The Darwin libraries never set errno, so we might as well
       avoid calling them when that's the only reason we would.  */
    flag_errno_math = 0;

  /* The default values of these switches depend on the TARGET_64BIT
     that is not known at this moment.  Mark these values with 2 and
     let the user override these.  In case there is no command line option
     specifying them, we will set the defaults in override_options.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
  SUBTARGET_OPTIMIZATION_OPTIONS;
#endif
}

/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
  /* force_align_arg_pointer says this function realigns the stack at entry.
     Note the name comes indirectly through the string variable's address.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL }
};

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree func;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  */
  if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* For an indirect call, derive the function type from the CALL_EXPR's
     operand (looking through the function pointer).  */
  if (decl)
    func = decl;
  else
    {
      func = TREE_TYPE (TREE_OPERAND (exp, 0));
      if (POINTER_TYPE_P (func))
	func = TREE_TYPE (func);
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), func, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  /* If this call is indirect, we'll need to be able to use a call-clobbered
     register for the address of the target function.  Make sure that all
     such registers are not used for passing parameters.  */
  if (!decl && !TARGET_64BIT)
    {
      tree type;

      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = TREE_OPERAND (exp, 0);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */

      if (ix86_function_regparm (type, NULL) >= 3)
	{
	  /* ??? Need to count the actual number of registers to be used,
	     not the possible number of registers.  Fix later.  */
	  return false;
	}
    }

#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  /* Dllimport'd functions are also called indirectly.  */
  if (decl && DECL_DLLIMPORT_P (decl)
      && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
    return false;
#endif

  /* If we forced aligned the stack, then sibcalling would unalign the
     stack, which may break the called function.  */
  if (cfun->machine->force_align_arg_pointer)
    return false;

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}

/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
   calling convention attributes;
   arguments as in struct attribute_spec.handler.
*/

static tree
ix86_handle_cconv_attribute (tree *node, tree name,
			     tree args,
			     int flags ATTRIBUTE_UNUSED,
			     bool *no_add_attrs)
{
  /* Calling-convention attributes only make sense on function types (or
     on declarations carrying a function type).  */
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qs attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("regparm", name))
    {
      tree cst;

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and regparm attributes are not compatible");
	}

      /* Validate the single argument: an integer constant no larger than
	 REGPARM_MAX.  */
      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qs attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}

      /* A self-realigning function clobbers %ecx in its prologue, so it
	 cannot use the full complement of regparm registers.  */
      if (!TARGET_64BIT
	  && lookup_attribute (ix86_force_align_arg_pointer_string,
			       TYPE_ATTRIBUTES (*node))
	  && compare_tree_int (cst, REGPARM_MAX-1))
	{
	  error ("%s functions limited to %d register parameters",
		 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
	}

      return NULL_TREE;
    }

  /* The remaining conventions below are 32-bit only; ignore them (with a
     warning) when compiling for 64-bit.  */
  if (TARGET_64BIT)
    {
      warning (OPT_Wattributes, "%qs attribute ignored",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
  if (is_attribute_p ("fastcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and stdcall attributes are not compatible");
	}
      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and regparm attributes are not compatible");
	}
    }

  /* Can combine stdcall with fastcall (redundant), regparm and
     sseregparm.  */
  else if (is_attribute_p ("stdcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and fastcall attributes are not compatible");
	}
    }

  /* Can combine cdecl with regparm and sseregparm.  */
  else if (is_attribute_p ("cdecl", name))
    {
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and cdecl attributes are not compatible");
	}
    }

  /* Can combine sseregparm with all attributes.  */

  return NULL_TREE;
}

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (tree type1, tree type2)
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched fastcall/regparm types.  */
  if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
       != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
      || (ix86_function_regparm (type1, NULL)
	  != ix86_function_regparm (type2, NULL)))
    return 0;

  /* Check for mismatched sseregparm types.  */
  if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
      != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
    return 0;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;

  return 1;
}

/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (tree type, tree decl)
{
  tree attr;
  int regparm = ix86_regparm;
  bool user_convention = false;

  if (!TARGET_64BIT)
    {
      /* An explicit regparm attribute takes precedence over the default.  */
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  user_convention = true;
	}

      /* fastcall implies two register arguments.  */
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
	{
	  regparm = 2;
	  user_convention = true;
	}

      /* Use register calling convention for local functions when possible.  */
      if (!TARGET_64BIT && !user_convention && decl
	  && flag_unit_at_a_time && !profile_flag)
	{
	  struct cgraph_local_info *i = cgraph_local_info (decl);
	  if (i && i->local)
	    {
	      int local_regparm, globals = 0, regno;

	      /* Make sure no regparm register is taken by a global register
		 variable.  */
	      for (local_regparm = 0; local_regparm < 3; local_regparm++)
		if (global_regs[local_regparm])
		  break;
	      /* We can't use regparm(3) for nested functions as these use
		 static chain pointer in third argument.  */
	      if (local_regparm == 3
		  && decl_function_context (decl)
		  && !DECL_NO_STATIC_CHAIN (decl))
		local_regparm = 2;
	      /* If the function realigns its stackpointer, the
		 prologue will clobber %ecx.  If we've already
		 generated code for the callee, the callee
		 DECL_STRUCT_FUNCTION is gone, so we fall back to
		 scanning the attributes for the self-realigning
		 property.  */
	      if ((DECL_STRUCT_FUNCTION (decl)
		   && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
		  || (!DECL_STRUCT_FUNCTION (decl)
		      && lookup_attribute (ix86_force_align_arg_pointer_string,
					   TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
		local_regparm = 2;
	      /* Each global register variable increases register pressure,
		 so the more global reg vars there are, the smaller regparm
		 optimization use, unless requested by the user explicitly.  */
	      for (regno = 0; regno < 6; regno++)
		if (global_regs[regno])
		  globals++;
	      local_regparm
		= globals < local_regparm ? local_regparm - globals : 0;

	      if (local_regparm > regparm)
		regparm = local_regparm;
	    }
	}
    }
  return regparm;
}

/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (tree type, tree decl)
{
  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type
	  && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  /* NOTE(review): GCC diagnostic convention is to start messages
	     lowercase; "Calling ..." here is capitalized — candidate
	     cleanup, but changing it would alter emitted diagnostics.  */
	  if (decl)
	    error ("Calling %qD with attribute sseregparm without "
		   "SSE/SSE2 enabled", decl);
	  else
	    error ("Calling %qT with attribute sseregparm without "
		   "SSE/SSE2 enabled", type);
	  return 0;
	}

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers,
     even for 32-bit targets.  */
  if (!TARGET_64BIT && decl
      && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
    {
      struct cgraph_local_info *i = cgraph_local_info (decl);
      if (i && i->local)
	return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}

/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
}

/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.
2611 2612 On the 80386, the RTD insn may be used to pop them if the number 2613 of args is fixed, but if the number is variable then the caller 2614 must pop them all. RTD can't be used for library calls now 2615 because the library is compiled with the Unix compiler. 2616 Use of RTD is a selectable option, since it is incompatible with 2617 standard Unix calling sequences. If the option is not selected, 2618 the caller must always pop the args. 2619 2620 The attribute stdcall is equivalent to RTD on a per module basis. */ 2621 2622int 2623ix86_return_pops_args (tree fundecl, tree funtype, int size) 2624{ 2625 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE); 2626 2627 /* Cdecl functions override -mrtd, and never pop the stack. */ 2628 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) { 2629 2630 /* Stdcall and fastcall functions will pop the stack if not 2631 variable args. */ 2632 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)) 2633 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))) 2634 rtd = 1; 2635 2636 if (rtd 2637 && (TYPE_ARG_TYPES (funtype) == NULL_TREE 2638 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype))) 2639 == void_type_node))) 2640 return size; 2641 } 2642 2643 /* Lose any fake structure return argument if it is passed on the stack. */ 2644 if (aggregate_value_p (TREE_TYPE (funtype), fundecl) 2645 && !TARGET_64BIT 2646 && !KEEP_AGGREGATE_RETURN_POINTER) 2647 { 2648 int nregs = ix86_function_regparm (funtype, fundecl); 2649 2650 if (!nregs) 2651 return GET_MODE_SIZE (Pmode); 2652 } 2653 2654 return 0; 2655} 2656 2657/* Argument support functions. */ 2658 2659/* Return true when register may be used to pass function parameters. 
*/ 2660bool 2661ix86_function_arg_regno_p (int regno) 2662{ 2663 int i; 2664 if (!TARGET_64BIT) 2665 { 2666 if (TARGET_MACHO) 2667 return (regno < REGPARM_MAX 2668 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno])); 2669 else 2670 return (regno < REGPARM_MAX 2671 || (TARGET_MMX && MMX_REGNO_P (regno) 2672 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX)) 2673 || (TARGET_SSE && SSE_REGNO_P (regno) 2674 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))); 2675 } 2676 2677 if (TARGET_MACHO) 2678 { 2679 if (SSE_REGNO_P (regno) && TARGET_SSE) 2680 return true; 2681 } 2682 else 2683 { 2684 if (TARGET_SSE && SSE_REGNO_P (regno) 2685 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)) 2686 return true; 2687 } 2688 /* RAX is used as hidden argument to va_arg functions. */ 2689 if (!regno) 2690 return true; 2691 for (i = 0; i < REGPARM_MAX; i++) 2692 if (regno == x86_64_int_parameter_registers[i]) 2693 return true; 2694 return false; 2695} 2696 2697/* Return if we do not know how to pass TYPE solely in registers. */ 2698 2699static bool 2700ix86_must_pass_in_stack (enum machine_mode mode, tree type) 2701{ 2702 if (must_pass_in_stack_var_size_or_pad (mode, type)) 2703 return true; 2704 2705 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out! 2706 The layout_type routine is crafty and tries to trick us into passing 2707 currently unsupported vector types on the stack by using TImode. */ 2708 return (!TARGET_64BIT && mode == TImode 2709 && type && TREE_CODE (type) != VECTOR_TYPE); 2710} 2711 2712/* Initialize a variable CUM of type CUMULATIVE_ARGS 2713 for a call to a function whose data type is FNTYPE. 2714 For a library call, FNTYPE is 0. 
 */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl)
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  /* Start from a zeroed state; zero_cum is static and never written.  */
  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (TARGET_SSE)
    cum->sse_nregs = SSE_REGPARM_MAX;
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_sse = true;
  cum->warn_mmx = true;
  cum->maybe_vaarg = false;

  /* Use ecx and edx registers if function has fastcall attribute,
     else look for regparm information.  */
  if (fntype && !TARGET_64BIT)
    {
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
	{
	  cum->nregs = 2;
	  cum->fastcall = 1;
	}
      else
	cum->nregs = ix86_function_regparm (fntype, fndecl);
    }

  /* Set up the number of SSE registers used for passing SFmode
     and DFmode arguments.  Warn for mismatching ABI.  */
  cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_mode' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers in 32-bit mode.  */

  if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  /* A parameter list not terminated by void means varargs;
	     disable all register passing for 32-bit mode.  */
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    {
	      if (!TARGET_64BIT)
		{
		  cum->nregs = 0;
		  cum->sse_nregs = 0;
		  cum->mmx_nregs = 0;
		  cum->warn_sse = 0;
		  cum->warn_mmx = 0;
		  cum->fastcall = 0;
		  cum->float_in_sse = 0;
		}
	      cum->maybe_vaarg = true;
	    }
	}
    }
  /* No prototype at all (indirect call or libcall with no name, or an
     unprototyped fntype) may also be varargs.  */
  if ((!fntype && !libname)
      || (fntype && !TYPE_ARG_TYPES (fntype)))
    cum->maybe_vaarg = true;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}

/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.  */

static enum machine_mode
type_natural_mode (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      return mode;

	  gcc_unreachable ();
	}
    }

  return mode;
}

/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      /* Wrap the register in a one-entry PARALLEL at offset 0 so the
	 BLKmode value is still located in a single hard register.  */
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}

/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}

/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.
*/

static int
classify_argument (enum machine_mode mode, tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  /* Number of 8-byte "eightbyte" slots the argument occupies.  */
  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
      if (bytes > 16)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signal memory class, so handle it as special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	  /* For classes first merge in the field of the subclasses.  */
	  if (TYPE_BINFO (type))
	    {
	      tree binfo, base_binfo;
	      int basenum;

	      for (binfo = TYPE_BINFO (type), basenum = 0;
		   BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
		{
		   int num;
		   int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
		   tree type = BINFO_TYPE (base_binfo);

		   num = classify_argument (TYPE_MODE (type),
					    type, subclasses,
					    (offset + bit_offset) % 256);
		   if (!num)
		     return 0;
		   for (i = 0; i < num; i++)
		     {
		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
		       classes[i + pos] =
			 merge_classes (subclasses[i], classes[i + pos]);
		     }
		}
	    }
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
			   i < ((int_bit_position (field) + (bit_offset % 64))
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i] =
			  merge_classes (X86_64_INTEGER_CLASS,
					 classes[i]);
		    }
		  else
		    {
		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					       TREE_TYPE (field), subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      for (i = 0; i < num; i++)
			{
			  int pos =
			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
			  classes[i + pos] =
			    merge_classes (subclasses[i], classes[i + pos]);
			}
		    }
		}
	    }
	  break;

	case ARRAY_TYPE:
	  /* Arrays are handled as small records.  */
	  {
	    int num;
	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				     TREE_TYPE (type), subclasses, bit_offset);
	    if (!num)
	      return 0;

	    /* The partial classes are now full classes.  */
	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
	      subclasses[0] = X86_64_INTEGER_CLASS;

	    /* Replicate the element classification across all words.  */
	    for (i = 0; i < words; i++)
	      classes[i] = subclasses[i % num];

	    break;
	  }
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but offset is always 0.  */

	  /* Unions are not derived.  */
	  gcc_assert (!TYPE_BINFO (type)
		      || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
	    classes[i] = X86_64_SSE_CLASS;

	  /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
	    classes[i] = X86_64_SSE_CLASS;
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
	mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode)
      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case SDmode:
    case DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      return 1;
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case CTImode:
      return 0;
    case SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;
    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
	return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}

/* Examine the argument and return set number of register required in each
   class.  Return 0 iff parameter should be passed in memory.  */
static int
examine_argument (enum machine_mode mode, tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class class[MAX_CLASSES];
  int n = classify_argument (mode, type, class, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (class[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	/* x87 registers are only valid for returning values.  */
	if (!in_return)
	  return 0;
	break;
      case X86_64_COMPLEX_X87_CLASS:
	return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }
  return 1;
}

/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.
 */

static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
		     tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state, so each
     ABI diagnostic is emitted at most once per compilation.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class class[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, class, 0);
  if (TARGET_DEBUG_ARG)
    {
      if (!n)
	fprintf (stderr, "Memory class\n");
      else
	{
	  fprintf (stderr, "Classes:");
	  for (i = 0; i < n; i++)
	    {
	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
	    }
	   fprintf (stderr, "\n");
	}
    }
  /* NULL means "pass in memory".  */
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
			 &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_80387 && in_return)
    for (i = 0; i < n; i++)
      if (class[i] == X86_64_X87_CLASS
	  || class[i] == X86_64_X87UP_CLASS
	  || class[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
	{
	  case X86_64_NO_CLASS:
	    break;
	  case X86_64_INTEGER_CLASS:
	  case X86_64_INTEGERSI_CLASS:
	    /* Merge TImodes on aligned occasions here too.  */
	    if (i * 8 + 8 > bytes)
	      tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	    else if (class[i] == X86_64_INTEGERSI_CLASS)
	      tmpmode = SImode;
	    else
	      tmpmode = DImode;
	    /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
	    if (tmpmode == BLKmode)
	      tmpmode = DImode;
	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					       gen_rtx_REG (tmpmode, *intreg),
					       GEN_INT (i*8));
	    intreg++;
	    break;
	  case X86_64_SSESF_CLASS:
	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					       gen_rtx_REG (SFmode,
							    SSE_REGNO (sse_regno)),
					       GEN_INT (i*8));
	    sse_regno++;
	    break;
	  case X86_64_SSEDF_CLASS:
	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					       gen_rtx_REG (DFmode,
							    SSE_REGNO (sse_regno)),
					       GEN_INT (i*8));
	    sse_regno++;
	    break;
	  case X86_64_SSE_CLASS:
	    /* An SSE+SSEUP pair occupies one 16-byte register; skip the
	       following SSEUP word.  */
	    if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
	      tmpmode = TImode;
	    else
	      tmpmode = DImode;
	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
					       gen_rtx_REG (tmpmode,
							    SSE_REGNO (sse_regno)),
					       GEN_INT (i*8));
	    if (tmpmode == TImode)
	      i++;
	    sse_regno++;
	    break;
	  default:
	    gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
		      tree type, int named)
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type);

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
	     "mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, cum->sse_nregs,
	     GET_MODE_NAME (mode), named);

  if (TARGET_64BIT)
    {
      int int_nregs, sse_nregs;
      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
	cum->words += words;
      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
	{
	  cum->nregs -= int_nregs;
	  cum->sse_nregs -= sse_nregs;
	  cum->regno += int_nregs;
	  cum->sse_regno += sse_nregs;
	}
      else
	cum->words += words;
    }
  else
    {
      switch (mode)
	{
	default:
	  break;

	case BLKmode:
	  if (bytes < 0)
	    break;
	  /* FALLTHRU */

	case DImode:
	case SImode:
	case HImode:
	case QImode:
	  cum->words += words;
	  cum->nregs -= words;
	  cum->regno += words;

	  if (cum->nregs <= 0)
	    {
	      cum->nregs = 0;
	      cum->regno = 0;
	    }
	  break;

	case DFmode:
	  if (cum->float_in_sse < 2)
	    break;
	  /* FALLTHRU */
	case SFmode:
	  if (cum->float_in_sse < 1)
	    break;
	  /* FALLTHRU */

	case TImode:
	case V16QImode:
	case V8HImode:
	case V4SImode:
	case V2DImode:
	case V4SFmode:
	case V2DFmode:
	  if (!type || !AGGREGATE_TYPE_P (type))
	    {
	      cum->sse_words += words;
	      cum->sse_nregs -= 1;
	      cum->sse_regno += 1;
	      if (cum->sse_nregs <= 0)
		{
		  cum->sse_nregs = 0;
		  cum->sse_regno = 0;
		}
	    }
	  break;

	case V8QImode:
	case V4HImode:
	case V2SImode:
	case V2SFmode:
	  if (!type || !AGGREGATE_TYPE_P (type))
	    {
	      cum->mmx_words += words;
	      cum->mmx_nregs -= 1;
	      cum->mmx_regno += 1;
	      if (cum->mmx_nregs <= 0)
		{
		  cum->mmx_nregs = 0;
		  cum->mmx_regno = 0;
		}
	    }
	  break;
	}
    }
}

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

rtx
function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
	      tree type, int named)
{
  enum machine_mode mode = orig_mode;
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  /* Static so each ABI warning is emitted only once per compilation.  */
  static bool warnedsse, warnedmmx;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type);

  /* Handle a hidden AL argument containing number of registers for varargs
     x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
     any AL settings.  */
  if (mode == VOIDmode)
    {
      if (TARGET_64BIT)
	return GEN_INT (cum->maybe_vaarg
			? (cum->sse_nregs < 0
			   ? SSE_REGPARM_MAX
			   : cum->sse_regno)
			: -1);
      else
	return constm1_rtx;
    }
  if (TARGET_64BIT)
    ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
			       cum->sse_nregs,
			       &x86_64_int_parameter_registers [cum->regno],
			       cum->sse_regno);
  else
    switch (mode)
      {
	/* For now, pass fp/complex values on the stack.  */
      default:
	break;

      case BLKmode:
	if (bytes < 0)
	  break;
	/* FALLTHRU */
      case DImode:
      case SImode:
      case HImode:
      case QImode:
	if (words <= cum->nregs)
	  {
	    int regno = cum->regno;

	    /* Fastcall allocates the first two DWORD (SImode) or
	       smaller arguments to ECX and EDX.  */
	    if (cum->fastcall)
	      {
		if (mode == BLKmode || mode == DImode)
		  break;

		/* ECX not EAX is the first allocated register.  */
		if (regno == 0)
		  regno = 2;
	      }
	    ret = gen_rtx_REG (mode, regno);
	  }
	break;
      case DFmode:
	if (cum->float_in_sse < 2)
	  break;
	/* FALLTHRU */
      case SFmode:
	if (cum->float_in_sse < 1)
	  break;
	/* FALLTHRU */
      case TImode:
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
      case V4SFmode:
      case V2DFmode:
	if (!type || !AGGREGATE_TYPE_P (type))
	  {
	    if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	      {
		warnedsse = true;
		warning (0, "SSE vector argument without SSE enabled "
			 "changes the ABI");
	      }
	    if (cum->sse_nregs)
	      ret = gen_reg_or_parallel (mode, orig_mode,
					 cum->sse_regno + FIRST_SSE_REG);
	  }
	break;
      case V8QImode:
      case V4HImode:
      case V2SImode:
      case V2SFmode:
	if (!type || !AGGREGATE_TYPE_P (type))
	  {
	    if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	      {
		warnedmmx = true;
		warning (0, "MMX vector argument without MMX enabled "
			 "changes the ABI");
	      }
	    if (cum->mmx_nregs)
	      ret = gen_reg_or_parallel (mode, orig_mode,
					 cum->mmx_regno + FIRST_MMX_REG);
	  }
	break;
      }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	print_simple_rtl (stderr, ret);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}

/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			tree type, bool named ATTRIBUTE_UNUSED)
{
  if (!TARGET_64BIT)
    return 0;

  /* Variable-sized types are passed by reference on x86-64.  */
  if (type && int_size_in_bytes (type) == -1)
    {
      if (TARGET_DEBUG_ARG)
	fprintf (stderr, "function_arg_pass_by_reference\n");
      return 1;
    }

  return 0;
}

/* Return true when TYPE should be 128bit aligned for 32bit argument passing
   ABI.  Only called if TARGET_SSE.  */
static bool
contains_128bit_aligned_vector_p (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (SSE_REG_MODE_P (mode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    if (TYPE_BINFO (type))
	      {
		tree binfo, base_binfo;
		int i;

		for (binfo = TYPE_BINFO (type), i = 0;
		     BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
		  if (contains_128bit_aligned_vector_p
		      (BINFO_TYPE (base_binfo)))
		    return true;
	      }
	    /* And now merge the fields of structure.  */
	    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages pass arrays by value.  */
	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}

/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

int
ix86_function_arg_boundary (enum machine_mode mode, tree type)
{
  int align;
  if (type)
    align = TYPE_ALIGN (type);
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  if (!TARGET_64BIT)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!TARGET_SSE)
	align = PARM_BOUNDARY;
      else if (!type)
	{
	  if (!SSE_REG_MODE_P (mode))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!contains_128bit_aligned_vector_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > 128)
    align = 128;
  return align;
}

/* Return true if N is a possible register number of function value.
*/
bool
ix86_function_value_regno_p (int regno)
{
  if (TARGET_MACHO)
    {
      if (!TARGET_64BIT)
        {
          return ((regno) == 0
                  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
                  || ((regno) == FIRST_SSE_REG && TARGET_SSE));
        }
      /* NOTE(review): FIRST_FLOAT_REG is tested both unconditionally and
         again under TARGET_FLOAT_RETURNS_IN_80387 - the second test is
         redundant but harmless.  */
      return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
              || ((regno) == FIRST_SSE_REG && TARGET_SSE)
              || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
    }
  else
    {
      if (regno == 0
          || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
          || (regno == FIRST_SSE_REG && TARGET_SSE))
        return true;

      /* MM0 can hold 8-byte vector return values, 32-bit only.  */
      if (!TARGET_64BIT
          && (regno == FIRST_MMX_REG && TARGET_MMX))
        return true;

      return false;
    }
}

/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */
rtx
ix86_function_value (tree valtype, tree fntype_or_decl,
                     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode natmode = type_natural_mode (valtype);

  if (TARGET_64BIT)
    {
      rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
                                     1, REGPARM_MAX, SSE_REGPARM_MAX,
                                     x86_64_int_return_registers, 0);
      /* For zero sized structures, construct_container return NULL, but we
         need to keep rest of compiler happy by returning meaningful value.  */
      if (!ret)
        ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
      return ret;
    }
  else
    {
      /* FNTYPE_OR_DECL may be either a FUNCTION_DECL or a function type;
         normalize to both a decl (maybe NULL) and a type.  */
      tree fn = NULL_TREE, fntype;
      if (fntype_or_decl
          && DECL_P (fntype_or_decl))
        fn = fntype_or_decl;
      fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
      return gen_rtx_REG (TYPE_MODE (valtype),
                          ix86_value_regno (natmode, fn, fntype));
    }
}

/* Return true iff type is returned in memory.  */
int
ix86_return_in_memory (tree type)
{
  int needed_intregs, needed_sseregs, size;
  enum machine_mode mode = type_natural_mode (type);

  if (TARGET_64BIT)
    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);

  if (mode == BLKmode)
    return 1;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return 0;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
        return 0;

      /* MMX/3dNow values are returned in MM0,
         except when it doesn't exist.  */
      if (size == 8)
        return (TARGET_MMX ? 0 : 1);

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
        return (TARGET_SSE ? 0 : 1);
    }

  if (mode == XFmode)
    return 0;

  if (mode == TDmode)
    return 1;

  if (size > 12)
    return 1;
  return 0;
}

/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  static bool warnedsse, warnedmmx;

  if (type)
    {
      /* Look at the return type of the function, not the function type.
*/
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
        {
          if (mode == TImode
              || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
            {
              warnedsse = true;
              warning (0, "SSE vector return without SSE enabled "
                       "changes the ABI");
            }
        }

      if (!TARGET_MMX && !warnedmmx)
        {
          if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
            {
              warnedmmx = true;
              warning (0, "MMX vector return without MMX enabled "
                       "changes the ABI");
            }
        }
    }

  /* Never force a struct-value register; this hook exists only to emit
     the one-time ABI warnings above.  */
  return NULL;
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */
rtx
ix86_libcall_value (enum machine_mode mode)
{
  if (TARGET_64BIT)
    {
      switch (mode)
        {
        case SFmode:
        case SCmode:
        case DFmode:
        case DCmode:
        case TFmode:
        case SDmode:
        case DDmode:
        case TDmode:
          return gen_rtx_REG (mode, FIRST_SSE_REG);
        case XFmode:
        case XCmode:
          return gen_rtx_REG (mode, FIRST_FLOAT_REG);
        case TCmode:
          return NULL;
        default:
          return gen_rtx_REG (mode, 0);
        }
    }
  else
    return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
}

/* Given a mode, return the register to use for a return value.
   32-bit only (asserted); FUNC/FNTYPE may be NULL for libcalls.  */

static int
ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
{
  gcc_assert (!TARGET_64BIT);

  /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    return TARGET_MMX ? FIRST_MMX_REG : 0;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    return TARGET_SSE ? FIRST_SSE_REG : 0;

  /* Decimal floating point values can go in %eax, unlike other float modes.  */
  if (DECIMAL_FLOAT_MODE_P (mode))
    return 0;

  /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values.  */
  if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
    return 0;

  /* Floating point return values in %st(0), except for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((func || fntype)
      && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, func);
      if ((sse_level >= 1 && mode == SFmode)
          || (sse_level == 2 && mode == DFmode))
        return FIRST_SSE_REG;
    }

  return FIRST_FLOAT_REG;
}

/* Create the va_list data type.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.
*/
  if (!TARGET_64BIT)
    return build_pointer_type (char_type_node);

  /* x86-64 va_list is a struct { unsigned gp_offset; unsigned fp_offset;
     void *overflow_arg_area; void *reg_save_area; }, per the psABI.  */
  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
                      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
                      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
                      ptr_type_node);
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
                      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}

/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  Emits the code that
   dumps incoming integer and SSE argument registers into the register
   save area so va_arg can find them later.  x86-64 only.  */

static void
ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                             tree type, int *pretend_size ATTRIBUTE_UNUSED,
                             int no_rtl)
{
  CUMULATIVE_ARGS next_cum;
  rtx save_area = NULL_RTX, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  int set;
  tree fntype;
  int stdarg_p;
  int i;

  if (!TARGET_64BIT)
    return;

  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
    return;

  /* Indicate to allocate space on the stack for varargs save area.
     (NB: "varrargs" below is the existing spelling of this project
     variable - do not "fix" it here.)  */
  ix86_save_varrargs_registers = 1;

  cfun->stack_alignment_needed = 128;

  fntype = TREE_TYPE (current_function_decl);
  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
              && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
                  != void_type_node));

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p)
    function_arg_advance (&next_cum, mode, type, 1);

  if (!no_rtl)
    save_area = frame_pointer_rtx;

  set = get_varargs_alias_set ();

  /* Save the integer argument registers not consumed by named args.  */
  for (i = next_cum.regno;
       i < ix86_regparm
       && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
       i++)
    {
      mem = gen_rtx_MEM (Pmode,
                         plus_constant (save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
                                        x86_64_int_parameter_registers[i]));
    }

  if (next_cum.sse_nregs && cfun->va_list_fpr_size)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains number
         of SSE parameter registers used to call this function.  We use
         sse_prologue_save insn template that produces computed jump across
         SSE saves.  We need some preparation work to get this working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute address to jump to :
         label - 5*eax + nnamed_sse_arguments*5  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
                              gen_rtx_MULT (Pmode, nsse_reg,
                                            GEN_INT (4))));
      if (next_cum.sse_regno)
        emit_move_insn
          (nsse_reg,
           gen_rtx_CONST (DImode,
                          gen_rtx_PLUS (DImode,
                                        label_ref,
                                        GEN_INT (next_cum.sse_regno * 4))));
      else
        emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute address of memory block we save into.  We always use pointer
         pointing 127 bytes after first byte to store - this is needed to keep
         instruction size limited by 4 bytes.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
                              plus_constant (save_area,
                                             8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
                                        GEN_INT (next_cum.sse_regno), label));
    }

}

/* Implement va_start.  */

void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;

  /* Only 64bit target needs something special.
*/
  if (!TARGET_64BIT)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
             (int) words, (int) n_gpr, (int) n_fpr);

  /* gp_offset is a byte offset into the save area: 8 bytes per
     integer register already consumed by named arguments.  */
  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type, gpr,
                  build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* fp_offset: SSE slots are 16 bytes each and live after the
     8 * REGPARM_MAX bytes of integer slots.  */
  if (cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
                  build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  t = make_tree (type, virtual_incoming_args_rtx);
  if (words != 0)
    t = build2 (PLUS_EXPR, type, t,
                build_int_cst (type, words * UNITS_PER_WORD));
  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
    {
      /* Find the register save area.
         Prologue of the function save it right above stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}

/* Implement va_arg.  */

tree
ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;

  /* Only 64bit target needs something special.
*/
  if (!TARGET_64BIT)
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build_va_arg_indirect_ref (valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type);
  container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
                                   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");
  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label ();
      lab_over = create_artificial_label ();

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      /* A temporary copy is needed when the value cannot be read directly
         from the register save area: over-aligned types, or pieces
         scattered non-contiguously (checked below).  */
      need_temp = (!REG_P (container)
                   && ((needed_intregs && TYPE_ALIGN (type) > 64)
                       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
         on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
        {
          /* Verify that all registers are strictly consecutive  */
          if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
            {
              int i;

              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 16)
                    need_temp = 1;
                }
            }
          else
            {
              int i;

              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 8)
                    need_temp = 1;
                }
            }
        }
      if (!need_temp)
        {
          int_addr = addr;
          sse_addr = addr;
        }
      else
        {
          int_addr = create_tmp_var (ptr_type_node, "int_addr");
          DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
          sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
          DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
        }

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
        {
          t = build_int_cst (TREE_TYPE (gpr),
                             (REGPARM_MAX - needed_intregs + 1) * 8);
          t = build2 (GE_EXPR, boolean_type_node, gpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }
      if (needed_sseregs)
        {
          t = build_int_cst (TREE_TYPE (fpr),
                             (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
                             + REGPARM_MAX * 8);
          t = build2 (GE_EXPR, boolean_type_node, fpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
        {
          /* int_addr = gpr + sav; */
          t = fold_convert (ptr_type_node, gpr);
          t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
          t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
          gimplify_and_add (t, pre_p);
        }
      if (needed_sseregs)
        {
          /* sse_addr = fpr + sav; */
          t = fold_convert (ptr_type_node, fpr);
          t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
          t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
          gimplify_and_add (t, pre_p);
        }
      if (need_temp)
        {
          int i;
          tree temp = create_tmp_var (type, "va_arg_tmp");

          /* addr = &temp; */
          t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
          t = build2 (MODIFY_EXPR, void_type_node, addr, t);
          gimplify_and_add (t, pre_p);

          /* Copy the value piecewise out of the save area into TEMP.  */
          for (i = 0; i < XVECLEN (container, 0); i++)
            {
              rtx slot = XVECEXP (container, 0, i);
              rtx reg = XEXP (slot, 0);
              enum machine_mode mode = GET_MODE (reg);
              tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
              tree addr_type = build_pointer_type (piece_type);
              tree src_addr, src;
              int src_offset;
              tree dest_addr, dest;

              if (SSE_REGNO_P (REGNO (reg)))
                {
                  src_addr = sse_addr;
                  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
                }
              else
                {
                  src_addr = int_addr;
                  src_offset = REGNO (reg) * 8;
                }
              src_addr = fold_convert (addr_type, src_addr);
              src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
                                       size_int (src_offset)));
              src = build_va_arg_indirect_ref (src_addr);

              dest_addr = fold_convert (addr_type, addr);
              dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
                                        size_int (INTVAL (XEXP (slot, 1)))));
              dest = build_va_arg_indirect_ref (dest_addr);

              t = build2 (MODIFY_EXPR, void_type_node, dest, src);
              gimplify_and_add (t, pre_p);
            }
        }

      /* Bump the gp_offset / fp_offset counters past the consumed slots.  */
      if (needed_intregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
                      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
          t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
          gimplify_and_add (t, pre_p);
        }
      if (needed_sseregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
                      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
          t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
          gimplify_and_add (t, pre_p);
        }

      t = build1 (GOTO_EXPR, void_type_node, lab_over);
      gimplify_and_add (t, pre_p);

      t = build1 (LABEL_EXPR, void_type_node, lab_false);
      append_to_statement_list (t, pre_p);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
      || integer_zerop (TYPE_SIZE (type)))
    t = ovf;
  else
    {
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
                  build_int_cst (TREE_TYPE (ovf), align - 1));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
                  build_int_cst (TREE_TYPE (t), -align));
    }
  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);

  t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
  gimplify_and_add (t2, pre_p);

  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
              build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
  t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  gimplify_and_add (t, pre_p);

  if (container)
    {
      t = build1 (LABEL_EXPR, void_type_node, lab_over);
      append_to_statement_list (t, pre_p);
    }

  ptrtype = build_pointer_type (type);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}

/* Return nonzero if OPNUM's MEM should be matched
   in movabs* patterns.
*/

int
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (GET_CODE (mem) == MEM);
  return (volatile_ok || !MEM_VOLATILE_P (mem));
}

/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi  */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
                    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}

/* Return true if the constant is something that can be loaded with
   a special instruction.

   Returns -1 if X is not a float CONST_DOUBLE, 0 if no special
   instruction applies, 1 for fldz, 2 for fld1, and 3..7 for the
   table constants above (see standard_80387_constant_opcode).  */

int
standard_80387_constant_p (rtx x)
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;

  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (GET_MODE (x) == XFmode
      && (optimize_size || x86_ext_80387_constants & TUNEMASK))
    {
      REAL_VALUE_TYPE r;
      int i;

      if (! ext_80387_constants_init)
        init_ext_80387_constants ();

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      for (i = 0; i < 5; i++)
        if (real_identical (&r, &ext_80387_constants_table[i]))
          return i + 3;
    }

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    default:
      gcc_unreachable ();
    }
}

/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
                                       XFmode);
}

/* Return 1 if mode is a valid mode for sse.  */
static int
standard_sse_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      return 1;

    default:
      return 0;
    }
}

/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
*/
int
standard_sse_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  /* 1 = all zeros (xorps/xorpd/pxor), 2 = all ones (pcmpeqd, SSE2 only),
     -1 = all ones but no instruction available, 0 = not special.  */
  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (vector_all_ones_operand (x, mode)
      && standard_sse_mode_p (mode))
    return TARGET_SSE2 ? 2 : -1;

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_sse_constant_opcode (rtx insn, rtx x)
{
  switch (standard_sse_constant_p (x))
    {
    case 1:
      if (get_attr_mode (insn) == MODE_V4SF)
        return "xorps\t%0, %0";
      else if (get_attr_mode (insn) == MODE_V2DF)
        return "xorpd\t%0, %0";
      else
        return "pxor\t%0, %0";
    case 2:
      return "pcmpeqd\t%0, %0";
    }
  gcc_unreachable ();
}

/* Returns 1 if OP contains a symbol reference */

int
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  /* Recurse over all sub-rtxes, including vectors ('E' format).  */
  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (op, i) - 1; j >= 0; j--)
            if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
              return 1;
        }

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
        return 1;
    }

  return 0;
}

/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.  */

int
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32 pop, since that's all we can do
     with one instruction.  (`ret imm16' takes a 16-bit immediate,
     hence the 32768-byte limit.)  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}

/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

int
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return 1;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return 1;

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf
          || ix86_current_function_calls_tls_descriptor))
    return 1;

  if (current_function_profile)
    return 1;

  return 0;
}

/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Bitmask: bit N set means the pc thunk for register N was used and
   must be emitted by ix86_file_end.  */
static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.
*/ 4825 4826static void 4827get_pc_thunk_name (char name[32], unsigned int regno) 4828{ 4829 gcc_assert (!TARGET_64BIT); 4830 4831 if (USE_HIDDEN_LINKONCE) 4832 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]); 4833 else 4834 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno); 4835} 4836 4837 4838/* This function generates code for -fpic that loads %ebx with 4839 the return address of the caller and then returns. */ 4840 4841void 4842ix86_file_end (void) 4843{ 4844 rtx xops[2]; 4845 int regno; 4846 4847 for (regno = 0; regno < 8; ++regno) 4848 { 4849 char name[32]; 4850 4851 if (! ((pic_labels_used >> regno) & 1)) 4852 continue; 4853 4854 get_pc_thunk_name (name, regno); 4855 4856#if TARGET_MACHO 4857 if (TARGET_MACHO) 4858 { 4859 switch_to_section (darwin_sections[text_coal_section]); 4860 fputs ("\t.weak_definition\t", asm_out_file); 4861 assemble_name (asm_out_file, name); 4862 fputs ("\n\t.private_extern\t", asm_out_file); 4863 assemble_name (asm_out_file, name); 4864 fputs ("\n", asm_out_file); 4865 ASM_OUTPUT_LABEL (asm_out_file, name); 4866 } 4867 else 4868#endif 4869 if (USE_HIDDEN_LINKONCE) 4870 { 4871 tree decl; 4872 4873 decl = build_decl (FUNCTION_DECL, get_identifier (name), 4874 error_mark_node); 4875 TREE_PUBLIC (decl) = 1; 4876 TREE_STATIC (decl) = 1; 4877 DECL_ONE_ONLY (decl) = 1; 4878 4879 (*targetm.asm_out.unique_section) (decl, 0); 4880 switch_to_section (get_named_section (decl, NULL, 0)); 4881 4882 (*targetm.asm_out.globalize_label) (asm_out_file, name); 4883 fputs ("\t.hidden\t", asm_out_file); 4884 assemble_name (asm_out_file, name); 4885 fputc ('\n', asm_out_file); 4886 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); 4887 } 4888 else 4889 { 4890 switch_to_section (text_section); 4891 ASM_OUTPUT_LABEL (asm_out_file, name); 4892 } 4893 4894 xops[0] = gen_rtx_REG (SImode, regno); 4895 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx); 4896 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops); 4897 output_asm_insn ("ret", xops); 4898 
} 4899 4900 if (NEED_INDICATE_EXEC_STACK) 4901 file_end_indicate_exec_stack (); 4902} 4903 4904/* Emit code for the SET_GOT patterns. */ 4905 4906const char * 4907output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED) 4908{ 4909 rtx xops[3]; 4910 4911 xops[0] = dest; 4912 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); 4913 4914 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic) 4915 { 4916 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ()); 4917 4918 if (!flag_pic) 4919 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops); 4920 else 4921 output_asm_insn ("call\t%a2", xops); 4922 4923#if TARGET_MACHO 4924 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This 4925 is what will be referenced by the Mach-O PIC subsystem. */ 4926 if (!label) 4927 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ()); 4928#endif 4929 4930 (*targetm.asm_out.internal_label) (asm_out_file, "L", 4931 CODE_LABEL_NUMBER (XEXP (xops[2], 0))); 4932 4933 if (flag_pic) 4934 output_asm_insn ("pop{l}\t%0", xops); 4935 } 4936 else 4937 { 4938 char name[32]; 4939 get_pc_thunk_name (name, REGNO (dest)); 4940 pic_labels_used |= 1 << REGNO (dest); 4941 4942 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); 4943 xops[2] = gen_rtx_MEM (QImode, xops[2]); 4944 output_asm_insn ("call\t%X2", xops); 4945 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This 4946 is what will be referenced by the Mach-O PIC subsystem. 
*/ 4947#if TARGET_MACHO 4948 if (!label) 4949 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ()); 4950 else 4951 targetm.asm_out.internal_label (asm_out_file, "L", 4952 CODE_LABEL_NUMBER (label)); 4953#endif 4954 } 4955 4956 if (TARGET_MACHO) 4957 return ""; 4958 4959 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION) 4960 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops); 4961 else 4962 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops); 4963 4964 return ""; 4965} 4966 4967/* Generate an "push" pattern for input ARG. */ 4968 4969static rtx 4970gen_push (rtx arg) 4971{ 4972 return gen_rtx_SET (VOIDmode, 4973 gen_rtx_MEM (Pmode, 4974 gen_rtx_PRE_DEC (Pmode, 4975 stack_pointer_rtx)), 4976 arg); 4977} 4978 4979/* Return >= 0 if there is an unused call-clobbered register available 4980 for the entire function. */ 4981 4982static unsigned int 4983ix86_select_alt_pic_regnum (void) 4984{ 4985 if (current_function_is_leaf && !current_function_profile 4986 && !ix86_current_function_calls_tls_descriptor) 4987 { 4988 int i; 4989 for (i = 2; i >= 0; --i) 4990 if (!regs_ever_live[i]) 4991 return i; 4992 } 4993 4994 return INVALID_REGNUM; 4995} 4996 4997/* Return 1 if we need to save REGNO. 
*/ 4998static int 4999ix86_save_reg (unsigned int regno, int maybe_eh_return) 5000{ 5001 if (pic_offset_table_rtx 5002 && regno == REAL_PIC_OFFSET_TABLE_REGNUM 5003 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM] 5004 || current_function_profile 5005 || current_function_calls_eh_return 5006 || current_function_uses_const_pool)) 5007 { 5008 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM) 5009 return 0; 5010 return 1; 5011 } 5012 5013 if (current_function_calls_eh_return && maybe_eh_return) 5014 { 5015 unsigned i; 5016 for (i = 0; ; i++) 5017 { 5018 unsigned test = EH_RETURN_DATA_REGNO (i); 5019 if (test == INVALID_REGNUM) 5020 break; 5021 if (test == regno) 5022 return 1; 5023 } 5024 } 5025 5026 if (cfun->machine->force_align_arg_pointer 5027 && regno == REGNO (cfun->machine->force_align_arg_pointer)) 5028 return 1; 5029 5030 return (regs_ever_live[regno] 5031 && !call_used_regs[regno] 5032 && !fixed_regs[regno] 5033 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)); 5034} 5035 5036/* Return number of registers to be saved on the stack. */ 5037 5038static int 5039ix86_nsaved_regs (void) 5040{ 5041 int nregs = 0; 5042 int regno; 5043 5044 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--) 5045 if (ix86_save_reg (regno, true)) 5046 nregs++; 5047 return nregs; 5048} 5049 5050/* Return the offset between two registers, one to be eliminated, and the other 5051 its replacement, at the start of a routine. 
*/ 5052 5053HOST_WIDE_INT 5054ix86_initial_elimination_offset (int from, int to) 5055{ 5056 struct ix86_frame frame; 5057 ix86_compute_frame_layout (&frame); 5058 5059 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 5060 return frame.hard_frame_pointer_offset; 5061 else if (from == FRAME_POINTER_REGNUM 5062 && to == HARD_FRAME_POINTER_REGNUM) 5063 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; 5064 else 5065 { 5066 gcc_assert (to == STACK_POINTER_REGNUM); 5067 5068 if (from == ARG_POINTER_REGNUM) 5069 return frame.stack_pointer_offset; 5070 5071 gcc_assert (from == FRAME_POINTER_REGNUM); 5072 return frame.stack_pointer_offset - frame.frame_pointer_offset; 5073 } 5074} 5075 5076/* Fill structure ix86_frame about frame of currently computed function. */ 5077 5078static void 5079ix86_compute_frame_layout (struct ix86_frame *frame) 5080{ 5081 HOST_WIDE_INT total_size; 5082 unsigned int stack_alignment_needed; 5083 HOST_WIDE_INT offset; 5084 unsigned int preferred_alignment; 5085 HOST_WIDE_INT size = get_frame_size (); 5086 5087 frame->nregs = ix86_nsaved_regs (); 5088 total_size = size; 5089 5090 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT; 5091 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT; 5092 5093 /* During reload iteration the amount of registers saved can change. 5094 Recompute the value as needed. Do not recompute when amount of registers 5095 didn't change as reload does multiple calls to the function and does not 5096 expect the decision to change within single iteration. */ 5097 if (!optimize_size 5098 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs) 5099 { 5100 int count = frame->nregs; 5101 5102 cfun->machine->use_fast_prologue_epilogue_nregs = count; 5103 /* The fast prologue uses move instead of push to save registers. 
This 5104 is significantly longer, but also executes faster as modern hardware 5105 can execute the moves in parallel, but can't do that for push/pop. 5106 5107 Be careful about choosing what prologue to emit: When function takes 5108 many instructions to execute we may use slow version as well as in 5109 case function is known to be outside hot spot (this is known with 5110 feedback only). Weight the size of function by number of registers 5111 to save as it is cheap to use one or two push instructions but very 5112 slow to use many of them. */ 5113 if (count) 5114 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT; 5115 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL 5116 || (flag_branch_probabilities 5117 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT)) 5118 cfun->machine->use_fast_prologue_epilogue = false; 5119 else 5120 cfun->machine->use_fast_prologue_epilogue 5121 = !expensive_function_p (count); 5122 } 5123 if (TARGET_PROLOGUE_USING_MOVE 5124 && cfun->machine->use_fast_prologue_epilogue) 5125 frame->save_regs_using_mov = true; 5126 else 5127 frame->save_regs_using_mov = false; 5128 5129 5130 /* Skip return address and saved base pointer. */ 5131 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD; 5132 5133 frame->hard_frame_pointer_offset = offset; 5134 5135 /* Do some sanity checking of stack_alignment_needed and 5136 preferred_alignment, since i386 port is the only using those features 5137 that may break easily. 
*/ 5138 5139 gcc_assert (!size || stack_alignment_needed); 5140 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); 5141 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT); 5142 gcc_assert (stack_alignment_needed 5143 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT); 5144 5145 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT) 5146 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT; 5147 5148 /* Register save area */ 5149 offset += frame->nregs * UNITS_PER_WORD; 5150 5151 /* Va-arg area */ 5152 if (ix86_save_varrargs_registers) 5153 { 5154 offset += X86_64_VARARGS_SIZE; 5155 frame->va_arg_size = X86_64_VARARGS_SIZE; 5156 } 5157 else 5158 frame->va_arg_size = 0; 5159 5160 /* Align start of frame for local function. */ 5161 frame->padding1 = ((offset + stack_alignment_needed - 1) 5162 & -stack_alignment_needed) - offset; 5163 5164 offset += frame->padding1; 5165 5166 /* Frame pointer points here. */ 5167 frame->frame_pointer_offset = offset; 5168 5169 offset += size; 5170 5171 /* Add outgoing arguments area. Can be skipped if we eliminated 5172 all the function calls as dead code. 5173 Skipping is however impossible when function calls alloca. Alloca 5174 expander assumes that last current_function_outgoing_args_size 5175 of stack frame are unused. */ 5176 if (ACCUMULATE_OUTGOING_ARGS 5177 && (!current_function_is_leaf || current_function_calls_alloca 5178 || ix86_current_function_calls_tls_descriptor)) 5179 { 5180 offset += current_function_outgoing_args_size; 5181 frame->outgoing_arguments_size = current_function_outgoing_args_size; 5182 } 5183 else 5184 frame->outgoing_arguments_size = 0; 5185 5186 /* Align stack boundary. Only needed if we're calling another function 5187 or using alloca. 
*/ 5188 if (!current_function_is_leaf || current_function_calls_alloca 5189 || ix86_current_function_calls_tls_descriptor) 5190 frame->padding2 = ((offset + preferred_alignment - 1) 5191 & -preferred_alignment) - offset; 5192 else 5193 frame->padding2 = 0; 5194 5195 offset += frame->padding2; 5196 5197 /* We've reached end of stack frame. */ 5198 frame->stack_pointer_offset = offset; 5199 5200 /* Size prologue needs to allocate. */ 5201 frame->to_allocate = 5202 (size + frame->padding1 + frame->padding2 5203 + frame->outgoing_arguments_size + frame->va_arg_size); 5204 5205 if ((!frame->to_allocate && frame->nregs <= 1) 5206 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000)) 5207 frame->save_regs_using_mov = false; 5208 5209 if (TARGET_RED_ZONE && current_function_sp_is_unchanging 5210 && current_function_is_leaf 5211 && !ix86_current_function_calls_tls_descriptor) 5212 { 5213 frame->red_zone_size = frame->to_allocate; 5214 if (frame->save_regs_using_mov) 5215 frame->red_zone_size += frame->nregs * UNITS_PER_WORD; 5216 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) 5217 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; 5218 } 5219 else 5220 frame->red_zone_size = 0; 5221 frame->to_allocate -= frame->red_zone_size; 5222 frame->stack_pointer_offset -= frame->red_zone_size; 5223#if 0 5224 fprintf (stderr, "nregs: %i\n", frame->nregs); 5225 fprintf (stderr, "size: %i\n", size); 5226 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed); 5227 fprintf (stderr, "padding1: %i\n", frame->padding1); 5228 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size); 5229 fprintf (stderr, "padding2: %i\n", frame->padding2); 5230 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate); 5231 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size); 5232 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset); 5233 fprintf (stderr, "hard_frame_pointer_offset: %i\n", 5234 frame->hard_frame_pointer_offset); 5235 
fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset); 5236#endif 5237} 5238 5239/* Emit code to save registers in the prologue. */ 5240 5241static void 5242ix86_emit_save_regs (void) 5243{ 5244 unsigned int regno; 5245 rtx insn; 5246 5247 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; ) 5248 if (ix86_save_reg (regno, true)) 5249 { 5250 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno))); 5251 RTX_FRAME_RELATED_P (insn) = 1; 5252 } 5253} 5254 5255/* Emit code to save registers using MOV insns. First register 5256 is restored from POINTER + OFFSET. */ 5257static void 5258ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset) 5259{ 5260 unsigned int regno; 5261 rtx insn; 5262 5263 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 5264 if (ix86_save_reg (regno, true)) 5265 { 5266 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer), 5267 Pmode, offset), 5268 gen_rtx_REG (Pmode, regno)); 5269 RTX_FRAME_RELATED_P (insn) = 1; 5270 offset += UNITS_PER_WORD; 5271 } 5272} 5273 5274/* Expand prologue or epilogue stack adjustment. 5275 The pattern exist to put a dependency on all ebp-based memory accesses. 5276 STYLE should be negative if instructions should be marked as frame related, 5277 zero if %r11 register is live and cannot be freely used and positive 5278 otherwise. */ 5279 5280static void 5281pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style) 5282{ 5283 rtx insn; 5284 5285 if (! TARGET_64BIT) 5286 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset)); 5287 else if (x86_64_immediate_operand (offset, DImode)) 5288 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset)); 5289 else 5290 { 5291 rtx r11; 5292 /* r11 is used by indirect sibcall return as well, set before the 5293 epilogue and used after the epilogue. ATM indirect sibcall 5294 shouldn't be used together with huge frame sizes in one 5295 function because of the frame_size check in sibcall.c. 
*/ 5296 gcc_assert (style); 5297 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */); 5298 insn = emit_insn (gen_rtx_SET (DImode, r11, offset)); 5299 if (style < 0) 5300 RTX_FRAME_RELATED_P (insn) = 1; 5301 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11, 5302 offset)); 5303 } 5304 if (style < 0) 5305 RTX_FRAME_RELATED_P (insn) = 1; 5306} 5307 5308/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */ 5309 5310static rtx 5311ix86_internal_arg_pointer (void) 5312{ 5313 bool has_force_align_arg_pointer = 5314 (0 != lookup_attribute (ix86_force_align_arg_pointer_string, 5315 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))); 5316 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN 5317 && DECL_NAME (current_function_decl) 5318 && MAIN_NAME_P (DECL_NAME (current_function_decl)) 5319 && DECL_FILE_SCOPE_P (current_function_decl)) 5320 || ix86_force_align_arg_pointer 5321 || has_force_align_arg_pointer) 5322 { 5323 /* Nested functions can't realign the stack due to a register 5324 conflict. */ 5325 if (DECL_CONTEXT (current_function_decl) 5326 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL) 5327 { 5328 if (ix86_force_align_arg_pointer) 5329 warning (0, "-mstackrealign ignored for nested functions"); 5330 if (has_force_align_arg_pointer) 5331 error ("%s not supported for nested functions", 5332 ix86_force_align_arg_pointer_string); 5333 return virtual_incoming_args_rtx; 5334 } 5335 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2); 5336 return copy_to_reg (cfun->machine->force_align_arg_pointer); 5337 } 5338 else 5339 return virtual_incoming_args_rtx; 5340} 5341 5342/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook. 5343 This is called from dwarf2out.c to emit call frame instructions 5344 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. 
*/ 5345static void 5346ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index) 5347{ 5348 rtx unspec = SET_SRC (pattern); 5349 gcc_assert (GET_CODE (unspec) == UNSPEC); 5350 5351 switch (index) 5352 { 5353 case UNSPEC_REG_SAVE: 5354 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0), 5355 SET_DEST (pattern)); 5356 break; 5357 case UNSPEC_DEF_CFA: 5358 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)), 5359 INTVAL (XVECEXP (unspec, 0, 0))); 5360 break; 5361 default: 5362 gcc_unreachable (); 5363 } 5364} 5365 5366/* Expand the prologue into a bunch of separate insns. */ 5367 5368void 5369ix86_expand_prologue (void) 5370{ 5371 rtx insn; 5372 bool pic_reg_used; 5373 struct ix86_frame frame; 5374 HOST_WIDE_INT allocate; 5375 5376 ix86_compute_frame_layout (&frame); 5377 5378 if (cfun->machine->force_align_arg_pointer) 5379 { 5380 rtx x, y; 5381 5382 /* Grab the argument pointer. */ 5383 x = plus_constant (stack_pointer_rtx, 4); 5384 y = cfun->machine->force_align_arg_pointer; 5385 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x)); 5386 RTX_FRAME_RELATED_P (insn) = 1; 5387 5388 /* The unwind info consists of two parts: install the fafp as the cfa, 5389 and record the fafp as the "save register" of the stack pointer. 5390 The later is there in order that the unwinder can see where it 5391 should restore the stack pointer across the and insn. */ 5392 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA); 5393 x = gen_rtx_SET (VOIDmode, y, x); 5394 RTX_FRAME_RELATED_P (x) = 1; 5395 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx), 5396 UNSPEC_REG_SAVE); 5397 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y); 5398 RTX_FRAME_RELATED_P (y) = 1; 5399 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)); 5400 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL); 5401 REG_NOTES (insn) = x; 5402 5403 /* Align the stack. 
*/ 5404 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx, 5405 GEN_INT (-16))); 5406 5407 /* And here we cheat like madmen with the unwind info. We force the 5408 cfa register back to sp+4, which is exactly what it was at the 5409 start of the function. Re-pushing the return address results in 5410 the return at the same spot relative to the cfa, and thus is 5411 correct wrt the unwind info. */ 5412 x = cfun->machine->force_align_arg_pointer; 5413 x = gen_frame_mem (Pmode, plus_constant (x, -4)); 5414 insn = emit_insn (gen_push (x)); 5415 RTX_FRAME_RELATED_P (insn) = 1; 5416 5417 x = GEN_INT (4); 5418 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA); 5419 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x); 5420 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL); 5421 REG_NOTES (insn) = x; 5422 } 5423 5424 /* Note: AT&T enter does NOT have reversed args. Enter is probably 5425 slower on all targets. Also sdb doesn't like it. */ 5426 5427 if (frame_pointer_needed) 5428 { 5429 insn = emit_insn (gen_push (hard_frame_pointer_rtx)); 5430 RTX_FRAME_RELATED_P (insn) = 1; 5431 5432 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); 5433 RTX_FRAME_RELATED_P (insn) = 1; 5434 } 5435 5436 allocate = frame.to_allocate; 5437 5438 if (!frame.save_regs_using_mov) 5439 ix86_emit_save_regs (); 5440 else 5441 allocate += frame.nregs * UNITS_PER_WORD; 5442 5443 /* When using red zone we may start register saving before allocating 5444 the stack frame saving one cycle of the prologue. */ 5445 if (TARGET_RED_ZONE && frame.save_regs_using_mov) 5446 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx 5447 : stack_pointer_rtx, 5448 -frame.nregs * UNITS_PER_WORD); 5449 5450 if (allocate == 0) 5451 ; 5452 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT) 5453 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 5454 GEN_INT (-allocate), -1); 5455 else 5456 { 5457 /* Only valid for Win32. 
*/ 5458 rtx eax = gen_rtx_REG (SImode, 0); 5459 bool eax_live = ix86_eax_live_at_start_p (); 5460 rtx t; 5461 5462 gcc_assert (!TARGET_64BIT); 5463 5464 if (eax_live) 5465 { 5466 emit_insn (gen_push (eax)); 5467 allocate -= 4; 5468 } 5469 5470 emit_move_insn (eax, GEN_INT (allocate)); 5471 5472 insn = emit_insn (gen_allocate_stack_worker (eax)); 5473 RTX_FRAME_RELATED_P (insn) = 1; 5474 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate)); 5475 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t); 5476 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, 5477 t, REG_NOTES (insn)); 5478 5479 if (eax_live) 5480 { 5481 if (frame_pointer_needed) 5482 t = plus_constant (hard_frame_pointer_rtx, 5483 allocate 5484 - frame.to_allocate 5485 - frame.nregs * UNITS_PER_WORD); 5486 else 5487 t = plus_constant (stack_pointer_rtx, allocate); 5488 emit_move_insn (eax, gen_rtx_MEM (SImode, t)); 5489 } 5490 } 5491 5492 if (frame.save_regs_using_mov && !TARGET_RED_ZONE) 5493 { 5494 if (!frame_pointer_needed || !frame.to_allocate) 5495 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate); 5496 else 5497 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx, 5498 -frame.nregs * UNITS_PER_WORD); 5499 } 5500 5501 pic_reg_used = false; 5502 if (pic_offset_table_rtx 5503 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM] 5504 || current_function_profile)) 5505 { 5506 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum (); 5507 5508 if (alt_pic_reg_used != INVALID_REGNUM) 5509 REGNO (pic_offset_table_rtx) = alt_pic_reg_used; 5510 5511 pic_reg_used = true; 5512 } 5513 5514 if (pic_reg_used) 5515 { 5516 if (TARGET_64BIT) 5517 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx)); 5518 else 5519 insn = emit_insn (gen_set_got (pic_offset_table_rtx)); 5520 5521 /* Even with accurate pre-reload life analysis, we can wind up 5522 deleting all references to the pic register after reload. 
5523 Consider if cross-jumping unifies two sides of a branch 5524 controlled by a comparison vs the only read from a global. 5525 In which case, allow the set_got to be deleted, though we're 5526 too late to do anything about the ebx save in the prologue. */ 5527 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL); 5528 } 5529 5530 /* Prevent function calls from be scheduled before the call to mcount. 5531 In the pic_reg_used case, make sure that the got load isn't deleted. */ 5532 if (current_function_profile) 5533 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx)); 5534} 5535 5536/* Emit code to restore saved registers using MOV insns. First register 5537 is restored from POINTER + OFFSET. */ 5538static void 5539ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset, 5540 int maybe_eh_return) 5541{ 5542 int regno; 5543 rtx base_address = gen_rtx_MEM (Pmode, pointer); 5544 5545 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 5546 if (ix86_save_reg (regno, maybe_eh_return)) 5547 { 5548 /* Ensure that adjust_address won't be forced to produce pointer 5549 out of range allowed by x86-64 instruction set. */ 5550 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode)) 5551 { 5552 rtx r11; 5553 5554 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */); 5555 emit_move_insn (r11, GEN_INT (offset)); 5556 emit_insn (gen_adddi3 (r11, r11, pointer)); 5557 base_address = gen_rtx_MEM (Pmode, r11); 5558 offset = 0; 5559 } 5560 emit_move_insn (gen_rtx_REG (Pmode, regno), 5561 adjust_address (base_address, Pmode, offset)); 5562 offset += UNITS_PER_WORD; 5563 } 5564} 5565 5566/* Restore function stack, frame, and registers. 
*/ 5567 5568void 5569ix86_expand_epilogue (int style) 5570{ 5571 int regno; 5572 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging; 5573 struct ix86_frame frame; 5574 HOST_WIDE_INT offset; 5575 5576 ix86_compute_frame_layout (&frame); 5577 5578 /* Calculate start of saved registers relative to ebp. Special care 5579 must be taken for the normal return case of a function using 5580 eh_return: the eax and edx registers are marked as saved, but not 5581 restored along this path. */ 5582 offset = frame.nregs; 5583 if (current_function_calls_eh_return && style != 2) 5584 offset -= 2; 5585 offset *= -UNITS_PER_WORD; 5586 5587 /* If we're only restoring one register and sp is not valid then 5588 using a move instruction to restore the register since it's 5589 less work than reloading sp and popping the register. 5590 5591 The default code result in stack adjustment using add/lea instruction, 5592 while this code results in LEAVE instruction (or discrete equivalent), 5593 so it is profitable in some other cases as well. Especially when there 5594 are no registers to restore. We also use this code when TARGET_USE_LEAVE 5595 and there is exactly one register to pop. This heuristic may need some 5596 tuning in future. */ 5597 if ((!sp_valid && frame.nregs <= 1) 5598 || (TARGET_EPILOGUE_USING_MOVE 5599 && cfun->machine->use_fast_prologue_epilogue 5600 && (frame.nregs > 1 || frame.to_allocate)) 5601 || (frame_pointer_needed && !frame.nregs && frame.to_allocate) 5602 || (frame_pointer_needed && TARGET_USE_LEAVE 5603 && cfun->machine->use_fast_prologue_epilogue 5604 && frame.nregs == 1) 5605 || current_function_calls_eh_return) 5606 { 5607 /* Restore registers. We can use ebp or esp to address the memory 5608 locations. If both are available, default to ebp, since offsets 5609 are known to be small. Only exception is esp pointing directly to the 5610 end of block of saved registers, where we may simplify addressing 5611 mode. 
*/ 5612 5613 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate)) 5614 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, 5615 frame.to_allocate, style == 2); 5616 else 5617 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, 5618 offset, style == 2); 5619 5620 /* eh_return epilogues need %ecx added to the stack pointer. */ 5621 if (style == 2) 5622 { 5623 rtx tmp, sa = EH_RETURN_STACKADJ_RTX; 5624 5625 if (frame_pointer_needed) 5626 { 5627 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa); 5628 tmp = plus_constant (tmp, UNITS_PER_WORD); 5629 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp)); 5630 5631 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx); 5632 emit_move_insn (hard_frame_pointer_rtx, tmp); 5633 5634 pro_epilogue_adjust_stack (stack_pointer_rtx, sa, 5635 const0_rtx, style); 5636 } 5637 else 5638 { 5639 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); 5640 tmp = plus_constant (tmp, (frame.to_allocate 5641 + frame.nregs * UNITS_PER_WORD)); 5642 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp)); 5643 } 5644 } 5645 else if (!frame_pointer_needed) 5646 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 5647 GEN_INT (frame.to_allocate 5648 + frame.nregs * UNITS_PER_WORD), 5649 style); 5650 /* If not an i386, mov & pop is faster than "leave". */ 5651 else if (TARGET_USE_LEAVE || optimize_size 5652 || !cfun->machine->use_fast_prologue_epilogue) 5653 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ()); 5654 else 5655 { 5656 pro_epilogue_adjust_stack (stack_pointer_rtx, 5657 hard_frame_pointer_rtx, 5658 const0_rtx, style); 5659 if (TARGET_64BIT) 5660 emit_insn (gen_popdi1 (hard_frame_pointer_rtx)); 5661 else 5662 emit_insn (gen_popsi1 (hard_frame_pointer_rtx)); 5663 } 5664 } 5665 else 5666 { 5667 /* First step is to deallocate the stack frame so that we can 5668 pop the registers. 
*/ 5669 if (!sp_valid) 5670 { 5671 gcc_assert (frame_pointer_needed); 5672 pro_epilogue_adjust_stack (stack_pointer_rtx, 5673 hard_frame_pointer_rtx, 5674 GEN_INT (offset), style); 5675 } 5676 else if (frame.to_allocate) 5677 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 5678 GEN_INT (frame.to_allocate), style); 5679 5680 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 5681 if (ix86_save_reg (regno, false)) 5682 { 5683 if (TARGET_64BIT) 5684 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno))); 5685 else 5686 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno))); 5687 } 5688 if (frame_pointer_needed) 5689 { 5690 /* Leave results in shorter dependency chains on CPUs that are 5691 able to grok it fast. */ 5692 if (TARGET_USE_LEAVE) 5693 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ()); 5694 else if (TARGET_64BIT) 5695 emit_insn (gen_popdi1 (hard_frame_pointer_rtx)); 5696 else 5697 emit_insn (gen_popsi1 (hard_frame_pointer_rtx)); 5698 } 5699 } 5700 5701 if (cfun->machine->force_align_arg_pointer) 5702 { 5703 emit_insn (gen_addsi3 (stack_pointer_rtx, 5704 cfun->machine->force_align_arg_pointer, 5705 GEN_INT (-4))); 5706 } 5707 5708 /* Sibcall epilogues don't want a return instruction. */ 5709 if (style == 0) 5710 return; 5711 5712 if (current_function_pops_args && current_function_args_size) 5713 { 5714 rtx popc = GEN_INT (current_function_pops_args); 5715 5716 /* i386 can only pop 64K bytes. If asked to pop more, pop 5717 return address, do explicit add, and jump indirectly to the 5718 caller. */ 5719 5720 if (current_function_pops_args >= 65536) 5721 { 5722 rtx ecx = gen_rtx_REG (SImode, 2); 5723 5724 /* There is no "pascal" calling convention in 64bit ABI. 
*/ 5725 gcc_assert (!TARGET_64BIT); 5726 5727 emit_insn (gen_popsi1 (ecx)); 5728 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc)); 5729 emit_jump_insn (gen_return_indirect_internal (ecx)); 5730 } 5731 else 5732 emit_jump_insn (gen_return_pop_internal (popc)); 5733 } 5734 else 5735 emit_jump_insn (gen_return_internal ()); 5736} 5737 5738/* Reset from the function's potential modifications. */ 5739 5740static void 5741ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, 5742 HOST_WIDE_INT size ATTRIBUTE_UNUSED) 5743{ 5744 if (pic_offset_table_rtx) 5745 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM; 5746#if TARGET_MACHO 5747 /* Mach-O doesn't support labels at the end of objects, so if 5748 it looks like we might want one, insert a NOP. */ 5749 { 5750 rtx insn = get_last_insn (); 5751 while (insn 5752 && NOTE_P (insn) 5753 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL) 5754 insn = PREV_INSN (insn); 5755 if (insn 5756 && (LABEL_P (insn) 5757 || (NOTE_P (insn) 5758 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL))) 5759 fputs ("\tnop\n", file); 5760 } 5761#endif 5762 5763} 5764 5765/* Extract the parts of an RTL expression that is a valid memory address 5766 for an instruction. Return 0 if the structure of the address is 5767 grossly off. Return -1 if the address contains ASHIFT, so it is not 5768 strictly valid, but still used for computing length of lea instruction. 
*/

int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      /* Flatten the PLUS tree into at most four addends, then classify
         each addend as base, index*scale, segment unspec, or
         displacement.  */
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
        {
          if (n >= 4)
            return 0;
          addends[n++] = XEXP (op, 1);
          op = XEXP (op, 0);
        }
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
        return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
        {
          op = addends[i];
          switch (GET_CODE (op))
            {
            case MULT:
              if (index)
                return 0;
              index = XEXP (op, 0);
              scale_rtx = XEXP (op, 1);
              break;

            case UNSPEC:
              /* A TLS thread-pointer reference becomes a segment
                 override (%fs on 64-bit, %gs on 32-bit).  */
              if (XINT (op, 1) == UNSPEC_TP
                  && TARGET_TLS_DIRECT_SEG_REFS
                  && seg == SEG_DEFAULT)
                seg = TARGET_64BIT ? SEG_FS : SEG_GS;
              else
                return 0;
              break;

            case REG:
            case SUBREG:
              if (!base)
                base = op;
              else if (!index)
                index = op;
              else
                return 0;
              break;

            case CONST:
            case CONST_INT:
            case SYMBOL_REF:
            case LABEL_REF:
              if (disp)
                return 0;
              disp = op;
              break;

            default:
              return 0;
            }
        }
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
        return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
        return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
        return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;

  /* Allow arg pointer and stack pointer as index if there is not scaling.  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
          || index_reg == frame_pointer_rtx
          || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
    {
      rtx tmp;
      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base_reg == hard_frame_pointer_rtx
       || base_reg == frame_pointer_rtx
       || base_reg == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_tune == PROCESSOR_K6 && !optimize_size
      && base_reg && !index_reg && !disp
      && REG_P (base_reg)
      && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.
*/ 5909 if (!base && !disp && index && scale != 1) 5910 disp = const0_rtx; 5911 5912 out->base = base; 5913 out->index = index; 5914 out->disp = disp; 5915 out->scale = scale; 5916 out->seg = seg; 5917 5918 return retval; 5919} 5920 5921/* Return cost of the memory address x. 5922 For i386, it is better to use a complex address than let gcc copy 5923 the address into a reg and make a new pseudo. But not if the address 5924 requires to two regs - that would mean more pseudos with longer 5925 lifetimes. */ 5926static int 5927ix86_address_cost (rtx x) 5928{ 5929 struct ix86_address parts; 5930 int cost = 1; 5931 int ok = ix86_decompose_address (x, &parts); 5932 5933 gcc_assert (ok); 5934 5935 if (parts.base && GET_CODE (parts.base) == SUBREG) 5936 parts.base = SUBREG_REG (parts.base); 5937 if (parts.index && GET_CODE (parts.index) == SUBREG) 5938 parts.index = SUBREG_REG (parts.index); 5939 5940 /* More complex memory references are better. */ 5941 if (parts.disp && parts.disp != const0_rtx) 5942 cost--; 5943 if (parts.seg != SEG_DEFAULT) 5944 cost--; 5945 5946 /* Attempt to minimize number of registers in the address. */ 5947 if ((parts.base 5948 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)) 5949 || (parts.index 5950 && (!REG_P (parts.index) 5951 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER))) 5952 cost++; 5953 5954 if (parts.base 5955 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER) 5956 && parts.index 5957 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER) 5958 && parts.base != parts.index) 5959 cost++; 5960 5961 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b, 5962 since it's predecode logic can't detect the length of instructions 5963 and it degenerates to vector decoded. Increase cost of such 5964 addresses here. The penalty is minimally 2 cycles. It may be worthwhile 5965 to split such addresses or even refuse such addresses at all. 
5966 5967 Following addressing modes are affected: 5968 [base+scale*index] 5969 [scale*index+disp] 5970 [base+index] 5971 5972 The first and last case may be avoidable by explicitly coding the zero in 5973 memory address, but I don't have AMD-K6 machine handy to check this 5974 theory. */ 5975 5976 if (TARGET_K6 5977 && ((!parts.disp && parts.base && parts.index && parts.scale != 1) 5978 || (parts.disp && !parts.base && parts.index && parts.scale != 1) 5979 || (!parts.disp && parts.base && parts.index && parts.scale == 1))) 5980 cost += 10; 5981 5982 return cost; 5983} 5984 5985/* If X is a machine specific address (i.e. a symbol or label being 5986 referenced as a displacement from the GOT implemented using an 5987 UNSPEC), then return the base term. Otherwise return X. */ 5988 5989rtx 5990ix86_find_base_term (rtx x) 5991{ 5992 rtx term; 5993 5994 if (TARGET_64BIT) 5995 { 5996 if (GET_CODE (x) != CONST) 5997 return x; 5998 term = XEXP (x, 0); 5999 if (GET_CODE (term) == PLUS 6000 && (GET_CODE (XEXP (term, 1)) == CONST_INT 6001 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE)) 6002 term = XEXP (term, 0); 6003 if (GET_CODE (term) != UNSPEC 6004 || XINT (term, 1) != UNSPEC_GOTPCREL) 6005 return x; 6006 6007 term = XVECEXP (term, 0, 0); 6008 6009 if (GET_CODE (term) != SYMBOL_REF 6010 && GET_CODE (term) != LABEL_REF) 6011 return x; 6012 6013 return term; 6014 } 6015 6016 term = ix86_delegitimize_address (x); 6017 6018 if (GET_CODE (term) != SYMBOL_REF 6019 && GET_CODE (term) != LABEL_REF) 6020 return x; 6021 6022 return term; 6023} 6024 6025/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as 6026 this is used for to form addresses to local data when -fPIC is in 6027 use. 
*/

static bool
darwin_local_data_pic (rtx disp)
{
  /* Match (minus (label_ref|symbol_ref) (symbol_ref "<pic base>")).  */
  if (GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
	  || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
	  {
	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
	    if (! strcmp (sym_name, "<pic base>"))
	      return true;
	  }
    }

  return false;
}

/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

bool
legitimate_constant_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      /* Strip an optional (plus ... const_int) wrapper.  */
      if (GET_CODE (x) == PLUS)
	{
	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_GOTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
	return true;
      if (GET_CODE (x) != SYMBOL_REF)
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return false;
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == TImode
	  && x != CONST0_RTX (TImode)
	  && !TARGET_64BIT)
	return false;
      break;

    case CONST_VECTOR:
      if (x == CONST0_RTX (GET_MODE (x)))
	return true;
      return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}

/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !legitimate_constant_p (x);
}

/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
}

/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
	  && GET_CODE (XEXP (inner, 1)) == CONST_INT)
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    x = XVECEXP (inner, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}

/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
	{
	case LABEL_REF:
	  return true;

	case CONST:
	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
	    break;
	  op0 = XEXP (XEXP (disp, 0), 0);
	  op1 = XEXP (XEXP (disp, 0), 1);
	  /* The offset must be a CONST_INT within +-16MB.  */
	  if (GET_CODE (op1) != CONST_INT
	      || INTVAL (op1) >= 16*1024*1024
	      || INTVAL (op1) < -16*1024*1024)
	    break;
	  if (GET_CODE (op0) == LABEL_REF)
	    return true;
	  if (GET_CODE (op0) != SYMBOL_REF)
	    break;
	  /* FALLTHRU */

	case SYMBOL_REF:
	  /* TLS references should always be enclosed in UNSPEC.  */
	  if (SYMBOL_REF_TLS_MODEL (op0))
	    return false;
	  if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
	    return true;
	  break;

	default:
	  break;
	}
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions; this would exceed the
	 allowed distance of GOT tables.  We should not need these
	 anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
	      && XINT (disp, 1) != UNSPEC_GOTOFF))
	return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return 0;
      return 1;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
	return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return 1;

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI specifies also 32bit relocation, we don't produce
	 it in small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	  && !TARGET_64BIT)
	return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return 0;
}

/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.
   The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  /* REASON/REASON_RTX record why validation failed; they are only
     reported under TARGET_DEBUG_ADDR.  */
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
	       GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's that span more than a word here.  It can lead to spill
     failures when the base is one word out of a two word structure, which is
     represented internally as a DImode int.  */

  if (base)
    {
      rtx reg;
      reason_rtx = base;

      if (REG_P (base))
	reg = base;
      else if (GET_CODE (base) == SUBREG
	       && REG_P (SUBREG_REG (base))
	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
		  <= UNITS_PER_WORD)
	reg = SUBREG_REG (base);
      else
	{
	  reason = "base is not a register";
	  goto report_error;
	}

      if (GET_MODE (base) != Pmode)
	{
	  reason = "base is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	{
	  reason = "base is not valid";
	  goto report_error;
	}
    }

  /* Validate index register.

     Don't allow SUBREG's that span more than a word here -- same as above.  */

  if (index)
    {
      rtx reg;
      reason_rtx = index;

      if (REG_P (index))
	reg = index;
      else if (GET_CODE (index) == SUBREG
	       && REG_P (SUBREG_REG (index))
	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
		  <= UNITS_PER_WORD)
	reg = SUBREG_REG (index);
      else
	{
	  reason = "index is not a register";
	  goto report_error;
	}

      if (GET_MODE (index) != Pmode)
	{
	  reason = "index is not in Pmode";
	  goto report_error;
	}

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	{
	  reason = "index is not valid";
	  goto report_error;
	}
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
	{
	  reason = "scale without index";
	  goto report_error;
	}

      if (scale != 2 && scale != 4 && scale != 8)
	{
	  reason = "scale is not a valid multiplier";
	  goto report_error;
	}
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
	     used.  While ABI specify also 32bit relocations, we don't produce
	     them at all and use IP relative instead.  */
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	    gcc_assert (flag_pic);
	    if (!TARGET_64BIT)
	      goto is_legitimate_pic;
	    reason = "64bit address unspec";
	    goto report_error;

	  case UNSPEC_GOTPCREL:
	    gcc_assert (flag_pic);
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  default:
	    reason = "invalid address unspec";
	    goto report_error;
	  }

      else if (SYMBOLIC_CONST (disp)
	       && (flag_pic
		   || (TARGET_MACHO
#if TARGET_MACHO
		       && MACHOPIC_INDIRECT
		       && !machopic_operand_p (disp)
#endif
	       )))
	{

	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		{
		  reason = "non-constant pic memory reference";
		  goto report_error;
		}
	    }
	  else if (! legitimate_pic_address_disp_p (disp))
	    {
	      reason = "displacement is an invalid pic construct";
	      goto report_error;
	    }

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && GET_CODE (disp) != CONST_INT
	       && (GET_CODE (disp) != CONST
		   || !legitimate_constant_p (disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !legitimate_constant_p (disp)))
	{
	  reason = "displacement is not constant";
	  goto report_error;
	}
      else if (TARGET_64BIT
	       && !x86_64_immediate_operand (disp, VOIDmode))
	{
	  reason = "displacement is out of range";
	  goto report_error;
	}
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}

/* Return a unique alias set for the GOT.  Lazily allocated on first use.  */

static HOST_WIDE_INT
ix86_GOT_alias_set (void)
{
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.
*/

static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new = addr;
  else if (TARGET_64BIT
	   && ix86_cmodel != CM_SMALL_PIC
	   && local_symbolic_operand (addr, Pmode))
    {
      rtx tmpreg;
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
	  new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
	}
      else
	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      if (!reg)
	tmpreg = gen_reg_rtx (Pmode);
      else
	tmpreg = reg;
      emit_move_insn (tmpreg, new);

      if (reg != 0)
	{
	  /* Add in the PIC base, placing the result in REG.  */
	  new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
				     tmpreg, 1, OPTAB_DIRECT);
	  new = reg;
	}
      else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
    }
  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
	  new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
	}
      else
	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
	{
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
    {
      if (TARGET_64BIT)
	{
	  /* 64bit: load the address RIP-relatively through the GOT
	     (@GOTPCREL).  */
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_const_mem (Pmode, new);
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use directly gen_movsi, otherwise the address is loaded
	     into register for CSE.  We don't want to CSE this addresses,
	     instead we CSE addresses from the GOT table, so skip this.  */
	  emit_insn (gen_movsi (reg, new));
	  new = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new = gen_rtx_CONST (Pmode, new);
	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
	  new = gen_const_mem (Pmode, new);
	  set_mem_alias_set (new, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new);
	  new = reg;
	}
    }
  else
    {
      if (GET_CODE (addr) == CONST_INT
	  && !x86_64_immediate_operand (addr, VOIDmode))
	{
	  if (reg)
	    {
	      emit_move_insn (reg, addr);
	      new = reg;
	    }
	  else
	    new = force_reg (Pmode, addr);
	}
      else if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  gcc_assert (GET_CODE (addr) == PLUS);
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (local_symbolic_operand (op0, Pmode)
	      && GET_CODE (op1) == CONST_INT)
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					UNSPEC_GOTOFF);
		  new = gen_rtx_PLUS (Pmode, new, op1);
		  new = gen_rtx_CONST (Pmode, new);
		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new);
		      new = reg;
		    }
		}
	      else
		{
		  /* 64bit: offsets outside +-16MB cannot be folded
		     into the relocation; force operands to registers.  */
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (op1, Pmode))
			op1 = force_reg (Pmode, op1);
		      new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
		    }
		}
	    }
	  else
	    {
	      /* Legitimize each operand of the PLUS recursively and
	         recombine.  */
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new  = legitimize_pic_address (XEXP (addr, 1),
					     base == reg ? NULL_RTX : reg);

	      if (GET_CODE (new) == CONST_INT)
		new = plus_constant (base, INTVAL (new));
	      else
		{
		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
		      new = XEXP (new, 1);
		    }
		  new = gen_rtx_PLUS (Pmode, base, new);
		}
	    }
	}
    }
  return new;
}

/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (int to_reg)
{
  rtx tp, reg, insn;

  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  if (!to_reg)
    return tp;

  reg = gen_reg_rtx (Pmode);
  insn = gen_rtx_SET (VOIDmode, reg, tp);
  insn = emit_insn (insn);

  return reg;
}

/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.
*/

static rtx
legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
{
  rtx dest, base, off, pic, tp;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);
      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;

      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
	{
	  /* The 64bit call clobbers/returns in %rax; wrap it in a
	     libcall block equivalent to the address of X.  */
	  rtx rax = gen_rtx_REG (Pmode, 0), insns;

	  start_sequence ();
	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
	  insns = get_insns ();
	  end_sequence ();

	  emit_libcall_block (insns, dest, rax, x);
	}
      else if (TARGET_64BIT && TARGET_GNU2_TLS)
	emit_insn (gen_tls_global_dynamic_64 (dest, x));
      else
	emit_insn (gen_tls_global_dynamic_32 (dest, x));

      if (TARGET_GNU2_TLS)
	{
	  /* GNU2 TLS yields an offset; add the thread pointer.  */
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));

	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
	}
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);
      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;

      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
	{
	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;

	  start_sequence ();
	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
	  insns = get_insns ();
	  end_sequence ();

	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
	  emit_libcall_block (insns, base, rax, note);
	}
      else if (TARGET_64BIT && TARGET_GNU2_TLS)
	emit_insn (gen_tls_local_dynamic_base_64 (base));
      else
	emit_insn (gen_tls_local_dynamic_base_32 (base));

      if (TARGET_GNU2_TLS)
	{
	  rtx x = ix86_tls_module_base ();

	  set_unique_reg_note (get_last_insn (), REG_EQUIV,
			       gen_rtx_MINUS (Pmode, x, tp));
	}

      /* Add the per-symbol @DTPOFF displacement to the module base.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));

	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
	}

      break;

    case TLS_MODEL_INITIAL_EXEC:
      /* Pick the GOT-relative unspec and the register holding the GOT
	 base, depending on target and PIC-ness.  */
      if (TARGET_64BIT)
	{
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
	off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_const_mem (Pmode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (Pmode, off);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  /* Sun-style TLS: the offset is subtracted from the thread
	     pointer.  */
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
	       GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  /* TLS symbols (bare, or symbol+offset wrapped in CONST) go through
     the TLS legitimizer.  */
  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = 1;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Break out any multiplications that could not be encoded.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && GET_CODE (XEXP (x, 1)) == REG
	  && GET_CODE (XEXP (x, 0)) == REG)
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && legitimate_address_p (mode, x, FALSE))
	return x;

      /* Last resort: force the non-REG operand into a register so the
	 address becomes REG+REG.  */
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
7151} 7152 7153/* Print an integer constant expression in assembler syntax. Addition 7154 and subtraction are the only arithmetic that may appear in these 7155 expressions. FILE is the stdio stream to write to, X is the rtx, and 7156 CODE is the operand print code from the output string. */ 7157 7158static void 7159output_pic_addr_const (FILE *file, rtx x, int code) 7160{ 7161 char buf[256]; 7162 7163 switch (GET_CODE (x)) 7164 { 7165 case PC: 7166 gcc_assert (flag_pic); 7167 putc ('.', file); 7168 break; 7169 7170 case SYMBOL_REF: 7171 if (! TARGET_MACHO || TARGET_64BIT) 7172 output_addr_const (file, x); 7173 else 7174 { 7175 const char *name = XSTR (x, 0); 7176 7177 /* Mark the decl as referenced so that cgraph will output the function. */ 7178 if (SYMBOL_REF_DECL (x)) 7179 mark_decl_referenced (SYMBOL_REF_DECL (x)); 7180 7181#if TARGET_MACHO 7182 if (MACHOPIC_INDIRECT 7183 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION) 7184 name = machopic_indirection_name (x, /*stub_p=*/true); 7185#endif 7186 assemble_name (file, name); 7187 } 7188 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x)) 7189 fputs ("@PLT", file); 7190 break; 7191 7192 case LABEL_REF: 7193 x = XEXP (x, 0); 7194 /* FALLTHRU */ 7195 case CODE_LABEL: 7196 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); 7197 assemble_name (asm_out_file, buf); 7198 break; 7199 7200 case CONST_INT: 7201 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); 7202 break; 7203 7204 case CONST: 7205 /* This used to output parentheses around the expression, 7206 but that does not work on the 386 (either ATT or BSD assembler). */ 7207 output_pic_addr_const (file, XEXP (x, 0), code); 7208 break; 7209 7210 case CONST_DOUBLE: 7211 if (GET_MODE (x) == VOIDmode) 7212 { 7213 /* We can use %d if the number is <32 bits and positive. 
*/ 7214 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0) 7215 fprintf (file, "0x%lx%08lx", 7216 (unsigned long) CONST_DOUBLE_HIGH (x), 7217 (unsigned long) CONST_DOUBLE_LOW (x)); 7218 else 7219 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x)); 7220 } 7221 else 7222 /* We can't handle floating point constants; 7223 PRINT_OPERAND must handle them. */ 7224 output_operand_lossage ("floating constant misused"); 7225 break; 7226 7227 case PLUS: 7228 /* Some assemblers need integer constants to appear first. */ 7229 if (GET_CODE (XEXP (x, 0)) == CONST_INT) 7230 { 7231 output_pic_addr_const (file, XEXP (x, 0), code); 7232 putc ('+', file); 7233 output_pic_addr_const (file, XEXP (x, 1), code); 7234 } 7235 else 7236 { 7237 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT); 7238 output_pic_addr_const (file, XEXP (x, 1), code); 7239 putc ('+', file); 7240 output_pic_addr_const (file, XEXP (x, 0), code); 7241 } 7242 break; 7243 7244 case MINUS: 7245 if (!TARGET_MACHO) 7246 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file); 7247 output_pic_addr_const (file, XEXP (x, 0), code); 7248 putc ('-', file); 7249 output_pic_addr_const (file, XEXP (x, 1), code); 7250 if (!TARGET_MACHO) 7251 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file); 7252 break; 7253 7254 case UNSPEC: 7255 gcc_assert (XVECLEN (x, 0) == 1); 7256 output_pic_addr_const (file, XVECEXP (x, 0, 0), code); 7257 switch (XINT (x, 1)) 7258 { 7259 case UNSPEC_GOT: 7260 fputs ("@GOT", file); 7261 break; 7262 case UNSPEC_GOTOFF: 7263 fputs ("@GOTOFF", file); 7264 break; 7265 case UNSPEC_GOTPCREL: 7266 fputs ("@GOTPCREL(%rip)", file); 7267 break; 7268 case UNSPEC_GOTTPOFF: 7269 /* FIXME: This might be @TPOFF in Sun ld too. 
*/ 7270 fputs ("@GOTTPOFF", file); 7271 break; 7272 case UNSPEC_TPOFF: 7273 fputs ("@TPOFF", file); 7274 break; 7275 case UNSPEC_NTPOFF: 7276 if (TARGET_64BIT) 7277 fputs ("@TPOFF", file); 7278 else 7279 fputs ("@NTPOFF", file); 7280 break; 7281 case UNSPEC_DTPOFF: 7282 fputs ("@DTPOFF", file); 7283 break; 7284 case UNSPEC_GOTNTPOFF: 7285 if (TARGET_64BIT) 7286 fputs ("@GOTTPOFF(%rip)", file); 7287 else 7288 fputs ("@GOTNTPOFF", file); 7289 break; 7290 case UNSPEC_INDNTPOFF: 7291 fputs ("@INDNTPOFF", file); 7292 break; 7293 default: 7294 output_operand_lossage ("invalid UNSPEC as operand"); 7295 break; 7296 } 7297 break; 7298 7299 default: 7300 output_operand_lossage ("invalid expression as operand"); 7301 } 7302} 7303 7304/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. 7305 We need to emit DTP-relative relocations. */ 7306 7307static void 7308i386_output_dwarf_dtprel (FILE *file, int size, rtx x) 7309{ 7310 fputs (ASM_LONG, file); 7311 output_addr_const (file, x); 7312 fputs ("@DTPOFF", file); 7313 switch (size) 7314 { 7315 case 4: 7316 break; 7317 case 8: 7318 fputs (", 0", file); 7319 break; 7320 default: 7321 gcc_unreachable (); 7322 } 7323} 7324 7325/* In the name of slightly smaller debug output, and to cater to 7326 general assembler lossage, recognize PIC+GOTOFF and turn it back 7327 into a direct symbol reference. 7328 7329 On Darwin, this is necessary to avoid a crash, because Darwin 7330 has a different PIC label for each routine but the DWARF debugging 7331 information is not associated with any particular routine, so it's 7332 necessary to remove references to the PIC label from RTL stored by 7333 the DWARF output code. */ 7334 7335static rtx 7336ix86_delegitimize_address (rtx orig_x) 7337{ 7338 rtx x = orig_x; 7339 /* reg_addend is NULL or a multiple of some register. */ 7340 rtx reg_addend = NULL_RTX; 7341 /* const_addend is NULL or a const_int. */ 7342 rtx const_addend = NULL_RTX; 7343 /* This is the result, or NULL. 
*/ 7344 rtx result = NULL_RTX; 7345 7346 if (GET_CODE (x) == MEM) 7347 x = XEXP (x, 0); 7348 7349 if (TARGET_64BIT) 7350 { 7351 if (GET_CODE (x) != CONST 7352 || GET_CODE (XEXP (x, 0)) != UNSPEC 7353 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL 7354 || GET_CODE (orig_x) != MEM) 7355 return orig_x; 7356 return XVECEXP (XEXP (x, 0), 0, 0); 7357 } 7358 7359 if (GET_CODE (x) != PLUS 7360 || GET_CODE (XEXP (x, 1)) != CONST) 7361 return orig_x; 7362 7363 if (GET_CODE (XEXP (x, 0)) == REG 7364 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM) 7365 /* %ebx + GOT/GOTOFF */ 7366 ; 7367 else if (GET_CODE (XEXP (x, 0)) == PLUS) 7368 { 7369 /* %ebx + %reg * scale + GOT/GOTOFF */ 7370 reg_addend = XEXP (x, 0); 7371 if (GET_CODE (XEXP (reg_addend, 0)) == REG 7372 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM) 7373 reg_addend = XEXP (reg_addend, 1); 7374 else if (GET_CODE (XEXP (reg_addend, 1)) == REG 7375 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM) 7376 reg_addend = XEXP (reg_addend, 0); 7377 else 7378 return orig_x; 7379 if (GET_CODE (reg_addend) != REG 7380 && GET_CODE (reg_addend) != MULT 7381 && GET_CODE (reg_addend) != ASHIFT) 7382 return orig_x; 7383 } 7384 else 7385 return orig_x; 7386 7387 x = XEXP (XEXP (x, 1), 0); 7388 if (GET_CODE (x) == PLUS 7389 && GET_CODE (XEXP (x, 1)) == CONST_INT) 7390 { 7391 const_addend = XEXP (x, 1); 7392 x = XEXP (x, 0); 7393 } 7394 7395 if (GET_CODE (x) == UNSPEC 7396 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM) 7397 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM))) 7398 result = XVECEXP (x, 0, 0); 7399 7400 if (TARGET_MACHO && darwin_local_data_pic (x) 7401 && GET_CODE (orig_x) != MEM) 7402 result = XEXP (x, 0); 7403 7404 if (! 
result) 7405 return orig_x; 7406 7407 if (const_addend) 7408 result = gen_rtx_PLUS (Pmode, result, const_addend); 7409 if (reg_addend) 7410 result = gen_rtx_PLUS (Pmode, reg_addend, result); 7411 return result; 7412} 7413 7414static void 7415put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse, 7416 int fp, FILE *file) 7417{ 7418 const char *suffix; 7419 7420 if (mode == CCFPmode || mode == CCFPUmode) 7421 { 7422 enum rtx_code second_code, bypass_code; 7423 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 7424 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN); 7425 code = ix86_fp_compare_code_to_integer (code); 7426 mode = CCmode; 7427 } 7428 if (reverse) 7429 code = reverse_condition (code); 7430 7431 switch (code) 7432 { 7433 case EQ: 7434 suffix = "e"; 7435 break; 7436 case NE: 7437 suffix = "ne"; 7438 break; 7439 case GT: 7440 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode); 7441 suffix = "g"; 7442 break; 7443 case GTU: 7444 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers. 7445 Those same assemblers have the same but opposite lossage on cmov. */ 7446 gcc_assert (mode == CCmode); 7447 suffix = fp ? "nbe" : "a"; 7448 break; 7449 case LT: 7450 switch (mode) 7451 { 7452 case CCNOmode: 7453 case CCGOCmode: 7454 suffix = "s"; 7455 break; 7456 7457 case CCmode: 7458 case CCGCmode: 7459 suffix = "l"; 7460 break; 7461 7462 default: 7463 gcc_unreachable (); 7464 } 7465 break; 7466 case LTU: 7467 gcc_assert (mode == CCmode); 7468 suffix = "b"; 7469 break; 7470 case GE: 7471 switch (mode) 7472 { 7473 case CCNOmode: 7474 case CCGOCmode: 7475 suffix = "ns"; 7476 break; 7477 7478 case CCmode: 7479 case CCGCmode: 7480 suffix = "ge"; 7481 break; 7482 7483 default: 7484 gcc_unreachable (); 7485 } 7486 break; 7487 case GEU: 7488 /* ??? As above. */ 7489 gcc_assert (mode == CCmode); 7490 suffix = fp ? 
"nb" : "ae"; 7491 break; 7492 case LE: 7493 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode); 7494 suffix = "le"; 7495 break; 7496 case LEU: 7497 gcc_assert (mode == CCmode); 7498 suffix = "be"; 7499 break; 7500 case UNORDERED: 7501 suffix = fp ? "u" : "p"; 7502 break; 7503 case ORDERED: 7504 suffix = fp ? "nu" : "np"; 7505 break; 7506 default: 7507 gcc_unreachable (); 7508 } 7509 fputs (suffix, file); 7510} 7511 7512/* Print the name of register X to FILE based on its machine mode and number. 7513 If CODE is 'w', pretend the mode is HImode. 7514 If CODE is 'b', pretend the mode is QImode. 7515 If CODE is 'k', pretend the mode is SImode. 7516 If CODE is 'q', pretend the mode is DImode. 7517 If CODE is 'h', pretend the reg is the 'high' byte register. 7518 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */ 7519 7520void 7521print_reg (rtx x, int code, FILE *file) 7522{ 7523 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM 7524 && REGNO (x) != FRAME_POINTER_REGNUM 7525 && REGNO (x) != FLAGS_REG 7526 && REGNO (x) != FPSR_REG); 7527 7528 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0) 7529 putc ('%', file); 7530 7531 if (code == 'w' || MMX_REG_P (x)) 7532 code = 2; 7533 else if (code == 'b') 7534 code = 1; 7535 else if (code == 'k') 7536 code = 4; 7537 else if (code == 'q') 7538 code = 8; 7539 else if (code == 'y') 7540 code = 3; 7541 else if (code == 'h') 7542 code = 0; 7543 else 7544 code = GET_MODE_SIZE (GET_MODE (x)); 7545 7546 /* Irritatingly, AMD extended registers use different naming convention 7547 from the normal registers. 
*/ 7548 if (REX_INT_REG_P (x)) 7549 { 7550 gcc_assert (TARGET_64BIT); 7551 switch (code) 7552 { 7553 case 0: 7554 error ("extended registers have no high halves"); 7555 break; 7556 case 1: 7557 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8); 7558 break; 7559 case 2: 7560 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8); 7561 break; 7562 case 4: 7563 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8); 7564 break; 7565 case 8: 7566 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8); 7567 break; 7568 default: 7569 error ("unsupported operand size for extended register"); 7570 break; 7571 } 7572 return; 7573 } 7574 switch (code) 7575 { 7576 case 3: 7577 if (STACK_TOP_P (x)) 7578 { 7579 fputs ("st(0)", file); 7580 break; 7581 } 7582 /* FALLTHRU */ 7583 case 8: 7584 case 4: 7585 case 12: 7586 if (! ANY_FP_REG_P (x)) 7587 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file); 7588 /* FALLTHRU */ 7589 case 16: 7590 case 2: 7591 normal: 7592 fputs (hi_reg_name[REGNO (x)], file); 7593 break; 7594 case 1: 7595 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name)) 7596 goto normal; 7597 fputs (qi_reg_name[REGNO (x)], file); 7598 break; 7599 case 0: 7600 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name)) 7601 goto normal; 7602 fputs (qi_high_reg_name[REGNO (x)], file); 7603 break; 7604 default: 7605 gcc_unreachable (); 7606 } 7607} 7608 7609/* Locate some local-dynamic symbol still in use by this function 7610 so that we can print its name in some tls_local_dynamic_base 7611 pattern. 
*/ 7612 7613static const char * 7614get_some_local_dynamic_name (void) 7615{ 7616 rtx insn; 7617 7618 if (cfun->machine->some_ld_name) 7619 return cfun->machine->some_ld_name; 7620 7621 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) 7622 if (INSN_P (insn) 7623 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) 7624 return cfun->machine->some_ld_name; 7625 7626 gcc_unreachable (); 7627} 7628 7629static int 7630get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED) 7631{ 7632 rtx x = *px; 7633 7634 if (GET_CODE (x) == SYMBOL_REF 7635 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC) 7636 { 7637 cfun->machine->some_ld_name = XSTR (x, 0); 7638 return 1; 7639 } 7640 7641 return 0; 7642} 7643 7644/* Meaning of CODE: 7645 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. 7646 C -- print opcode suffix for set/cmov insn. 7647 c -- like C, but print reversed condition 7648 F,f -- likewise, but for floating-point. 7649 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", 7650 otherwise nothing 7651 R -- print the prefix for register names. 7652 z -- print the opcode suffix for the size of the current operand. 7653 * -- print a star (in certain assembler syntax) 7654 A -- print an absolute memory reference. 7655 w -- print the operand as if it's a "word" (HImode) even if it isn't. 7656 s -- print a shift double count, followed by the assemblers argument 7657 delimiter. 7658 b -- print the QImode name of the register for the indicated operand. 7659 %b0 would print %al if operands[0] is reg 0. 7660 w -- likewise, print the HImode name of the register. 7661 k -- likewise, print the SImode name of the register. 7662 q -- likewise, print the DImode name of the register. 7663 h -- print the QImode name for a "high" register, either ah, bh, ch or dh. 7664 y -- print "st(0)" instead of "st" as a register. 7665 D -- print condition for SSE cmp instruction. 7666 P -- if PIC, print an @PLT suffix. 
7667 X -- don't print any sort of PIC '@' suffix for a symbol. 7668 & -- print some in-use local-dynamic symbol name. 7669 H -- print a memory address offset by 8; used for sse high-parts 7670 */ 7671 7672void 7673print_operand (FILE *file, rtx x, int code) 7674{ 7675 if (code) 7676 { 7677 switch (code) 7678 { 7679 case '*': 7680 if (ASSEMBLER_DIALECT == ASM_ATT) 7681 putc ('*', file); 7682 return; 7683 7684 case '&': 7685 assemble_name (file, get_some_local_dynamic_name ()); 7686 return; 7687 7688 case 'A': 7689 switch (ASSEMBLER_DIALECT) 7690 { 7691 case ASM_ATT: 7692 putc ('*', file); 7693 break; 7694 7695 case ASM_INTEL: 7696 /* Intel syntax. For absolute addresses, registers should not 7697 be surrounded by braces. */ 7698 if (GET_CODE (x) != REG) 7699 { 7700 putc ('[', file); 7701 PRINT_OPERAND (file, x, 0); 7702 putc (']', file); 7703 return; 7704 } 7705 break; 7706 7707 default: 7708 gcc_unreachable (); 7709 } 7710 7711 PRINT_OPERAND (file, x, 0); 7712 return; 7713 7714 7715 case 'L': 7716 if (ASSEMBLER_DIALECT == ASM_ATT) 7717 putc ('l', file); 7718 return; 7719 7720 case 'W': 7721 if (ASSEMBLER_DIALECT == ASM_ATT) 7722 putc ('w', file); 7723 return; 7724 7725 case 'B': 7726 if (ASSEMBLER_DIALECT == ASM_ATT) 7727 putc ('b', file); 7728 return; 7729 7730 case 'Q': 7731 if (ASSEMBLER_DIALECT == ASM_ATT) 7732 putc ('l', file); 7733 return; 7734 7735 case 'S': 7736 if (ASSEMBLER_DIALECT == ASM_ATT) 7737 putc ('s', file); 7738 return; 7739 7740 case 'T': 7741 if (ASSEMBLER_DIALECT == ASM_ATT) 7742 putc ('t', file); 7743 return; 7744 7745 case 'z': 7746 /* 387 opcodes don't get size suffixes if the operands are 7747 registers. */ 7748 if (STACK_REG_P (x)) 7749 return; 7750 7751 /* Likewise if using Intel opcodes. */ 7752 if (ASSEMBLER_DIALECT == ASM_INTEL) 7753 return; 7754 7755 /* This is the size of op from size of operand. 
*/ 7756 switch (GET_MODE_SIZE (GET_MODE (x))) 7757 { 7758 case 2: 7759#ifdef HAVE_GAS_FILDS_FISTS 7760 putc ('s', file); 7761#endif 7762 return; 7763 7764 case 4: 7765 if (GET_MODE (x) == SFmode) 7766 { 7767 putc ('s', file); 7768 return; 7769 } 7770 else 7771 putc ('l', file); 7772 return; 7773 7774 case 12: 7775 case 16: 7776 putc ('t', file); 7777 return; 7778 7779 case 8: 7780 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) 7781 { 7782#ifdef GAS_MNEMONICS 7783 putc ('q', file); 7784#else 7785 putc ('l', file); 7786 putc ('l', file); 7787#endif 7788 } 7789 else 7790 putc ('l', file); 7791 return; 7792 7793 default: 7794 gcc_unreachable (); 7795 } 7796 7797 case 'b': 7798 case 'w': 7799 case 'k': 7800 case 'q': 7801 case 'h': 7802 case 'y': 7803 case 'X': 7804 case 'P': 7805 break; 7806 7807 case 's': 7808 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT) 7809 { 7810 PRINT_OPERAND (file, x, 0); 7811 putc (',', file); 7812 } 7813 return; 7814 7815 case 'D': 7816 /* Little bit of braindamage here. The SSE compare instructions 7817 does use completely different names for the comparisons that the 7818 fp conditional moves. 
*/ 7819 switch (GET_CODE (x)) 7820 { 7821 case EQ: 7822 case UNEQ: 7823 fputs ("eq", file); 7824 break; 7825 case LT: 7826 case UNLT: 7827 fputs ("lt", file); 7828 break; 7829 case LE: 7830 case UNLE: 7831 fputs ("le", file); 7832 break; 7833 case UNORDERED: 7834 fputs ("unord", file); 7835 break; 7836 case NE: 7837 case LTGT: 7838 fputs ("neq", file); 7839 break; 7840 case UNGE: 7841 case GE: 7842 fputs ("nlt", file); 7843 break; 7844 case UNGT: 7845 case GT: 7846 fputs ("nle", file); 7847 break; 7848 case ORDERED: 7849 fputs ("ord", file); 7850 break; 7851 default: 7852 gcc_unreachable (); 7853 } 7854 return; 7855 case 'O': 7856#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 7857 if (ASSEMBLER_DIALECT == ASM_ATT) 7858 { 7859 switch (GET_MODE (x)) 7860 { 7861 case HImode: putc ('w', file); break; 7862 case SImode: 7863 case SFmode: putc ('l', file); break; 7864 case DImode: 7865 case DFmode: putc ('q', file); break; 7866 default: gcc_unreachable (); 7867 } 7868 putc ('.', file); 7869 } 7870#endif 7871 return; 7872 case 'C': 7873 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file); 7874 return; 7875 case 'F': 7876#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 7877 if (ASSEMBLER_DIALECT == ASM_ATT) 7878 putc ('.', file); 7879#endif 7880 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file); 7881 return; 7882 7883 /* Like above, but reverse condition */ 7884 case 'c': 7885 /* Check to see if argument to %c is really a constant 7886 and not a condition code which needs to be reversed. 
*/ 7887 if (!COMPARISON_P (x)) 7888 { 7889 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'"); 7890 return; 7891 } 7892 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file); 7893 return; 7894 case 'f': 7895#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 7896 if (ASSEMBLER_DIALECT == ASM_ATT) 7897 putc ('.', file); 7898#endif 7899 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file); 7900 return; 7901 7902 case 'H': 7903 /* It doesn't actually matter what mode we use here, as we're 7904 only going to use this for printing. */ 7905 x = adjust_address_nv (x, DImode, 8); 7906 break; 7907 7908 case '+': 7909 { 7910 rtx x; 7911 7912 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS) 7913 return; 7914 7915 x = find_reg_note (current_output_insn, REG_BR_PROB, 0); 7916 if (x) 7917 { 7918 int pred_val = INTVAL (XEXP (x, 0)); 7919 7920 if (pred_val < REG_BR_PROB_BASE * 45 / 100 7921 || pred_val > REG_BR_PROB_BASE * 55 / 100) 7922 { 7923 int taken = pred_val > REG_BR_PROB_BASE / 2; 7924 int cputaken = final_forward_branch_p (current_output_insn) == 0; 7925 7926 /* Emit hints only in the case default branch prediction 7927 heuristics would fail. */ 7928 if (taken != cputaken) 7929 { 7930 /* We use 3e (DS) prefix for taken branches and 7931 2e (CS) prefix for not taken branches. */ 7932 if (taken) 7933 fputs ("ds ; ", file); 7934 else 7935 fputs ("cs ; ", file); 7936 } 7937 } 7938 } 7939 return; 7940 } 7941 default: 7942 output_operand_lossage ("invalid operand code '%c'", code); 7943 } 7944 } 7945 7946 if (GET_CODE (x) == REG) 7947 print_reg (x, code, file); 7948 7949 else if (GET_CODE (x) == MEM) 7950 { 7951 /* No `byte ptr' prefix for call instructions. 
*/ 7952 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P') 7953 { 7954 const char * size; 7955 switch (GET_MODE_SIZE (GET_MODE (x))) 7956 { 7957 case 1: size = "BYTE"; break; 7958 case 2: size = "WORD"; break; 7959 case 4: size = "DWORD"; break; 7960 case 8: size = "QWORD"; break; 7961 case 12: size = "XWORD"; break; 7962 case 16: size = "XMMWORD"; break; 7963 default: 7964 gcc_unreachable (); 7965 } 7966 7967 /* Check for explicit size override (codes 'b', 'w' and 'k') */ 7968 if (code == 'b') 7969 size = "BYTE"; 7970 else if (code == 'w') 7971 size = "WORD"; 7972 else if (code == 'k') 7973 size = "DWORD"; 7974 7975 fputs (size, file); 7976 fputs (" PTR ", file); 7977 } 7978 7979 x = XEXP (x, 0); 7980 /* Avoid (%rip) for call operands. */ 7981 if (CONSTANT_ADDRESS_P (x) && code == 'P' 7982 && GET_CODE (x) != CONST_INT) 7983 output_addr_const (file, x); 7984 else if (this_is_asm_operands && ! address_operand (x, VOIDmode)) 7985 output_operand_lossage ("invalid constraints for operand"); 7986 else 7987 output_address (x); 7988 } 7989 7990 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode) 7991 { 7992 REAL_VALUE_TYPE r; 7993 long l; 7994 7995 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 7996 REAL_VALUE_TO_TARGET_SINGLE (r, l); 7997 7998 if (ASSEMBLER_DIALECT == ASM_ATT) 7999 putc ('$', file); 8000 fprintf (file, "0x%08lx", l); 8001 } 8002 8003 /* These float cases don't actually occur as immediate operands. */ 8004 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode) 8005 { 8006 char dstr[30]; 8007 8008 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); 8009 fprintf (file, "%s", dstr); 8010 } 8011 8012 else if (GET_CODE (x) == CONST_DOUBLE 8013 && GET_MODE (x) == XFmode) 8014 { 8015 char dstr[30]; 8016 8017 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); 8018 fprintf (file, "%s", dstr); 8019 } 8020 8021 else 8022 { 8023 /* We have patterns that allow zero sets of memory, for instance. 
8024 In 64-bit mode, we should probably support all 8-byte vectors, 8025 since we can in fact encode that into an immediate. */ 8026 if (GET_CODE (x) == CONST_VECTOR) 8027 { 8028 gcc_assert (x == CONST0_RTX (GET_MODE (x))); 8029 x = const0_rtx; 8030 } 8031 8032 if (code != 'P') 8033 { 8034 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE) 8035 { 8036 if (ASSEMBLER_DIALECT == ASM_ATT) 8037 putc ('$', file); 8038 } 8039 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF 8040 || GET_CODE (x) == LABEL_REF) 8041 { 8042 if (ASSEMBLER_DIALECT == ASM_ATT) 8043 putc ('$', file); 8044 else 8045 fputs ("OFFSET FLAT:", file); 8046 } 8047 } 8048 if (GET_CODE (x) == CONST_INT) 8049 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); 8050 else if (flag_pic) 8051 output_pic_addr_const (file, x, code); 8052 else 8053 output_addr_const (file, x); 8054 } 8055} 8056 8057/* Print a memory operand whose address is ADDR. */ 8058 8059void 8060print_operand_address (FILE *file, rtx addr) 8061{ 8062 struct ix86_address parts; 8063 rtx base, index, disp; 8064 int scale; 8065 int ok = ix86_decompose_address (addr, &parts); 8066 8067 gcc_assert (ok); 8068 8069 base = parts.base; 8070 index = parts.index; 8071 disp = parts.disp; 8072 scale = parts.scale; 8073 8074 switch (parts.seg) 8075 { 8076 case SEG_DEFAULT: 8077 break; 8078 case SEG_FS: 8079 case SEG_GS: 8080 if (USER_LABEL_PREFIX[0] == 0) 8081 putc ('%', file); 8082 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file); 8083 break; 8084 default: 8085 gcc_unreachable (); 8086 } 8087 8088 if (!base && !index) 8089 { 8090 /* Displacement only requires special attention. 
*/ 8091 8092 if (GET_CODE (disp) == CONST_INT) 8093 { 8094 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT) 8095 { 8096 if (USER_LABEL_PREFIX[0] == 0) 8097 putc ('%', file); 8098 fputs ("ds:", file); 8099 } 8100 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp)); 8101 } 8102 else if (flag_pic) 8103 output_pic_addr_const (file, disp, 0); 8104 else 8105 output_addr_const (file, disp); 8106 8107 /* Use one byte shorter RIP relative addressing for 64bit mode. */ 8108 if (TARGET_64BIT) 8109 { 8110 if (GET_CODE (disp) == CONST 8111 && GET_CODE (XEXP (disp, 0)) == PLUS 8112 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT) 8113 disp = XEXP (XEXP (disp, 0), 0); 8114 if (GET_CODE (disp) == LABEL_REF 8115 || (GET_CODE (disp) == SYMBOL_REF 8116 && SYMBOL_REF_TLS_MODEL (disp) == 0)) 8117 fputs ("(%rip)", file); 8118 } 8119 } 8120 else 8121 { 8122 if (ASSEMBLER_DIALECT == ASM_ATT) 8123 { 8124 if (disp) 8125 { 8126 if (flag_pic) 8127 output_pic_addr_const (file, disp, 0); 8128 else if (GET_CODE (disp) == LABEL_REF) 8129 output_asm_label (disp); 8130 else 8131 output_addr_const (file, disp); 8132 } 8133 8134 putc ('(', file); 8135 if (base) 8136 print_reg (base, 0, file); 8137 if (index) 8138 { 8139 putc (',', file); 8140 print_reg (index, 0, file); 8141 if (scale != 1) 8142 fprintf (file, ",%d", scale); 8143 } 8144 putc (')', file); 8145 } 8146 else 8147 { 8148 rtx offset = NULL_RTX; 8149 8150 if (disp) 8151 { 8152 /* Pull out the offset of a symbol; print any symbol itself. 
*/ 8153 if (GET_CODE (disp) == CONST 8154 && GET_CODE (XEXP (disp, 0)) == PLUS 8155 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT) 8156 { 8157 offset = XEXP (XEXP (disp, 0), 1); 8158 disp = gen_rtx_CONST (VOIDmode, 8159 XEXP (XEXP (disp, 0), 0)); 8160 } 8161 8162 if (flag_pic) 8163 output_pic_addr_const (file, disp, 0); 8164 else if (GET_CODE (disp) == LABEL_REF) 8165 output_asm_label (disp); 8166 else if (GET_CODE (disp) == CONST_INT) 8167 offset = disp; 8168 else 8169 output_addr_const (file, disp); 8170 } 8171 8172 putc ('[', file); 8173 if (base) 8174 { 8175 print_reg (base, 0, file); 8176 if (offset) 8177 { 8178 if (INTVAL (offset) >= 0) 8179 putc ('+', file); 8180 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); 8181 } 8182 } 8183 else if (offset) 8184 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); 8185 else 8186 putc ('0', file); 8187 8188 if (index) 8189 { 8190 putc ('+', file); 8191 print_reg (index, 0, file); 8192 if (scale != 1) 8193 fprintf (file, "*%d", scale); 8194 } 8195 putc (']', file); 8196 } 8197 } 8198} 8199 8200bool 8201output_addr_const_extra (FILE *file, rtx x) 8202{ 8203 rtx op; 8204 8205 if (GET_CODE (x) != UNSPEC) 8206 return false; 8207 8208 op = XVECEXP (x, 0, 0); 8209 switch (XINT (x, 1)) 8210 { 8211 case UNSPEC_GOTTPOFF: 8212 output_addr_const (file, op); 8213 /* FIXME: This might be @TPOFF in Sun ld. 
*/ 8214 fputs ("@GOTTPOFF", file); 8215 break; 8216 case UNSPEC_TPOFF: 8217 output_addr_const (file, op); 8218 fputs ("@TPOFF", file); 8219 break; 8220 case UNSPEC_NTPOFF: 8221 output_addr_const (file, op); 8222 if (TARGET_64BIT) 8223 fputs ("@TPOFF", file); 8224 else 8225 fputs ("@NTPOFF", file); 8226 break; 8227 case UNSPEC_DTPOFF: 8228 output_addr_const (file, op); 8229 fputs ("@DTPOFF", file); 8230 break; 8231 case UNSPEC_GOTNTPOFF: 8232 output_addr_const (file, op); 8233 if (TARGET_64BIT) 8234 fputs ("@GOTTPOFF(%rip)", file); 8235 else 8236 fputs ("@GOTNTPOFF", file); 8237 break; 8238 case UNSPEC_INDNTPOFF: 8239 output_addr_const (file, op); 8240 fputs ("@INDNTPOFF", file); 8241 break; 8242 8243 default: 8244 return false; 8245 } 8246 8247 return true; 8248} 8249 8250/* Split one or more DImode RTL references into pairs of SImode 8251 references. The RTL can be REG, offsettable MEM, integer constant, or 8252 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to 8253 split and "num" is its length. lo_half and hi_half are output arrays 8254 that parallel "operands". */ 8255 8256void 8257split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) 8258{ 8259 while (num--) 8260 { 8261 rtx op = operands[num]; 8262 8263 /* simplify_subreg refuse to split volatile memory addresses, 8264 but we still have to handle it. */ 8265 if (GET_CODE (op) == MEM) 8266 { 8267 lo_half[num] = adjust_address (op, SImode, 0); 8268 hi_half[num] = adjust_address (op, SImode, 4); 8269 } 8270 else 8271 { 8272 lo_half[num] = simplify_gen_subreg (SImode, op, 8273 GET_MODE (op) == VOIDmode 8274 ? DImode : GET_MODE (op), 0); 8275 hi_half[num] = simplify_gen_subreg (SImode, op, 8276 GET_MODE (op) == VOIDmode 8277 ? DImode : GET_MODE (op), 4); 8278 } 8279 } 8280} 8281/* Split one or more TImode RTL references into pairs of DImode 8282 references. The RTL can be REG, offsettable MEM, integer constant, or 8283 CONST_DOUBLE. 
   "operands" is a pointer to an array of TImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle it.  */
      if (GET_CODE (op) == MEM)
	{
	  /* Low DImode word at byte offset 0, high word at offset 8.  */
	  lo_half[num] = adjust_address (op, DImode, 0);
	  hi_half[num] = adjust_address (op, DImode, 8);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
	}
    }
}

/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.

   In the returned templates, "{att|intel}" braces select between the
   two assembler dialects, and %z / %y are target operand modifiers
   (size suffix / bare st(N) form — handled by this file's
   print_operand).  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  /* Assembled instruction template; mnemonic root plus operand part.  */
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else
    gcc_assert (is_sse);
#endif

  /* Pick the x87 mnemonic root (fi* when one operand is an integer
     memory operand) and the SSE mnemonic root.  */
  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "div";
      break;

    default:
      gcc_unreachable ();
    }

  if (is_sse)
   {
      /* Scalar SSE: ss suffix for SFmode, sd for DFmode.  */
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
	strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
	strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
   }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative: canonicalize so the register matching operands[0]
	 is operands[1].  NOTE: this mutates the operands array.  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative: operand order selects the plain or the
	 reversed (fsubr/fdivr) form.  */
      if (GET_CODE (operands[1]) == MEM)
	{
	  p = "r%z1\t%1";
	  break;
	}

      if (GET_CODE (operands[2]) == MEM)
	{
	  p = "%z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}

/* Return needed mode for entity in optimize_mode_switching pass.
   ENTITY is one of the I387_* control-word entities; INSN is the insn
   being examined.  */

int
ix86_mode_needed (int entity, rtx insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specify that function
     has no requirements on the control word and make no changes in the
     bits we are interested in.  */

  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
	  && (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  /* Unrecognizable insns place no requirement on the control word.  */
  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

  /* Only report the mode when it belongs to the entity being asked
     about; other entities see no requirement (I387_CW_ANY).  */
  switch (entity)
    {
    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
	return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
	return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
	return mode;
      break;

    case I387_MASK_PM:
      if (mode == I387_CW_MASK_PM)
	return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}

/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  The current control word is saved in the
   SLOT_CW_STORED stack slot; MODE (an I387_CW_* value) selects which
   modified copy to build and which stack slot receives it.  */

void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  int slot;

  rtx reg = gen_reg_rtx (HImode);

  /* Save the current x87 control word and load it into a pseudo.  */
  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, stored_mode);

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
    {
      /* Operate on the full HImode value.  0x0c00 is the control
	 word's rounding-control field; 0x0020 is the precision-mask
	 (PM) exception bit.  */
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      /* Insert directly into the rounding-control bit-field to avoid
	 a partial-register stall (gen_movsi_insv_1 writes the field
	 within an SImode view of the register).  */
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  /* Spill the modified control word to its dedicated stack slot so
     the fldcw in the rounding patterns can load it.  */
  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}

/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  FISTTP nonzero selects the SSE3 fisttp
   instruction (truncating store, no control-word dance needed).  */

const char *
output_fix_trunc (rtx insn, rtx *operands, int fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (GET_CODE (operands[0]) == MEM);

  if (fisttp)
      output_asm_insn ("fisttp%z0\t%0", operands);
  else
    {
      /* Switch the control word to the requested rounding mode around
	 the store, then restore the saved word (%2/%3 are the stack
	 slots set up by emit_i387_cw_initialization).  */
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
	output_asm_insn ("fistp%z0\t%0", operands);
      else
	output_asm_insn ("fist%z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}

/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#if HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    /* Assembler has no ffreep mnemonic: emit the raw opcode bytes
       (0xdf 0xc0+i) for ffreep st(i) directly.  */
    switch (REGNO (operands[opno]))
      {
      case FIRST_STACK_REG + 0: return ".word\t0xc0df";
      case FIRST_STACK_REG + 1: return ".word\t0xc1df";
      case FIRST_STACK_REG + 2: return ".word\t0xc2df";
      case FIRST_STACK_REG + 3: return ".word\t0xc3df";
      case FIRST_STACK_REG + 4: return ".word\t0xc4df";
      case FIRST_STACK_REG + 5: return ".word\t0xc5df";
      case FIRST_STACK_REG + 6: return ".word\t0xc6df";
      case FIRST_STACK_REG + 7: return ".word\t0xc7df";
      }
#endif

  /* Fall back to a plain popping store.  */
  return opno ? "fstp\t%y1" : "fstp\t%y0";
}


/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.
*/

const char *
output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  /* For fcomi the two compared operands are operands[0]/[1]; for the
     fnstsw forms operands[0] is the status-word destination and the
     compared operands are operands[1]/[2].  */
  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "ucomiss\t{%1, %0|%0, %1}";
	else
	  return "comiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "ucomisd\t{%1, %0|%0, %1}";
	else
	  return "comisd\t{%1, %0|%0, %1}";
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  /* Compare against zero: ftst needs no second operand.  */
  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
	{
	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
	  return output_387_ffreep (operands, 1);
	}
      else
	return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return output_387_ffreep (operands, 0);
	}
      else
	{
	  if (unordered_p)
	    return "fucompp\n\tfnstsw\t%0";
	  else
	    return "fcompp\n\tfnstsw\t%0";
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
      {
	"fcom%z2\t%y2\n\tfnstsw\t%0",
	"fcomp%z2\t%y2\n\tfnstsw\t%0",
	"fucom%z2\t%y2\n\tfnstsw\t%0",
	"fucomp%z2\t%y2\n\tfnstsw\t%0",

	"ficom%z2\t%y2\n\tfnstsw\t%0",
	"ficomp%z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      /* Build the 4-bit table index from the flags above; NULL slots
	 are combinations that cannot occur (asserted below).  */
      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}

/* Emit one element of an address vector (jump table): a 32-bit (or,
   for 64-bit targets, 64-bit) pointer to local label VALUE.  */

void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

/* Emit one element of a PIC-friendly address-difference vector:
   label VALUE expressed relative to label REL (64-bit), the GOT
   (GOTOFF), the Mach-O function base, or the GOT symbol itself.  */

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  if (TARGET_64BIT)
    fprintf (file, "%s%s%d-%s%d\n",
	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
      machopic_output_function_base_name (file);
      fprintf(file, "\n");
    }
#endif
  else
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}

/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
    {
      /* xor clobbers the flags; hard reg 17 is the flags register
	 (FLAGS_REG elsewhere in this file).  */
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}

/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.
*/ 8923 8924rtx 8925maybe_get_pool_constant (rtx x) 8926{ 8927 x = ix86_delegitimize_address (XEXP (x, 0)); 8928 8929 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) 8930 return get_pool_constant (x); 8931 8932 return NULL_RTX; 8933} 8934 8935void 8936ix86_expand_move (enum machine_mode mode, rtx operands[]) 8937{ 8938 int strict = (reload_in_progress || reload_completed); 8939 rtx op0, op1; 8940 enum tls_model model; 8941 8942 op0 = operands[0]; 8943 op1 = operands[1]; 8944 8945 if (GET_CODE (op1) == SYMBOL_REF) 8946 { 8947 model = SYMBOL_REF_TLS_MODEL (op1); 8948 if (model) 8949 { 8950 op1 = legitimize_tls_address (op1, model, true); 8951 op1 = force_operand (op1, op0); 8952 if (op1 == op0) 8953 return; 8954 } 8955 } 8956 else if (GET_CODE (op1) == CONST 8957 && GET_CODE (XEXP (op1, 0)) == PLUS 8958 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF) 8959 { 8960 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0)); 8961 if (model) 8962 { 8963 rtx addend = XEXP (XEXP (op1, 0), 1); 8964 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true); 8965 op1 = force_operand (op1, NULL); 8966 op1 = expand_simple_binop (Pmode, PLUS, op1, addend, 8967 op0, 1, OPTAB_DIRECT); 8968 if (op1 == op0) 8969 return; 8970 } 8971 } 8972 8973 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode)) 8974 { 8975 if (TARGET_MACHO && !TARGET_64BIT) 8976 { 8977#if TARGET_MACHO 8978 if (MACHOPIC_PURE) 8979 { 8980 rtx temp = ((reload_in_progress 8981 || ((op0 && GET_CODE (op0) == REG) 8982 && mode == Pmode)) 8983 ? op0 : gen_reg_rtx (Pmode)); 8984 op1 = machopic_indirect_data_reference (op1, temp); 8985 op1 = machopic_legitimize_pic_address (op1, mode, 8986 temp == op1 ? 
0 : temp); 8987 } 8988 else if (MACHOPIC_INDIRECT) 8989 op1 = machopic_indirect_data_reference (op1, 0); 8990 if (op0 == op1) 8991 return; 8992#endif 8993 } 8994 else 8995 { 8996 if (GET_CODE (op0) == MEM) 8997 op1 = force_reg (Pmode, op1); 8998 else 8999 op1 = legitimize_address (op1, op1, Pmode); 9000 } 9001 } 9002 else 9003 { 9004 if (GET_CODE (op0) == MEM 9005 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode) 9006 || !push_operand (op0, mode)) 9007 && GET_CODE (op1) == MEM) 9008 op1 = force_reg (mode, op1); 9009 9010 if (push_operand (op0, mode) 9011 && ! general_no_elim_operand (op1, mode)) 9012 op1 = copy_to_mode_reg (mode, op1); 9013 9014 /* Force large constants in 64bit compilation into register 9015 to get them CSEed. */ 9016 if (TARGET_64BIT && mode == DImode 9017 && immediate_operand (op1, mode) 9018 && !x86_64_zext_immediate_operand (op1, VOIDmode) 9019 && !register_operand (op0, mode) 9020 && optimize && !reload_completed && !reload_in_progress) 9021 op1 = copy_to_mode_reg (mode, op1); 9022 9023 if (FLOAT_MODE_P (mode)) 9024 { 9025 /* If we are loading a floating point constant to a register, 9026 force the value to memory now, since we'll get better code 9027 out the back end. */ 9028 9029 if (strict) 9030 ; 9031 else if (GET_CODE (op1) == CONST_DOUBLE) 9032 { 9033 op1 = validize_mem (force_const_mem (mode, op1)); 9034 if (!register_operand (op0, mode)) 9035 { 9036 rtx temp = gen_reg_rtx (mode); 9037 emit_insn (gen_rtx_SET (VOIDmode, temp, op1)); 9038 emit_move_insn (op0, temp); 9039 return; 9040 } 9041 } 9042 } 9043 } 9044 9045 emit_insn (gen_rtx_SET (VOIDmode, op0, op1)); 9046} 9047 9048void 9049ix86_expand_vector_move (enum machine_mode mode, rtx operands[]) 9050{ 9051 rtx op0 = operands[0], op1 = operands[1]; 9052 9053 /* Force constants other than zero into memory. 
We do not know how 9054 the instructions used to build constants modify the upper 64 bits 9055 of the register, once we have that information we may be able 9056 to handle some of them more efficiently. */ 9057 if ((reload_in_progress | reload_completed) == 0 9058 && register_operand (op0, mode) 9059 && CONSTANT_P (op1) 9060 && standard_sse_constant_p (op1) <= 0) 9061 op1 = validize_mem (force_const_mem (mode, op1)); 9062 9063 /* Make operand1 a register if it isn't already. */ 9064 if (!no_new_pseudos 9065 && !register_operand (op0, mode) 9066 && !register_operand (op1, mode)) 9067 { 9068 emit_move_insn (op0, force_reg (GET_MODE (op0), op1)); 9069 return; 9070 } 9071 9072 emit_insn (gen_rtx_SET (VOIDmode, op0, op1)); 9073} 9074 9075/* Implement the movmisalign patterns for SSE. Non-SSE modes go 9076 straight to ix86_expand_vector_move. */ 9077 9078void 9079ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) 9080{ 9081 rtx op0, op1, m; 9082 9083 op0 = operands[0]; 9084 op1 = operands[1]; 9085 9086 if (MEM_P (op1)) 9087 { 9088 /* If we're optimizing for size, movups is the smallest. */ 9089 if (optimize_size) 9090 { 9091 op0 = gen_lowpart (V4SFmode, op0); 9092 op1 = gen_lowpart (V4SFmode, op1); 9093 emit_insn (gen_sse_movups (op0, op1)); 9094 return; 9095 } 9096 9097 /* ??? If we have typed data, then it would appear that using 9098 movdqu is the only way to get unaligned data loaded with 9099 integer type. */ 9100 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) 9101 { 9102 op0 = gen_lowpart (V16QImode, op0); 9103 op1 = gen_lowpart (V16QImode, op1); 9104 emit_insn (gen_sse2_movdqu (op0, op1)); 9105 return; 9106 } 9107 9108 if (TARGET_SSE2 && mode == V2DFmode) 9109 { 9110 rtx zero; 9111 9112 /* When SSE registers are split into halves, we can avoid 9113 writing to the top half twice. */ 9114 if (TARGET_SSE_SPLIT_REGS) 9115 { 9116 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0)); 9117 zero = op0; 9118 } 9119 else 9120 { 9121 /* ??? 
Not sure about the best option for the Intel chips. 9122 The following would seem to satisfy; the register is 9123 entirely cleared, breaking the dependency chain. We 9124 then store to the upper half, with a dependency depth 9125 of one. A rumor has it that Intel recommends two movsd 9126 followed by an unpacklpd, but this is unconfirmed. And 9127 given that the dependency depth of the unpacklpd would 9128 still be one, I'm not sure why this would be better. */ 9129 zero = CONST0_RTX (V2DFmode); 9130 } 9131 9132 m = adjust_address (op1, DFmode, 0); 9133 emit_insn (gen_sse2_loadlpd (op0, zero, m)); 9134 m = adjust_address (op1, DFmode, 8); 9135 emit_insn (gen_sse2_loadhpd (op0, op0, m)); 9136 } 9137 else 9138 { 9139 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY) 9140 emit_move_insn (op0, CONST0_RTX (mode)); 9141 else 9142 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0)); 9143 9144 if (mode != V4SFmode) 9145 op0 = gen_lowpart (V4SFmode, op0); 9146 m = adjust_address (op1, V2SFmode, 0); 9147 emit_insn (gen_sse_loadlps (op0, op0, m)); 9148 m = adjust_address (op1, V2SFmode, 8); 9149 emit_insn (gen_sse_loadhps (op0, op0, m)); 9150 } 9151 } 9152 else if (MEM_P (op0)) 9153 { 9154 /* If we're optimizing for size, movups is the smallest. */ 9155 if (optimize_size) 9156 { 9157 op0 = gen_lowpart (V4SFmode, op0); 9158 op1 = gen_lowpart (V4SFmode, op1); 9159 emit_insn (gen_sse_movups (op0, op1)); 9160 return; 9161 } 9162 9163 /* ??? Similar to above, only less clear because of quote 9164 typeless stores unquote. 
*/ 9165 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES 9166 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) 9167 { 9168 op0 = gen_lowpart (V16QImode, op0); 9169 op1 = gen_lowpart (V16QImode, op1); 9170 emit_insn (gen_sse2_movdqu (op0, op1)); 9171 return; 9172 } 9173 9174 if (TARGET_SSE2 && mode == V2DFmode) 9175 { 9176 m = adjust_address (op0, DFmode, 0); 9177 emit_insn (gen_sse2_storelpd (m, op1)); 9178 m = adjust_address (op0, DFmode, 8); 9179 emit_insn (gen_sse2_storehpd (m, op1)); 9180 } 9181 else 9182 { 9183 if (mode != V4SFmode) 9184 op1 = gen_lowpart (V4SFmode, op1); 9185 m = adjust_address (op0, V2SFmode, 0); 9186 emit_insn (gen_sse_storelps (m, op1)); 9187 m = adjust_address (op0, V2SFmode, 8); 9188 emit_insn (gen_sse_storehps (m, op1)); 9189 } 9190 } 9191 else 9192 gcc_unreachable (); 9193} 9194 9195/* Expand a push in MODE. This is some mode for which we do not support 9196 proper push instructions, at least from the registers that we expect 9197 the value to live in. */ 9198 9199void 9200ix86_expand_push (enum machine_mode mode, rtx x) 9201{ 9202 rtx tmp; 9203 9204 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx, 9205 GEN_INT (-GET_MODE_SIZE (mode)), 9206 stack_pointer_rtx, 1, OPTAB_DIRECT); 9207 if (tmp != stack_pointer_rtx) 9208 emit_move_insn (stack_pointer_rtx, tmp); 9209 9210 tmp = gen_rtx_MEM (mode, stack_pointer_rtx); 9211 emit_move_insn (tmp, x); 9212} 9213 9214/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the 9215 destination to use for the operation. If different from the true 9216 destination in operands[0], a copy operation will be required. 
*/ 9217 9218rtx 9219ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode, 9220 rtx operands[]) 9221{ 9222 int matching_memory; 9223 rtx src1, src2, dst; 9224 9225 dst = operands[0]; 9226 src1 = operands[1]; 9227 src2 = operands[2]; 9228 9229 /* Recognize <var1> = <value> <op> <var1> for commutative operators */ 9230 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH 9231 && (rtx_equal_p (dst, src2) 9232 || immediate_operand (src1, mode))) 9233 { 9234 rtx temp = src1; 9235 src1 = src2; 9236 src2 = temp; 9237 } 9238 9239 /* If the destination is memory, and we do not have matching source 9240 operands, do things in registers. */ 9241 matching_memory = 0; 9242 if (GET_CODE (dst) == MEM) 9243 { 9244 if (rtx_equal_p (dst, src1)) 9245 matching_memory = 1; 9246 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH 9247 && rtx_equal_p (dst, src2)) 9248 matching_memory = 2; 9249 else 9250 dst = gen_reg_rtx (mode); 9251 } 9252 9253 /* Both source operands cannot be in memory. */ 9254 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM) 9255 { 9256 if (matching_memory != 2) 9257 src2 = force_reg (mode, src2); 9258 else 9259 src1 = force_reg (mode, src1); 9260 } 9261 9262 /* If the operation is not commutable, source 1 cannot be a constant 9263 or non-matching memory. */ 9264 if ((CONSTANT_P (src1) 9265 || (!matching_memory && GET_CODE (src1) == MEM)) 9266 && GET_RTX_CLASS (code) != RTX_COMM_ARITH) 9267 src1 = force_reg (mode, src1); 9268 9269 src1 = operands[1] = src1; 9270 src2 = operands[2] = src2; 9271 return dst; 9272} 9273 9274/* Similarly, but assume that the destination has already been 9275 set up properly. */ 9276 9277void 9278ix86_fixup_binary_operands_no_copy (enum rtx_code code, 9279 enum machine_mode mode, rtx operands[]) 9280{ 9281 rtx dst = ix86_fixup_binary_operands (code, mode, operands); 9282 gcc_assert (dst == operands[0]); 9283} 9284 9285/* Attempt to expand a binary operator. 
Make the expansion closer to the 9286 actual machine, then just general_operand, which will allow 3 separate 9287 memory references (one output, two input) in a single insn. */ 9288 9289void 9290ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode, 9291 rtx operands[]) 9292{ 9293 rtx src1, src2, dst, op, clob; 9294 9295 dst = ix86_fixup_binary_operands (code, mode, operands); 9296 src1 = operands[1]; 9297 src2 = operands[2]; 9298 9299 /* Emit the instruction. */ 9300 9301 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2)); 9302 if (reload_in_progress) 9303 { 9304 /* Reload doesn't know about the flags register, and doesn't know that 9305 it doesn't want to clobber it. We can only do this with PLUS. */ 9306 gcc_assert (code == PLUS); 9307 emit_insn (op); 9308 } 9309 else 9310 { 9311 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); 9312 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); 9313 } 9314 9315 /* Fix up the destination if needed. */ 9316 if (dst != operands[0]) 9317 emit_move_insn (operands[0], dst); 9318} 9319 9320/* Return TRUE or FALSE depending on whether the binary operator meets the 9321 appropriate constraints. */ 9322 9323int 9324ix86_binary_operator_ok (enum rtx_code code, 9325 enum machine_mode mode ATTRIBUTE_UNUSED, 9326 rtx operands[3]) 9327{ 9328 /* Both source operands cannot be in memory. */ 9329 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM) 9330 return 0; 9331 /* If the operation is not commutable, source 1 cannot be a constant. */ 9332 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH) 9333 return 0; 9334 /* If the destination is memory, we must have a matching source operand. */ 9335 if (GET_CODE (operands[0]) == MEM 9336 && ! 
(rtx_equal_p (operands[0], operands[1]) 9337 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH 9338 && rtx_equal_p (operands[0], operands[2])))) 9339 return 0; 9340 /* If the operation is not commutable and the source 1 is memory, we must 9341 have a matching destination. */ 9342 if (GET_CODE (operands[1]) == MEM 9343 && GET_RTX_CLASS (code) != RTX_COMM_ARITH 9344 && ! rtx_equal_p (operands[0], operands[1])) 9345 return 0; 9346 return 1; 9347} 9348 9349/* Attempt to expand a unary operator. Make the expansion closer to the 9350 actual machine, then just general_operand, which will allow 2 separate 9351 memory references (one output, one input) in a single insn. */ 9352 9353void 9354ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode, 9355 rtx operands[]) 9356{ 9357 int matching_memory; 9358 rtx src, dst, op, clob; 9359 9360 dst = operands[0]; 9361 src = operands[1]; 9362 9363 /* If the destination is memory, and we do not have matching source 9364 operands, do things in registers. */ 9365 matching_memory = 0; 9366 if (MEM_P (dst)) 9367 { 9368 if (rtx_equal_p (dst, src)) 9369 matching_memory = 1; 9370 else 9371 dst = gen_reg_rtx (mode); 9372 } 9373 9374 /* When source operand is memory, destination must match. */ 9375 if (MEM_P (src) && !matching_memory) 9376 src = force_reg (mode, src); 9377 9378 /* Emit the instruction. */ 9379 9380 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src)); 9381 if (reload_in_progress || code == NOT) 9382 { 9383 /* Reload doesn't know about the flags register, and doesn't know that 9384 it doesn't want to clobber it. */ 9385 gcc_assert (code == NOT); 9386 emit_insn (op); 9387 } 9388 else 9389 { 9390 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); 9391 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); 9392 } 9393 9394 /* Fix up the destination if needed. 
*/
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}

/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
   Create a mask for the sign bit in MODE for an SSE register.  If VECT is
   true, then replicate the mask for all elements of the vector register.
   If INVERT is true, then create a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtvec v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  if (mode == SFmode)
    /* NOTE(review): `hi = lo < 0` is 1 when HOST_WIDE_INT is 32 bits (the
       0x80000000 pattern is then negative); for SFmode only the low SImode
       part of the immed_double_const below is significant.  */
    lo = 0x80000000, hi = lo < 0;
  else if (HOST_BITS_PER_WIDE_INT >= 64)
    lo = (HOST_WIDE_INT)1 << shift, hi = -1;
  else
    lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
  mask = gen_lowpart (mode, mask);

  if (mode == SFmode)
    {
      if (vect)
	v = gen_rtvec (4, mask, mask, mask, mask);
      else
	v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
		       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
      vec_mode = V4SFmode;
    }
  else
    {
      if (vect)
	v = gen_rtvec (2, mask, mask);
      else
	v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
      vec_mode = V2DFmode;
    }

  return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
}

/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
				rtx operands[])
{
  rtx mask, set, use, clob, dst, src;
  bool matching_memory;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode elt_mode = mode;

  if (vector_mode)
    {
      elt_mode = GET_MODE_INNER (mode);
      use_sse = true;
    }
  else if (TARGET_SSE_MATH)
    use_sse = SSE_FLOAT_MODE_P (mode);

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  /* For ABS the mask is inverted (sign bit excluded) so an AND clears the
     sign; for NEG the plain sign-bit mask is XORed in (see below).  */
  if (use_sse)
    mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we don't have matching source
     operands or we're using the x87, do things in registers.  */
  matching_memory = false;
  if (MEM_P (dst))
    {
      if (use_sse && rtx_equal_p (dst, src))
	matching_memory = true;
      else
	dst = gen_reg_rtx (mode);
    }
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  if (vector_mode)
    {
      set = gen_rtx_fmt_ee (code == NEG ?
XOR : AND, mode, src, mask);
      set = gen_rtx_SET (VOIDmode, dst, set);
      emit_insn (set);
    }
  else
    {
      set = gen_rtx_fmt_e (code, mode, src);
      set = gen_rtx_SET (VOIDmode, dst, set);
      if (mask)
	{
	  /* Scalar SSE case: keep the mask alive via a USE and record the
	     flags clobber in the same PARALLEL as the abs/neg SET.  */
	  use = gen_rtx_USE (VOIDmode, mask);
	  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
	  emit_insn (gen_rtx_PARALLEL (VOIDmode,
				       gen_rtvec (3, set, use, clob)));
	}
      else
	emit_insn (set);
    }

  /* Copy back to the caller's destination when a temporary was used.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);
  vmode = mode == SFmode ? V4SFmode : V2DFmode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtvec v;

      /* Only the magnitude of a constant op0 matters; strip its sign.  */
      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
	op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (op0 == CONST0_RTX (mode))
	op0 = CONST0_RTX (vmode);
      else
	{
	  if (mode == SFmode)
	    v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
			   CONST0_RTX (SFmode), CONST0_RTX (SFmode));
	  else
	    v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
	  op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
	}

      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
	emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
      else
	emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
    }
  else
    {
      /* Variable op0 needs both the sign-bit mask and its complement.  */
      nmask = ix86_build_signbit_mask (mode, 0, 1);
      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
	emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
      else
	emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
    }
}

/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, x;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  /* dest = (op1 & signbit-mask) | |op0|.  */
  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}

/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
	 we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  /* NOTE(review): the "alternative N" comments below track the constraint
     alternatives of the copysign*_var insn pattern — presumably defined in
     i386.md; verify against that pattern when changing this code.  */
  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
	{
	  x = gen_rtx_AND (vmode, scratch, mask);
	}
      else						/* alternative 2,4 */
	{
	  gcc_assert (REGNO (mask) == REGNO (scratch));
	  op1 = simplify_gen_subreg (vmode, op1, mode, 0);
	  x = gen_rtx_AND (vmode, scratch, op1);
	}
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
	{
	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, nmask);
	}
      else						/* alternative 3,4 */
	{
	  gcc_assert (REGNO (nmask) == REGNO (dest));
	  dest = nmask;
	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, op0);
	}
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  /* Combine the masked magnitude and the masked sign.  */
  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}

/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.
*/

int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  /* Each case accepts REQ_MODEs no more constrained than SET_MODE;
     the FALLTHRUs encode the CCmode > CCGCmode > CCGOCmode > CCZmode
     ordering.  */
  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}

/* Return the condition-code mode to use for comparing OP0 and OP1 with
   comparison CODE; the per-case comments note which flags each code
   needs.  */

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case GTU:			/* CF=0 & ZF=0 */
    case LTU:			/* CF=1 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
	 mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}

/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCZmode:
      /* Any two distinct integer CC modes fall back to plain CCmode.  */
      switch (m2)
	{
	default:
	  return VOIDmode;

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCZmode:
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}

/* Return true if we should use an FCOMI instruction for this fp comparison.
   True when FCOMI is as cheap as the best strategy for CODE or for its
   swapped form.  */

int
ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
{
  enum rtx_code swapped_code = swap_condition (code);
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
	  || (ix86_fp_comparison_cost (swapped_code)
	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
}

/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.
*/

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || (op_mode == XFmode
	      && ! (standard_80387_constant_p (op0) == 1
		    || standard_80387_constant_p (op1) == 1)
	      && GET_CODE (op1) != FLOAT)
	  || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (GET_CODE (op0) == MEM
	      && ! (standard_80387_constant_p (op1) == 0
		    || GET_CODE (op1) == MEM)))
	{
	  rtx tmp;
	  tmp = op0, op0 = op1, op1 = tmp;
	  code = swap_condition (code);
	}

      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  /* standard_80387_constant_p: 0 = not loadable by fld insn,
	     1 = fldz/fld1 style constant, other = other special value.  */
	  int tmp = standard_80387_constant_p (op1);
	  if (tmp == 0)
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	  else if (tmp == 1)
	    {
	      if (TARGET_CMOVE)
		op1 = force_reg (op_mode, op1);
	    }
	  else
	    op1 = force_reg (op_mode, op1);
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}

/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
      break;
    case UNEQ:
      return EQ;
      break;
    case UNLT:
      return LTU;
      break;
    case UNLE:
      return LEU;
      break;
    case LTGT:
      return NE;
      break;
    default:
      return UNKNOWN;
    }
}

/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is comparison code for branch that will
   branch around FIRST_CODE and SECOND_CODE.  If some of branches
   is not required, set value to UNKNOWN.
   We never require more than two branches.  */

void
ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
			  enum rtx_code *first_code,
			  enum rtx_code *second_code)
{
  *first_code = code;
  *bypass_code = UNKNOWN;
  *second_code = UNKNOWN;

  /* The fcomi comparison sets flags as follows:

       cmp    ZF PF CF
       >      0  0  0
       <      0  0  1
       =      1  0  0
       un     1  1  1 */

  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
    case GE:			/* GEU - CF=0 */
    case ORDERED:		/* PF=0 */
    case UNORDERED:		/* PF=1 */
    case UNEQ:			/* EQ - ZF=1 */
    case UNLT:			/* LTU - CF=1 */
    case UNLE:			/* LEU - CF=1 | ZF=1 */
    case LTGT:			/* EQ - ZF=0 */
      break;
    case LT:			/* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:			/* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:			/* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:			/* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      gcc_unreachable ();
    }
  /* Without -mieee-fp, unordered operands need not be handled; a single
     branch suffices.  */
  if (!TARGET_IEEE_FP)
    {
      *second_code = UNKNOWN;
      *bypass_code = UNKNOWN;
    }
}

/* Return cost of comparison done fcom + arithmetics operations on AX.
   All following functions do use number of instructions as a cost metrics.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */

static int
ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
{
  if (!TARGET_IEEE_FP)
    return 4;
  /* The cost of code output by ix86_expand_fp_compare.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      return 4;
      break;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      return 5;
      break;
    case LE:
    case UNGT:
      return 6;
      break;
    default:
      gcc_unreachable ();
    }
}

/* Return cost of comparison done using fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_fcomi_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not supported - this
     prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
}

/* Return cost of comparison done using sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_sahf_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not preferred - this
     avoids gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
}

/* Compute cost of the comparison done using any method.
   See ix86_fp_comparison_arithmetics_cost for the metrics.
*/
static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
  int min;

  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
  sahf_cost = ix86_fp_comparison_sahf_cost (code);

  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
  if (min > sahf_cost)
    min = sahf_cost;
  if (min > fcomi_cost)
    min = fcomi_cost;
  return min;
}

/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
			rtx *second_test, rtx *bypass_test)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  */
  /* Only possible when the caller supplied slots for any extra tests the
     split requires (bypass_test/second_test).  */
  if ((bypass_code == UNKNOWN || bypass_test)
      && (second_code == UNKNOWN || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
	{
	  /* fcomi: compare straight into the flags register.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			     tmp);
	  emit_insn (tmp);
	}
      else
	{
	  /* fnstsw + sahf: copy the x87 status word into AH, then into
	     EFLAGS.  */
	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
	  if (!scratch)
	    scratch = gen_reg_rtx (HImode);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
	  emit_insn (gen_x86_sahf_1 (scratch));
	}

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != UNKNOWN)
	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
      if (second_code != UNKNOWN)
	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
				       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.  */
      /* NOTE(review): the 0x45/0x44/0x40/0x05/0x04/0x01 masks select the
	 C0/C2/C3 condition bits of the x87 status word held in AH —
	 verify against the FNSTSW status-word layout when editing.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x01)));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	      /* NOTE(review): this inner break is redundant — the outer
		 break below follows immediately.  */
	      break;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}

/* Expand a comparison of the global ix86_compare_op0/op1 (or reuse the
   flags already set in ix86_compare_emitted, clearing it) and return the
   flags-user test rtx.  */

rtx
ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (ix86_compare_emitted)
    {
      ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
      ix86_compare_emitted = NULL_RTX;
    }
  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
				  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

/* Return true if the CODE will result in nontrivial jump sequence.  */
bool
ix86_fp_jump_nontrivial_p (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != UNKNOWN || second_code != UNKNOWN;
}

/* Emit a conditional jump to LABEL comparing ix86_compare_op0/op1 with
   CODE.  */

void
ix86_expand_branch (enum rtx_code code, rtx label)
{
  rtx tmp;

  /* If we have emitted a compare insn, go straight to simple.
     ix86_expand_compare won't emit anything if ix86_compare_emitted
     is non NULL.
*/
  if (ix86_compare_emitted)
    goto simple;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
      {
	rtvec vec;
	int use_fcomi;
	enum rtx_code bypass_code, first_code, second_code;

	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
					     &ix86_compare_op1);

	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

	/* Check whether we will use the natural sequence with one jump.  If
	   so, we can expand jump early.  Otherwise delay expansion by
	   creating compound insn to not confuse optimizers.  */
	if (bypass_code == UNKNOWN && second_code == UNKNOWN
	    && TARGET_CMOVE)
	  {
	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx, NULL_RTX, NULL_RTX);
	  }
	else
	  {
	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
				  ix86_compare_op0, ix86_compare_op1);
	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
					gen_rtx_LABEL_REF (VOIDmode, label),
					pc_rtx);
	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

	    use_fcomi = ix86_use_fcomi_compare (code);
	    vec = rtvec_alloc (3 + !use_fcomi);
	    RTVEC_ELT (vec, 0) = tmp;
	    /* NOTE(review): hard registers 18 and 17 here are presumably
	       FPSR_REG and FLAGS_REG — confirm against i386.h before
	       changing.  */
	    RTVEC_ELT (vec, 1)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
	    RTVEC_ELT (vec, 2)
	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
	    if (! use_fcomi)
	      RTVEC_ELT (vec, 3)
		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
	  }
	return;
      }

    case DImode:
      if (TARGET_64BIT)
	goto simple;
      /* FALLTHRU: 32-bit DImode is split like TImode below.  */
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;
	enum machine_mode submode;

	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
	  {
	    tmp = ix86_compare_op0;
	    ix86_compare_op0 = ix86_compare_op1;
	    ix86_compare_op1 = tmp;
	    code = swap_condition (code);
	  }
	if (GET_MODE (ix86_compare_op0) == DImode)
	  {
	    split_di (&ix86_compare_op0, 1, lo+0, hi+0);
	    split_di (&ix86_compare_op1, 1, lo+1, hi+1);
	    submode = SImode;
	  }
	else
	  {
	    split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
	    split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
	    submode = DImode;
	  }

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_size
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    /* Recurse with the ORed result compared against zero.  */
	    ix86_compare_op0 = tmp;
	    ix86_compare_op1 = const0_rtx;
	    ix86_expand_branch (code, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */

	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      ix86_compare_op0 = hi[0];
	      ix86_compare_op1 = hi[1];
	      ix86_expand_branch (code, label);
	      return;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
	  case NE:   code2 = UNKNOWN; break;

	  default:
	    gcc_unreachable ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	ix86_compare_op0 = hi[0];
	ix86_compare_op1 = hi[1];

	if (code1 != UNKNOWN)
	  ix86_expand_branch (code1, label);
	if (code2 != UNKNOWN)
	  ix86_expand_branch (code2, label2);

	ix86_compare_op0 = lo[0];
	ix86_compare_op1 = lo[1];
	ix86_expand_branch (code3, label);

	if (code2 != UNKNOWN)
	  emit_label (label2);
	return;
      }

    default:
      gcc_unreachable ();
    }
}

/* Split branch based on floating point condition.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  /* Normalize so that TARGET2 is the fall-through (pc_rtx).  */
  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp, &second, &bypass);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
	 Assume the BYPASS and SECOND to be always test
	 for UNORDERED.  */
      probability = split_branch_probability;

      /* Value of 1 is low enough to make no need for probability
	 to be updated.  Later we may run some experiments and see
	 if unordered values are more frequent in practice.  */
      if (bypass)
	bypass_probability = 1;
      if (second)
	second_probability = 1;
    }
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode,
						 bypass,
						 gen_rtx_LABEL_REF (VOIDmode,
								    label),
						 pc_rtx)));
      if (bypass_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (bypass_probability),
			       REG_NOTES (i));
    }
  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
			   GEN_INT (probability),
			   REG_NOTES (i));
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
			  (VOIDmode, pc_rtx,
			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
						 target2)));
      if (second_probability >= 0)
	REG_NOTES (i)
	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
			       GEN_INT (second_probability),
			       REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}

int
ix86_expand_setcc (enum rtx_code code, rtx dest)
{
  rtx ret, tmp, tmpreg, equiv;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ?
TImode : DImode)) 10664 return 0; /* FAIL */ 10665 10666 gcc_assert (GET_MODE (dest) == QImode); 10667 10668 ret = ix86_expand_compare (code, &second_test, &bypass_test); 10669 PUT_MODE (ret, QImode); 10670 10671 tmp = dest; 10672 tmpreg = dest; 10673 10674 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret)); 10675 if (bypass_test || second_test) 10676 { 10677 rtx test = second_test; 10678 int bypass = 0; 10679 rtx tmp2 = gen_reg_rtx (QImode); 10680 if (bypass_test) 10681 { 10682 gcc_assert (!second_test); 10683 test = bypass_test; 10684 bypass = 1; 10685 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test))); 10686 } 10687 PUT_MODE (test, QImode); 10688 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test)); 10689 10690 if (bypass) 10691 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2)); 10692 else 10693 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2)); 10694 } 10695 10696 /* Attach a REG_EQUAL note describing the comparison result. */ 10697 if (ix86_compare_op0 && ix86_compare_op1) 10698 { 10699 equiv = simplify_gen_relational (code, QImode, 10700 GET_MODE (ix86_compare_op0), 10701 ix86_compare_op0, ix86_compare_op1); 10702 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv); 10703 } 10704 10705 return 1; /* DONE */ 10706} 10707 10708/* Expand comparison setting or clearing carry flag. Return true when 10709 successful and set pop for the operation. */ 10710static bool 10711ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) 10712{ 10713 enum machine_mode mode = 10714 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); 10715 10716 /* Do not handle DImode compares that go through special path. Also we can't 10717 deal with FP compares yet. This is possible to add. */ 10718 if (mode == (TARGET_64BIT ? 
TImode : DImode)) 10719 return false; 10720 if (FLOAT_MODE_P (mode)) 10721 { 10722 rtx second_test = NULL, bypass_test = NULL; 10723 rtx compare_op, compare_seq; 10724 10725 /* Shortcut: following common codes never translate into carry flag compares. */ 10726 if (code == EQ || code == NE || code == UNEQ || code == LTGT 10727 || code == ORDERED || code == UNORDERED) 10728 return false; 10729 10730 /* These comparisons require zero flag; swap operands so they won't. */ 10731 if ((code == GT || code == UNLE || code == LE || code == UNGT) 10732 && !TARGET_IEEE_FP) 10733 { 10734 rtx tmp = op0; 10735 op0 = op1; 10736 op1 = tmp; 10737 code = swap_condition (code); 10738 } 10739 10740 /* Try to expand the comparison and verify that we end up with carry flag 10741 based comparison. This is fails to be true only when we decide to expand 10742 comparison using arithmetic that is not too common scenario. */ 10743 start_sequence (); 10744 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX, 10745 &second_test, &bypass_test); 10746 compare_seq = get_insns (); 10747 end_sequence (); 10748 10749 if (second_test || bypass_test) 10750 return false; 10751 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode 10752 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) 10753 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op)); 10754 else 10755 code = GET_CODE (compare_op); 10756 if (code != LTU && code != GEU) 10757 return false; 10758 emit_insn (compare_seq); 10759 *pop = compare_op; 10760 return true; 10761 } 10762 if (!INTEGRAL_MODE_P (mode)) 10763 return false; 10764 switch (code) 10765 { 10766 case LTU: 10767 case GEU: 10768 break; 10769 10770 /* Convert a==0 into (unsigned)a<1. */ 10771 case EQ: 10772 case NE: 10773 if (op1 != const0_rtx) 10774 return false; 10775 op1 = const1_rtx; 10776 code = (code == EQ ? LTU : GEU); 10777 break; 10778 10779 /* Convert a>b into b<a or a>=b-1. 
*/ 10780 case GTU: 10781 case LEU: 10782 if (GET_CODE (op1) == CONST_INT) 10783 { 10784 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0)); 10785 /* Bail out on overflow. We still can swap operands but that 10786 would force loading of the constant into register. */ 10787 if (op1 == const0_rtx 10788 || !x86_64_immediate_operand (op1, GET_MODE (op1))) 10789 return false; 10790 code = (code == GTU ? GEU : LTU); 10791 } 10792 else 10793 { 10794 rtx tmp = op1; 10795 op1 = op0; 10796 op0 = tmp; 10797 code = (code == GTU ? LTU : GEU); 10798 } 10799 break; 10800 10801 /* Convert a>=0 into (unsigned)a<0x80000000. */ 10802 case LT: 10803 case GE: 10804 if (mode == DImode || op1 != const0_rtx) 10805 return false; 10806 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); 10807 code = (code == LT ? GEU : LTU); 10808 break; 10809 case LE: 10810 case GT: 10811 if (mode == DImode || op1 != constm1_rtx) 10812 return false; 10813 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); 10814 code = (code == LE ? GEU : LTU); 10815 break; 10816 10817 default: 10818 return false; 10819 } 10820 /* Swapping operands may cause constant to appear as first operand. 
*/ 10821 if (!nonimmediate_operand (op0, VOIDmode)) 10822 { 10823 if (no_new_pseudos) 10824 return false; 10825 op0 = force_reg (mode, op0); 10826 } 10827 ix86_compare_op0 = op0; 10828 ix86_compare_op1 = op1; 10829 *pop = ix86_expand_compare (code, NULL, NULL); 10830 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU); 10831 return true; 10832} 10833 10834int 10835ix86_expand_int_movcc (rtx operands[]) 10836{ 10837 enum rtx_code code = GET_CODE (operands[1]), compare_code; 10838 rtx compare_seq, compare_op; 10839 rtx second_test, bypass_test; 10840 enum machine_mode mode = GET_MODE (operands[0]); 10841 bool sign_bit_compare_p = false;; 10842 10843 start_sequence (); 10844 compare_op = ix86_expand_compare (code, &second_test, &bypass_test); 10845 compare_seq = get_insns (); 10846 end_sequence (); 10847 10848 compare_code = GET_CODE (compare_op); 10849 10850 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT)) 10851 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE))) 10852 sign_bit_compare_p = true; 10853 10854 /* Don't attempt mode expansion here -- if we had to expand 5 or 6 10855 HImode insns, we'd be swallowed in word prefix ops. */ 10856 10857 if ((mode != HImode || TARGET_FAST_PREFIX) 10858 && (mode != (TARGET_64BIT ? TImode : DImode)) 10859 && GET_CODE (operands[2]) == CONST_INT 10860 && GET_CODE (operands[3]) == CONST_INT) 10861 { 10862 rtx out = operands[0]; 10863 HOST_WIDE_INT ct = INTVAL (operands[2]); 10864 HOST_WIDE_INT cf = INTVAL (operands[3]); 10865 HOST_WIDE_INT diff; 10866 10867 diff = ct - cf; 10868 /* Sign bit compares are better done using shifts than we do by using 10869 sbb. */ 10870 if (sign_bit_compare_p 10871 || ix86_expand_carry_flag_compare (code, ix86_compare_op0, 10872 ix86_compare_op1, &compare_op)) 10873 { 10874 /* Detect overlap between destination and compare sources. 
*/ 10875 rtx tmp = out; 10876 10877 if (!sign_bit_compare_p) 10878 { 10879 bool fpcmp = false; 10880 10881 compare_code = GET_CODE (compare_op); 10882 10883 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode 10884 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) 10885 { 10886 fpcmp = true; 10887 compare_code = ix86_fp_compare_code_to_integer (compare_code); 10888 } 10889 10890 /* To simplify rest of code, restrict to the GEU case. */ 10891 if (compare_code == LTU) 10892 { 10893 HOST_WIDE_INT tmp = ct; 10894 ct = cf; 10895 cf = tmp; 10896 compare_code = reverse_condition (compare_code); 10897 code = reverse_condition (code); 10898 } 10899 else 10900 { 10901 if (fpcmp) 10902 PUT_CODE (compare_op, 10903 reverse_condition_maybe_unordered 10904 (GET_CODE (compare_op))); 10905 else 10906 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); 10907 } 10908 diff = ct - cf; 10909 10910 if (reg_overlap_mentioned_p (out, ix86_compare_op0) 10911 || reg_overlap_mentioned_p (out, ix86_compare_op1)) 10912 tmp = gen_reg_rtx (mode); 10913 10914 if (mode == DImode) 10915 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op)); 10916 else 10917 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op)); 10918 } 10919 else 10920 { 10921 if (code == GT || code == GE) 10922 code = reverse_condition (code); 10923 else 10924 { 10925 HOST_WIDE_INT tmp = ct; 10926 ct = cf; 10927 cf = tmp; 10928 diff = ct - cf; 10929 } 10930 tmp = emit_store_flag (tmp, code, ix86_compare_op0, 10931 ix86_compare_op1, VOIDmode, 0, -1); 10932 } 10933 10934 if (diff == 1) 10935 { 10936 /* 10937 * cmpl op0,op1 10938 * sbbl dest,dest 10939 * [addl dest, ct] 10940 * 10941 * Size 5 - 8. 10942 */ 10943 if (ct) 10944 tmp = expand_simple_binop (mode, PLUS, 10945 tmp, GEN_INT (ct), 10946 copy_rtx (tmp), 1, OPTAB_DIRECT); 10947 } 10948 else if (cf == -1) 10949 { 10950 /* 10951 * cmpl op0,op1 10952 * sbbl dest,dest 10953 * orl $ct, dest 10954 * 10955 * Size 8. 
10956 */ 10957 tmp = expand_simple_binop (mode, IOR, 10958 tmp, GEN_INT (ct), 10959 copy_rtx (tmp), 1, OPTAB_DIRECT); 10960 } 10961 else if (diff == -1 && ct) 10962 { 10963 /* 10964 * cmpl op0,op1 10965 * sbbl dest,dest 10966 * notl dest 10967 * [addl dest, cf] 10968 * 10969 * Size 8 - 11. 10970 */ 10971 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); 10972 if (cf) 10973 tmp = expand_simple_binop (mode, PLUS, 10974 copy_rtx (tmp), GEN_INT (cf), 10975 copy_rtx (tmp), 1, OPTAB_DIRECT); 10976 } 10977 else 10978 { 10979 /* 10980 * cmpl op0,op1 10981 * sbbl dest,dest 10982 * [notl dest] 10983 * andl cf - ct, dest 10984 * [addl dest, ct] 10985 * 10986 * Size 8 - 11. 10987 */ 10988 10989 if (cf == 0) 10990 { 10991 cf = ct; 10992 ct = 0; 10993 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); 10994 } 10995 10996 tmp = expand_simple_binop (mode, AND, 10997 copy_rtx (tmp), 10998 gen_int_mode (cf - ct, mode), 10999 copy_rtx (tmp), 1, OPTAB_DIRECT); 11000 if (ct) 11001 tmp = expand_simple_binop (mode, PLUS, 11002 copy_rtx (tmp), GEN_INT (ct), 11003 copy_rtx (tmp), 1, OPTAB_DIRECT); 11004 } 11005 11006 if (!rtx_equal_p (tmp, out)) 11007 emit_move_insn (copy_rtx (out), copy_rtx (tmp)); 11008 11009 return 1; /* DONE */ 11010 } 11011 11012 if (diff < 0) 11013 { 11014 HOST_WIDE_INT tmp; 11015 tmp = ct, ct = cf, cf = tmp; 11016 diff = -diff; 11017 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0))) 11018 { 11019 /* We may be reversing unordered compare to normal compare, that 11020 is not valid in general (we may convert non-trapping condition 11021 to trapping one), however on i386 we currently emit all 11022 comparisons unordered. 
*/ 11023 compare_code = reverse_condition_maybe_unordered (compare_code); 11024 code = reverse_condition_maybe_unordered (code); 11025 } 11026 else 11027 { 11028 compare_code = reverse_condition (compare_code); 11029 code = reverse_condition (code); 11030 } 11031 } 11032 11033 compare_code = UNKNOWN; 11034 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT 11035 && GET_CODE (ix86_compare_op1) == CONST_INT) 11036 { 11037 if (ix86_compare_op1 == const0_rtx 11038 && (code == LT || code == GE)) 11039 compare_code = code; 11040 else if (ix86_compare_op1 == constm1_rtx) 11041 { 11042 if (code == LE) 11043 compare_code = LT; 11044 else if (code == GT) 11045 compare_code = GE; 11046 } 11047 } 11048 11049 /* Optimize dest = (op0 < 0) ? -1 : cf. */ 11050 if (compare_code != UNKNOWN 11051 && GET_MODE (ix86_compare_op0) == GET_MODE (out) 11052 && (cf == -1 || ct == -1)) 11053 { 11054 /* If lea code below could be used, only optimize 11055 if it results in a 2 insn sequence. */ 11056 11057 if (! 
(diff == 1 || diff == 2 || diff == 4 || diff == 8 11058 || diff == 3 || diff == 5 || diff == 9) 11059 || (compare_code == LT && ct == -1) 11060 || (compare_code == GE && cf == -1)) 11061 { 11062 /* 11063 * notl op1 (if necessary) 11064 * sarl $31, op1 11065 * orl cf, op1 11066 */ 11067 if (ct != -1) 11068 { 11069 cf = ct; 11070 ct = -1; 11071 code = reverse_condition (code); 11072 } 11073 11074 out = emit_store_flag (out, code, ix86_compare_op0, 11075 ix86_compare_op1, VOIDmode, 0, -1); 11076 11077 out = expand_simple_binop (mode, IOR, 11078 out, GEN_INT (cf), 11079 out, 1, OPTAB_DIRECT); 11080 if (out != operands[0]) 11081 emit_move_insn (operands[0], out); 11082 11083 return 1; /* DONE */ 11084 } 11085 } 11086 11087 11088 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8 11089 || diff == 3 || diff == 5 || diff == 9) 11090 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL) 11091 && (mode != DImode 11092 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode))) 11093 { 11094 /* 11095 * xorl dest,dest 11096 * cmpl op1,op2 11097 * setcc dest 11098 * lea cf(dest*(ct-cf)),dest 11099 * 11100 * Size 14. 11101 * 11102 * This also catches the degenerate setcc-only case. 11103 */ 11104 11105 rtx tmp; 11106 int nops; 11107 11108 out = emit_store_flag (out, code, ix86_compare_op0, 11109 ix86_compare_op1, VOIDmode, 0, 1); 11110 11111 nops = 0; 11112 /* On x86_64 the lea instruction operates on Pmode, so we need 11113 to get arithmetics done in proper mode to match. 
*/ 11114 if (diff == 1) 11115 tmp = copy_rtx (out); 11116 else 11117 { 11118 rtx out1; 11119 out1 = copy_rtx (out); 11120 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1)); 11121 nops++; 11122 if (diff & 1) 11123 { 11124 tmp = gen_rtx_PLUS (mode, tmp, out1); 11125 nops++; 11126 } 11127 } 11128 if (cf != 0) 11129 { 11130 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf)); 11131 nops++; 11132 } 11133 if (!rtx_equal_p (tmp, out)) 11134 { 11135 if (nops == 1) 11136 out = force_operand (tmp, copy_rtx (out)); 11137 else 11138 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp))); 11139 } 11140 if (!rtx_equal_p (out, operands[0])) 11141 emit_move_insn (operands[0], copy_rtx (out)); 11142 11143 return 1; /* DONE */ 11144 } 11145 11146 /* 11147 * General case: Jumpful: 11148 * xorl dest,dest cmpl op1, op2 11149 * cmpl op1, op2 movl ct, dest 11150 * setcc dest jcc 1f 11151 * decl dest movl cf, dest 11152 * andl (cf-ct),dest 1: 11153 * addl ct,dest 11154 * 11155 * Size 20. Size 14. 11156 * 11157 * This is reasonably steep, but branch mispredict costs are 11158 * high on modern cpus, so consider failing only if optimizing 11159 * for space. 11160 */ 11161 11162 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) 11163 && BRANCH_COST >= 2) 11164 { 11165 if (cf == 0) 11166 { 11167 cf = ct; 11168 ct = 0; 11169 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0))) 11170 /* We may be reversing unordered compare to normal compare, 11171 that is not valid in general (we may convert non-trapping 11172 condition to trapping one), however on i386 we currently 11173 emit all comparisons unordered. 
*/ 11174 code = reverse_condition_maybe_unordered (code); 11175 else 11176 { 11177 code = reverse_condition (code); 11178 if (compare_code != UNKNOWN) 11179 compare_code = reverse_condition (compare_code); 11180 } 11181 } 11182 11183 if (compare_code != UNKNOWN) 11184 { 11185 /* notl op1 (if needed) 11186 sarl $31, op1 11187 andl (cf-ct), op1 11188 addl ct, op1 11189 11190 For x < 0 (resp. x <= -1) there will be no notl, 11191 so if possible swap the constants to get rid of the 11192 complement. 11193 True/false will be -1/0 while code below (store flag 11194 followed by decrement) is 0/-1, so the constants need 11195 to be exchanged once more. */ 11196 11197 if (compare_code == GE || !cf) 11198 { 11199 code = reverse_condition (code); 11200 compare_code = LT; 11201 } 11202 else 11203 { 11204 HOST_WIDE_INT tmp = cf; 11205 cf = ct; 11206 ct = tmp; 11207 } 11208 11209 out = emit_store_flag (out, code, ix86_compare_op0, 11210 ix86_compare_op1, VOIDmode, 0, -1); 11211 } 11212 else 11213 { 11214 out = emit_store_flag (out, code, ix86_compare_op0, 11215 ix86_compare_op1, VOIDmode, 0, 1); 11216 11217 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx, 11218 copy_rtx (out), 1, OPTAB_DIRECT); 11219 } 11220 11221 out = expand_simple_binop (mode, AND, copy_rtx (out), 11222 gen_int_mode (cf - ct, mode), 11223 copy_rtx (out), 1, OPTAB_DIRECT); 11224 if (ct) 11225 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct), 11226 copy_rtx (out), 1, OPTAB_DIRECT); 11227 if (!rtx_equal_p (out, operands[0])) 11228 emit_move_insn (operands[0], copy_rtx (out)); 11229 11230 return 1; /* DONE */ 11231 } 11232 } 11233 11234 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) 11235 { 11236 /* Try a few things more with specific constants and a variable. 
*/ 11237 11238 optab op; 11239 rtx var, orig_out, out, tmp; 11240 11241 if (BRANCH_COST <= 2) 11242 return 0; /* FAIL */ 11243 11244 /* If one of the two operands is an interesting constant, load a 11245 constant with the above and mask it in with a logical operation. */ 11246 11247 if (GET_CODE (operands[2]) == CONST_INT) 11248 { 11249 var = operands[3]; 11250 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx) 11251 operands[3] = constm1_rtx, op = and_optab; 11252 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx) 11253 operands[3] = const0_rtx, op = ior_optab; 11254 else 11255 return 0; /* FAIL */ 11256 } 11257 else if (GET_CODE (operands[3]) == CONST_INT) 11258 { 11259 var = operands[2]; 11260 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx) 11261 operands[2] = constm1_rtx, op = and_optab; 11262 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx) 11263 operands[2] = const0_rtx, op = ior_optab; 11264 else 11265 return 0; /* FAIL */ 11266 } 11267 else 11268 return 0; /* FAIL */ 11269 11270 orig_out = operands[0]; 11271 tmp = gen_reg_rtx (mode); 11272 operands[0] = tmp; 11273 11274 /* Recurse to get the constant loaded. */ 11275 if (ix86_expand_int_movcc (operands) == 0) 11276 return 0; /* FAIL */ 11277 11278 /* Mask in the interesting variable. */ 11279 out = expand_binop (mode, op, var, tmp, orig_out, 0, 11280 OPTAB_WIDEN); 11281 if (!rtx_equal_p (out, orig_out)) 11282 emit_move_insn (copy_rtx (orig_out), copy_rtx (out)); 11283 11284 return 1; /* DONE */ 11285 } 11286 11287 /* 11288 * For comparison with above, 11289 * 11290 * movl cf,dest 11291 * movl ct,tmp 11292 * cmpl op1,op2 11293 * cmovcc tmp,dest 11294 * 11295 * Size 15. 11296 */ 11297 11298 if (! nonimmediate_operand (operands[2], mode)) 11299 operands[2] = force_reg (mode, operands[2]); 11300 if (! 
nonimmediate_operand (operands[3], mode)) 11301 operands[3] = force_reg (mode, operands[3]); 11302 11303 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3])) 11304 { 11305 rtx tmp = gen_reg_rtx (mode); 11306 emit_move_insn (tmp, operands[3]); 11307 operands[3] = tmp; 11308 } 11309 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2])) 11310 { 11311 rtx tmp = gen_reg_rtx (mode); 11312 emit_move_insn (tmp, operands[2]); 11313 operands[2] = tmp; 11314 } 11315 11316 if (! register_operand (operands[2], VOIDmode) 11317 && (mode == QImode 11318 || ! register_operand (operands[3], VOIDmode))) 11319 operands[2] = force_reg (mode, operands[2]); 11320 11321 if (mode == QImode 11322 && ! register_operand (operands[3], VOIDmode)) 11323 operands[3] = force_reg (mode, operands[3]); 11324 11325 emit_insn (compare_seq); 11326 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 11327 gen_rtx_IF_THEN_ELSE (mode, 11328 compare_op, operands[2], 11329 operands[3]))); 11330 if (bypass_test) 11331 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), 11332 gen_rtx_IF_THEN_ELSE (mode, 11333 bypass_test, 11334 copy_rtx (operands[3]), 11335 copy_rtx (operands[0])))); 11336 if (second_test) 11337 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), 11338 gen_rtx_IF_THEN_ELSE (mode, 11339 second_test, 11340 copy_rtx (operands[2]), 11341 copy_rtx (operands[0])))); 11342 11343 return 1; /* DONE */ 11344} 11345 11346/* Swap, force into registers, or otherwise massage the two operands 11347 to an sse comparison with a mask result. Thus we differ a bit from 11348 ix86_prepare_fp_compare_args which expects to produce a flags result. 11349 11350 The DEST operand exists to help determine whether to commute commutative 11351 operators. The POP0/POP1 operands are updated in place. The new 11352 comparison code is returned, or UNKNOWN if not implementable. 
*/ 11353 11354static enum rtx_code 11355ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code, 11356 rtx *pop0, rtx *pop1) 11357{ 11358 rtx tmp; 11359 11360 switch (code) 11361 { 11362 case LTGT: 11363 case UNEQ: 11364 /* We have no LTGT as an operator. We could implement it with 11365 NE & ORDERED, but this requires an extra temporary. It's 11366 not clear that it's worth it. */ 11367 return UNKNOWN; 11368 11369 case LT: 11370 case LE: 11371 case UNGT: 11372 case UNGE: 11373 /* These are supported directly. */ 11374 break; 11375 11376 case EQ: 11377 case NE: 11378 case UNORDERED: 11379 case ORDERED: 11380 /* For commutative operators, try to canonicalize the destination 11381 operand to be first in the comparison - this helps reload to 11382 avoid extra moves. */ 11383 if (!dest || !rtx_equal_p (dest, *pop1)) 11384 break; 11385 /* FALLTHRU */ 11386 11387 case GE: 11388 case GT: 11389 case UNLE: 11390 case UNLT: 11391 /* These are not supported directly. Swap the comparison operands 11392 to transform into something that is supported. */ 11393 tmp = *pop0; 11394 *pop0 = *pop1; 11395 *pop1 = tmp; 11396 code = swap_condition (code); 11397 break; 11398 11399 default: 11400 gcc_unreachable (); 11401 } 11402 11403 return code; 11404} 11405 11406/* Detect conditional moves that exactly match min/max operational 11407 semantics. Note that this is IEEE safe, as long as we don't 11408 interchange the operands. 11409 11410 Returns FALSE if this conditional move doesn't match a MIN/MAX, 11411 and TRUE if the operation is successful and instructions are emitted. 
*/ 11412 11413static bool 11414ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, 11415 rtx cmp_op1, rtx if_true, rtx if_false) 11416{ 11417 enum machine_mode mode; 11418 bool is_min; 11419 rtx tmp; 11420 11421 if (code == LT) 11422 ; 11423 else if (code == UNGE) 11424 { 11425 tmp = if_true; 11426 if_true = if_false; 11427 if_false = tmp; 11428 } 11429 else 11430 return false; 11431 11432 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false)) 11433 is_min = true; 11434 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false)) 11435 is_min = false; 11436 else 11437 return false; 11438 11439 mode = GET_MODE (dest); 11440 11441 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here, 11442 but MODE may be a vector mode and thus not appropriate. */ 11443 if (!flag_finite_math_only || !flag_unsafe_math_optimizations) 11444 { 11445 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX; 11446 rtvec v; 11447 11448 if_true = force_reg (mode, if_true); 11449 v = gen_rtvec (2, if_true, if_false); 11450 tmp = gen_rtx_UNSPEC (mode, v, u); 11451 } 11452 else 11453 { 11454 code = is_min ? SMIN : SMAX; 11455 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false); 11456 } 11457 11458 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp)); 11459 return true; 11460} 11461 11462/* Expand an sse vector comparison. Return the register with the result. 
*/ 11463 11464static rtx 11465ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, 11466 rtx op_true, rtx op_false) 11467{ 11468 enum machine_mode mode = GET_MODE (dest); 11469 rtx x; 11470 11471 cmp_op0 = force_reg (mode, cmp_op0); 11472 if (!nonimmediate_operand (cmp_op1, mode)) 11473 cmp_op1 = force_reg (mode, cmp_op1); 11474 11475 if (optimize 11476 || reg_overlap_mentioned_p (dest, op_true) 11477 || reg_overlap_mentioned_p (dest, op_false)) 11478 dest = gen_reg_rtx (mode); 11479 11480 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1); 11481 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 11482 11483 return dest; 11484} 11485 11486/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical 11487 operations. This is used for both scalar and vector conditional moves. */ 11488 11489static void 11490ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) 11491{ 11492 enum machine_mode mode = GET_MODE (dest); 11493 rtx t2, t3, x; 11494 11495 if (op_false == CONST0_RTX (mode)) 11496 { 11497 op_true = force_reg (mode, op_true); 11498 x = gen_rtx_AND (mode, cmp, op_true); 11499 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 11500 } 11501 else if (op_true == CONST0_RTX (mode)) 11502 { 11503 op_false = force_reg (mode, op_false); 11504 x = gen_rtx_NOT (mode, cmp); 11505 x = gen_rtx_AND (mode, x, op_false); 11506 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 11507 } 11508 else 11509 { 11510 op_true = force_reg (mode, op_true); 11511 op_false = force_reg (mode, op_false); 11512 11513 t2 = gen_reg_rtx (mode); 11514 if (optimize) 11515 t3 = gen_reg_rtx (mode); 11516 else 11517 t3 = dest; 11518 11519 x = gen_rtx_AND (mode, op_true, cmp); 11520 emit_insn (gen_rtx_SET (VOIDmode, t2, x)); 11521 11522 x = gen_rtx_NOT (mode, cmp); 11523 x = gen_rtx_AND (mode, x, op_false); 11524 emit_insn (gen_rtx_SET (VOIDmode, t3, x)); 11525 11526 x = gen_rtx_IOR (mode, t3, t2); 11527 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 11528 } 11529} 11530 
/* Expand a floating-point conditional move.  Return true if successful.
   operands[0] = operands[1] ? operands[2] : operands[3], with the
   comparison operands taken from the globals ix86_compare_op0/op1.  */

int
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op, second_test, bypass_test;

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
	 allocation just to gain access to it.  Deny movcc when the
	 comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (ix86_compare_op0);
      if (cmode == VOIDmode)
	cmode = GET_MODE (ix86_compare_op1);
      if (cmode != mode)
	return 0;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code,
					       &ix86_compare_op0,
					       &ix86_compare_op1);
      if (code == UNKNOWN)
	return 0;

      /* First try the exact min/max pattern, then fall back to a
	 mask-compare followed by logical blending.  */
      if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
				     ix86_compare_op1, operands[2],
				     operands[3]))
	return 1;

      tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
				 ix86_compare_op1, operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return 1;
    }

  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* The floating point conditional move instructions don't directly
     support signed integer comparisons; materialize the condition as
     a QImode setcc and test that against zero instead.  */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      gcc_assert (!second_test && !bypass_test);
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  /* Copy an arm that the destination overlaps, since the extra fcmov
     emitted below reads it after operands[0] has been written.  */
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
						operands[2], operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode, bypass_test,
						  operands[3], operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			    gen_rtx_IF_THEN_ELSE (mode, second_test,
						  operands[2], operands[0])));

  return 1;
}

/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.
 */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
					   &operands[4], &operands[5]);
  if (code == UNKNOWN)
    return false;

  /* Prefer the exact min/max pattern; otherwise compare to a mask and
     blend the two arms with logical operations.  */
  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
				 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
			     operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}

/* Expand a signed integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  /* When NEGATE is set, the sense of the comparison has been inverted,
     which is compensated below by swapping the two value arms via the
     operands[1+negate]/operands[2-negate] indexing.  */
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Canonicalize the comparison to EQ, GT, GTU.  */
  switch (code)
    {
    case EQ:
    case GT:
    case GTU:
      break;

    case NE:
    case LE:
    case LEU:
      code = reverse_condition (code);
      negate = true;
      break;

    case GE:
    case GEU:
      code = reverse_condition (code);
      negate = true;
      /* FALLTHRU */

    case LT:
    case LTU:
      code = swap_condition (code);
      x = cop0, cop0 = cop1, cop1 = x;
      break;

    default:
      gcc_unreachable ();
    }

  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */
  if (code == GTU)
    {
      cop0 = force_reg (mode, cop0);

      switch (mode)
	{
	case V4SImode:
	  {
	    rtx t1, t2, mask;

	    /* Perform a parallel modulo subtraction.  */
	    t1 = gen_reg_rtx (mode);
	    emit_insn (gen_subv4si3 (t1, cop0, cop1));

	    /* Extract the original sign bit of op0.
	       NOTE(review): -0x80000000 relies on the host HOST_WIDE_INT
	       representation of the 32-bit sign-bit mask — confirm on
	       hosts where literals promote differently.  */
	    mask = GEN_INT (-0x80000000);
	    mask = gen_rtx_CONST_VECTOR (mode,
					 gen_rtvec (4, mask, mask, mask, mask));
	    mask = force_reg (mode, mask);
	    t2 = gen_reg_rtx (mode);
	    emit_insn (gen_andv4si3 (t2, cop0, mask));

	    /* XOR it back into the result of the subtraction.  This results
	       in the sign bit set iff we saw unsigned underflow.  */
	    x = gen_reg_rtx (mode);
	    emit_insn (gen_xorv4si3 (x, t1, t2));

	    code = GT;
	  }
	  break;

	case V16QImode:
	case V8HImode:
	  /* Perform a parallel unsigned saturating subtraction;
	     the result is zero exactly when cop0 <= cop1 (unsigned),
	     so test EQ against zero and flip the arms.  */
	  x = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, x,
				  gen_rtx_US_MINUS (mode, cop0, cop1)));

	  code = EQ;
	  negate = !negate;
	  break;

	default:
	  gcc_unreachable ();
	}

      cop0 = x;
      cop1 = CONST0_RTX (mode);
    }

  x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
			   operands[1+negate], operands[2-negate]);

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
			 operands[2-negate]);
  return true;
}

/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.
*/
int
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* Only +1/-1 adjustments can be folded into adc/sbb.  */
  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return 0;
  /* The comparison must be expressible through the carry flag.  */
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
				       ix86_compare_op1, &compare_op))
    return 0;
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  /* Canonicalize on LTU (carry set); otherwise reverse the condition in
     place and compensate with VAL = -1 so that adc/sbb of VAL plus the
     carry still produces the requested +-1.  */
  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }
  PUT_MODE (compare_op, mode);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
	{
	case QImode:
	  emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case HImode:
	  emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case SImode:
	  emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case DImode:
	  emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (GET_MODE (operands[0]))
	{
	case QImode:
	  emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case HImode:
	  emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case SImode:
	  emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
	  break;
	case DImode:
	  emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  return 1; /* DONE */
}


/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  /* Number of word-sized parts: SImode words on 32-bit, DImode words
     (rounded up) on 64-bit.  XFmode is 12 bytes => 3 SImode parts.  */
  if (!TARGET_64BIT)
    size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 3);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      /* All parts alias the same pre-dec address; the caller emits the
	 pushes in the right order.  */
      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
	 the operand may actually have a different mode now.  That's
	 ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_di (&operand, 1, &parts[0], &parts[1]);
      else
	{
	  if (REG_P (operand))
	    {
	      /* Hard-register splitting assumes consecutive regs, which
		 is only valid after reload.  */
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
	      if (size == 3)
		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, SImode, 4);
	      if (size == 3)
		parts[2] = adjust_address (operand, SImode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case XFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  gcc_unreachable ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }
  else
    {
      if (mode == TImode)
	split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  /* XFmode's upper part is only 4 bytes in 64-bit mode.  */
	  enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);

	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);

	      if (upper_mode == SImode)
		parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[1]
		  = gen_int_mode
		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		       DImode);
	      else
		parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }

  return size;
}

/* Emit insns to perform a move or push of DI, DF, and XF values.
   Operands 2-4 receive the input values in the correct order;
   operands 5-7 receive the output values.  (Despite the historical
   comment about returning false, this function is void; the caller's
   patterns treat the split as always done.)  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (GET_CODE (operands[0]) != MEM
		|| offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      /* Each earlier push shifts %esp, so re-address the lower source
	 parts at the address of the part above them.  */
      if (nparts == 3)
	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
				     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
				   XEXP (part[1][1], 0));
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
	collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	collisions++;
      if (nparts == 3
	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
	collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
	{
	  rtx tmp;
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  part[1][1] = replace_equiv_address (part[1][1],
					      plus_constant (base,
							     UNITS_PER_WORD));
	  if (nparts == 3)
	    part[1][2] = replace_equiv_address (part[1][2],
						plus_constant (base, 8));
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      /* Keep the 16-byte stack slot of 128-bit long double by
		 pre-adjusting %esp for the 12 bytes actually pushed.  */
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				       GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these comes from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      switch (GET_CODE (part[1][1]))
		{
		case MEM:
		  part[1][1] = adjust_address (part[1][1], DImode, 0);
		  break;

		case REG:
		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
		  break;

		default:
		  gcc_unreachable ();
		}

	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      /* Pushes go top part first; part[0][*] are the pre-dec slots.  */
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      /* Copy high-to-low.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][2];
	  operands[3] = part[0][1];
	  operands[4] = part[0][0];
	  operands[5] = part[1][2];
	  operands[6] = part[1][1];
	  operands[7] = part[1][0];
	}
      else
	{
	  operands[2] = part[0][1];
	  operands[3] = part[0][0];
	  operands[5] = part[1][1];
	  operands[6] = part[1][0];
	}
    }
  else
    {
      /* Copy low-to-high.  */
      if (nparts == 3)
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[4] = part[0][2];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	  operands[7] = part[1][2];
	}
      else
	{
	  operands[2] = part[0][0];
	  operands[3] = part[0][1];
	  operands[5] = part[1][0];
	  operands[6] = part[1][1];
	}
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_size)
    {
      if (GET_CODE (operands[5]) == CONST_INT
	  && operands[5] != const0_rtx
	  && REG_P (operands[2]))
	{
	  if (GET_CODE (operands[6]) == CONST_INT
	      && INTVAL (operands[6]) == INTVAL (operands[5]))
	    operands[6] = operands[2];

	  if (nparts == 3
	      && GET_CODE (operands[7]) == CONST_INT
	      && INTVAL (operands[7]) == INTVAL (operands[5]))
	    operands[7] = operands[2];
	}

      if (nparts == 3
	  && GET_CODE (operands[6]) == CONST_INT
	  && operands[6] != const0_rtx
	  && REG_P (operands[3])
	  && GET_CODE (operands[7]) == CONST_INT
	  && INTVAL (operands[7]) == INTVAL (operands[6]))
	operands[7] = operands[3];
    }

  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}

/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  MODE is the mode of the double-word
   value being split, so the pieces are the half-width mode (SImode when
   MODE is DImode, DImode when MODE is TImode).  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  if (count == 1)
    {
      emit_insn ((mode == DImode
		  ? gen_addsi3
		  : gen_adddi3) (operand, operand, operand));
    }
  else if (!optimize_size
	   && count * ix86_cost->add <= ix86_cost->shift_const)
    {
      int i;
      for (i = 0; i < count; i++)
	{
	  emit_insn ((mode == DImode
		      ? gen_addsi3
		      : gen_adddi3) (operand, operand, operand));
	}
    }
  else
    emit_insn ((mode == DImode
		? gen_ashlsi3
		: gen_ashldi3) (operand, operand, GEN_INT (count)));
}

/* Split a double-word left shift (DImode on 32-bit, TImode on 64-bit)
   into operations on the two half-width pieces.  SCRATCH, if non-NULL,
   enables a branch-free cmove-based fixup for variable counts.  */

void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      /* Hardware shifts mask the count to the double-word width.  */
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count >= single_width)
	{
	  /* Whole low word shifts out: move low into high, clear low.  */
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > single_width)
	    ix86_expand_ashl_const (high[0], count - single_width, mode);
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shld_1
		      : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
	  ix86_expand_ashl_const (low[0], count, mode);
	}
      return;
    }

  (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen a QImode capable registers, then 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
	{
	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

	  ix86_expand_clear (low[0]);
	  ix86_expand_clear (high[0]);
	  /* Test the word-select bit of the count; setcc puts the 1 into
	     whichever half receives the shifted-in bit.  */
	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));

	  d = gen_lowpart (QImode, low[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));

	  d = gen_lowpart (QImode, high[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_NE (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
	}

      /* Otherwise, we can get the same results by manually performing
	 a bit extract operation on bit 5/6, and then performing the two
	 shifts.  The two methods of getting 0/1 into low/high are exactly
	 the same size.  Avoiding the shift in the bit extract case helps
	 pentium4 a bit; no one else seems to care much either way.  */
      else
	{
	  rtx x;

	  if (TARGET_PARTIAL_REG_STALL && !optimize_size)
	    x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode,
				     operands[2]);
	  else
	    x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

	  emit_insn ((mode == DImode
		      ? gen_lshrsi3
		      : gen_lshrdi3) (high[0], high[0],
				      GEN_INT (mode == DImode ? 5 : 6)));
	  emit_insn ((mode == DImode
		      ? gen_andsi3
		      : gen_anddi3) (high[0], high[0], GEN_INT (1)));
	  emit_move_insn (low[0], high[0]);
	  emit_insn ((mode == DImode
		      ? gen_xorsi3
		      : gen_xordi3) (low[0], low[0], GEN_INT (1)));
	}

      emit_insn ((mode == DImode
		  ? gen_ashlsi3
		  : gen_ashldi3) (low[0], low[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_ashlsi3
		  : gen_ashldi3) (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
	 know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_size)
	emit_move_insn (high[0], low[0]);
      else
	emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
      emit_insn ((mode == DImode
		  ? gen_x86_shld_1
		  : gen_x86_64_shld) (high[0], low[0], operands[2]));
    }

  emit_insn ((mode == DImode
	      ? gen_ashlsi3
	      : gen_ashldi3) (low[0], low[0], operands[2]));

  /* Fix up the result when the masked count is >= single_width: either a
     branch-free cmove sequence using SCRATCH, or a conditional-jump one.  */
  if (TARGET_CMOVE && scratch)
    {
      ix86_expand_clear (scratch);
      emit_insn ((mode == DImode
		  ? gen_x86_shift_adj_1
		  : gen_x86_64_shift_adj) (high[0], low[0], operands[2],
					   scratch));
    }
  else
    emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
}

/* Split a double-word arithmetic right shift; counterpart of
   ix86_split_ashl.  */

void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count == single_width * 2 - 1)
	{
	  /* Maximal shift: both halves become the sign mask.  */
	  emit_move_insn (high[0], high[1]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0],
				      GEN_INT (single_width - 1)));
	  emit_move_insn (low[0], high[0]);

	}
      else if (count >= single_width)
	{
	  /* High word moves into low; high becomes the sign extension.  */
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], low[0]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0],
				      GEN_INT (single_width - 1)));
	  if (count > single_width)
	    emit_insn ((mode == DImode
			? gen_ashrsi3
			: gen_ashrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shrd_1
		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
		  ? gen_x86_shrd_1
		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_ashrsi3
		  : gen_ashrdi3) (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  /* SCRATCH holds the sign extension for the >= single_width case.  */
	  emit_move_insn (scratch, high[0]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (scratch, scratch,
				      GEN_INT (single_width - 1)));
	  emit_insn ((mode == DImode
		      ? gen_x86_shift_adj_1
		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
					       scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}

/* Split a double-word logical right shift; counterpart of
   ix86_split_ashl.  */

void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count >= single_width)
	{
	  emit_move_insn (low[0], high[1]);
	  ix86_expand_clear (high[0]);

	  if (count > single_width)
	    emit_insn ((mode == DImode
			? gen_lshrsi3
			: gen_lshrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shrd_1
		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
	  emit_insn ((mode == DImode
		      ? gen_lshrsi3
		      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
		  ? gen_x86_shrd_1
		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_lshrsi3
		  : gen_lshrdi3) (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && scratch)
	{
	  ix86_expand_clear (scratch);
	  emit_insn ((mode == DImode
		      ? gen_x86_shift_adj_1
		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
					       scratch));
	}
      else
	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}

/* Helper function for the string operations below.  Emit code that tests
   whether VARIABLE is aligned with respect to the low-bit mask VALUE
   (i.e. (VARIABLE & VALUE) == 0) and jumps to the returned label if so;
   fall-through means the low bits were nonzero.  */
static rtx
ix86_expand_aligntest (rtx variable, int value)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  return label;
}

/* Adjust COUNTREG down by VALUE (emits an add of -VALUE).  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend possibly SImode EXP to Pmode register.  A VOIDmode EXP
   (a constant) is simply forced into a Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}

/* Expand string move (memcpy) operation.
Use i386 string operations when 12549 profitable. expand_clrmem contains similar code. */ 12550int 12551ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp) 12552{ 12553 rtx srcreg, destreg, countreg, srcexp, destexp; 12554 enum machine_mode counter_mode; 12555 HOST_WIDE_INT align = 0; 12556 unsigned HOST_WIDE_INT count = 0; 12557 12558 if (GET_CODE (align_exp) == CONST_INT) 12559 align = INTVAL (align_exp); 12560 12561 /* Can't use any of this if the user has appropriated esi or edi. */ 12562 if (global_regs[4] || global_regs[5]) 12563 return 0; 12564 12565 /* This simple hack avoids all inlining code and simplifies code below. */ 12566 if (!TARGET_ALIGN_STRINGOPS) 12567 align = 64; 12568 12569 if (GET_CODE (count_exp) == CONST_INT) 12570 { 12571 count = INTVAL (count_exp); 12572 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64) 12573 return 0; 12574 } 12575 12576 /* Figure out proper mode for counter. For 32bits it is always SImode, 12577 for 64bits use SImode when possible, otherwise DImode. 12578 Set count to number of bytes copied when known at compile time. */ 12579 if (!TARGET_64BIT 12580 || GET_MODE (count_exp) == SImode 12581 || x86_64_zext_immediate_operand (count_exp, VOIDmode)) 12582 counter_mode = SImode; 12583 else 12584 counter_mode = DImode; 12585 12586 gcc_assert (counter_mode == SImode || counter_mode == DImode); 12587 12588 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0)); 12589 if (destreg != XEXP (dst, 0)) 12590 dst = replace_equiv_address_nv (dst, destreg); 12591 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0)); 12592 if (srcreg != XEXP (src, 0)) 12593 src = replace_equiv_address_nv (src, srcreg); 12594 12595 /* When optimizing for size emit simple rep ; movsb instruction for 12596 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)? 12597 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb. 12598 Sice of (movsl;)*(movsw;)?(movsb;)? 
sequence is 12599 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes, 12600 but we don't know whether upper 24 (resp. 56) bits of %ecx will be 12601 known to be zero or not. The rep; movsb sequence causes higher 12602 register pressure though, so take that into account. */ 12603 12604 if ((!optimize || optimize_size) 12605 && (count == 0 12606 || ((count & 0x03) 12607 && (!optimize_size 12608 || count > 5 * 4 12609 || (count & 3) + count / 4 > 6)))) 12610 { 12611 emit_insn (gen_cld ()); 12612 countreg = ix86_zero_extend_to_Pmode (count_exp); 12613 destexp = gen_rtx_PLUS (Pmode, destreg, countreg); 12614 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg); 12615 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg, 12616 destexp, srcexp)); 12617 } 12618 12619 /* For constant aligned (or small unaligned) copies use rep movsl 12620 followed by code copying the rest. For PentiumPro ensure 8 byte 12621 alignment to allow rep movsl acceleration. */ 12622 12623 else if (count != 0 12624 && (align >= 8 12625 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4) 12626 || optimize_size || count < (unsigned int) 64)) 12627 { 12628 unsigned HOST_WIDE_INT offset = 0; 12629 int size = TARGET_64BIT && !optimize_size ? 8 : 4; 12630 rtx srcmem, dstmem; 12631 12632 emit_insn (gen_cld ()); 12633 if (count & ~(size - 1)) 12634 { 12635 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4) 12636 { 12637 enum machine_mode movs_mode = size == 4 ? SImode : DImode; 12638 12639 while (offset < (count & ~(size - 1))) 12640 { 12641 srcmem = adjust_automodify_address_nv (src, movs_mode, 12642 srcreg, offset); 12643 dstmem = adjust_automodify_address_nv (dst, movs_mode, 12644 destreg, offset); 12645 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12646 offset += size; 12647 } 12648 } 12649 else 12650 { 12651 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3)) 12652 & (TARGET_64BIT ? 
-1 : 0x3fffffff)); 12653 countreg = copy_to_mode_reg (counter_mode, countreg); 12654 countreg = ix86_zero_extend_to_Pmode (countreg); 12655 12656 destexp = gen_rtx_ASHIFT (Pmode, countreg, 12657 GEN_INT (size == 4 ? 2 : 3)); 12658 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg); 12659 destexp = gen_rtx_PLUS (Pmode, destexp, destreg); 12660 12661 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, 12662 countreg, destexp, srcexp)); 12663 offset = count & ~(size - 1); 12664 } 12665 } 12666 if (size == 8 && (count & 0x04)) 12667 { 12668 srcmem = adjust_automodify_address_nv (src, SImode, srcreg, 12669 offset); 12670 dstmem = adjust_automodify_address_nv (dst, SImode, destreg, 12671 offset); 12672 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12673 offset += 4; 12674 } 12675 if (count & 0x02) 12676 { 12677 srcmem = adjust_automodify_address_nv (src, HImode, srcreg, 12678 offset); 12679 dstmem = adjust_automodify_address_nv (dst, HImode, destreg, 12680 offset); 12681 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12682 offset += 2; 12683 } 12684 if (count & 0x01) 12685 { 12686 srcmem = adjust_automodify_address_nv (src, QImode, srcreg, 12687 offset); 12688 dstmem = adjust_automodify_address_nv (dst, QImode, destreg, 12689 offset); 12690 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12691 } 12692 } 12693 /* The generic code based on the glibc implementation: 12694 - align destination to 4 bytes (8 byte alignment is used for PentiumPro 12695 allowing accelerated copying there) 12696 - copy the data using rep movsl 12697 - copy the rest. */ 12698 else 12699 { 12700 rtx countreg2; 12701 rtx label = NULL; 12702 rtx srcmem, dstmem; 12703 int desired_alignment = (TARGET_PENTIUMPRO 12704 && (count == 0 || count >= (unsigned int) 260) 12705 ? 8 : UNITS_PER_WORD); 12706 /* Get rid of MEM_OFFSETs, they won't be accurate. 
*/ 12707 dst = change_address (dst, BLKmode, destreg); 12708 src = change_address (src, BLKmode, srcreg); 12709 12710 /* In case we don't know anything about the alignment, default to 12711 library version, since it is usually equally fast and result in 12712 shorter code. 12713 12714 Also emit call when we know that the count is large and call overhead 12715 will not be important. */ 12716 if (!TARGET_INLINE_ALL_STRINGOPS 12717 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL)) 12718 return 0; 12719 12720 if (TARGET_SINGLE_STRINGOP) 12721 emit_insn (gen_cld ()); 12722 12723 countreg2 = gen_reg_rtx (Pmode); 12724 countreg = copy_to_mode_reg (counter_mode, count_exp); 12725 12726 /* We don't use loops to align destination and to copy parts smaller 12727 than 4 bytes, because gcc is able to optimize such code better (in 12728 the case the destination or the count really is aligned, gcc is often 12729 able to predict the branches) and also it is friendlier to the 12730 hardware branch prediction. 12731 12732 Using loops is beneficial for generic case, because we can 12733 handle small counts using the loops. Many CPUs (such as Athlon) 12734 have large REP prefix setup costs. 12735 12736 This is quite costly. Maybe we can revisit this decision later or 12737 add some customizability to this code. 
*/ 12738 12739 if (count == 0 && align < desired_alignment) 12740 { 12741 label = gen_label_rtx (); 12742 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1), 12743 LEU, 0, counter_mode, 1, label); 12744 } 12745 if (align <= 1) 12746 { 12747 rtx label = ix86_expand_aligntest (destreg, 1); 12748 srcmem = change_address (src, QImode, srcreg); 12749 dstmem = change_address (dst, QImode, destreg); 12750 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12751 ix86_adjust_counter (countreg, 1); 12752 emit_label (label); 12753 LABEL_NUSES (label) = 1; 12754 } 12755 if (align <= 2) 12756 { 12757 rtx label = ix86_expand_aligntest (destreg, 2); 12758 srcmem = change_address (src, HImode, srcreg); 12759 dstmem = change_address (dst, HImode, destreg); 12760 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12761 ix86_adjust_counter (countreg, 2); 12762 emit_label (label); 12763 LABEL_NUSES (label) = 1; 12764 } 12765 if (align <= 4 && desired_alignment > 4) 12766 { 12767 rtx label = ix86_expand_aligntest (destreg, 4); 12768 srcmem = change_address (src, SImode, srcreg); 12769 dstmem = change_address (dst, SImode, destreg); 12770 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12771 ix86_adjust_counter (countreg, 4); 12772 emit_label (label); 12773 LABEL_NUSES (label) = 1; 12774 } 12775 12776 if (label && desired_alignment > 4 && !TARGET_64BIT) 12777 { 12778 emit_label (label); 12779 LABEL_NUSES (label) = 1; 12780 label = NULL_RTX; 12781 } 12782 if (!TARGET_SINGLE_STRINGOP) 12783 emit_insn (gen_cld ()); 12784 if (TARGET_64BIT) 12785 { 12786 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg), 12787 GEN_INT (3))); 12788 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3)); 12789 } 12790 else 12791 { 12792 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx)); 12793 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx); 12794 } 12795 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg); 12796 destexp = gen_rtx_PLUS 
(Pmode, destexp, destreg); 12797 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, 12798 countreg2, destexp, srcexp)); 12799 12800 if (label) 12801 { 12802 emit_label (label); 12803 LABEL_NUSES (label) = 1; 12804 } 12805 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4)) 12806 { 12807 srcmem = change_address (src, SImode, srcreg); 12808 dstmem = change_address (dst, SImode, destreg); 12809 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12810 } 12811 if ((align <= 4 || count == 0) && TARGET_64BIT) 12812 { 12813 rtx label = ix86_expand_aligntest (countreg, 4); 12814 srcmem = change_address (src, SImode, srcreg); 12815 dstmem = change_address (dst, SImode, destreg); 12816 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12817 emit_label (label); 12818 LABEL_NUSES (label) = 1; 12819 } 12820 if (align > 2 && count != 0 && (count & 2)) 12821 { 12822 srcmem = change_address (src, HImode, srcreg); 12823 dstmem = change_address (dst, HImode, destreg); 12824 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12825 } 12826 if (align <= 2 || count == 0) 12827 { 12828 rtx label = ix86_expand_aligntest (countreg, 2); 12829 srcmem = change_address (src, HImode, srcreg); 12830 dstmem = change_address (dst, HImode, destreg); 12831 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12832 emit_label (label); 12833 LABEL_NUSES (label) = 1; 12834 } 12835 if (align > 1 && count != 0 && (count & 1)) 12836 { 12837 srcmem = change_address (src, QImode, srcreg); 12838 dstmem = change_address (dst, QImode, destreg); 12839 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12840 } 12841 if (align <= 1 || count == 0) 12842 { 12843 rtx label = ix86_expand_aligntest (countreg, 1); 12844 srcmem = change_address (src, QImode, srcreg); 12845 dstmem = change_address (dst, QImode, destreg); 12846 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); 12847 emit_label (label); 12848 LABEL_NUSES (label) = 1; 12849 } 12850 } 12851 12852 return 1; 
12853} 12854 12855/* Expand string clear operation (bzero). Use i386 string operations when 12856 profitable. expand_movmem contains similar code. */ 12857int 12858ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp) 12859{ 12860 rtx destreg, zeroreg, countreg, destexp; 12861 enum machine_mode counter_mode; 12862 HOST_WIDE_INT align = 0; 12863 unsigned HOST_WIDE_INT count = 0; 12864 12865 if (GET_CODE (align_exp) == CONST_INT) 12866 align = INTVAL (align_exp); 12867 12868 /* Can't use any of this if the user has appropriated esi. */ 12869 if (global_regs[4]) 12870 return 0; 12871 12872 /* This simple hack avoids all inlining code and simplifies code below. */ 12873 if (!TARGET_ALIGN_STRINGOPS) 12874 align = 32; 12875 12876 if (GET_CODE (count_exp) == CONST_INT) 12877 { 12878 count = INTVAL (count_exp); 12879 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64) 12880 return 0; 12881 } 12882 /* Figure out proper mode for counter. For 32bits it is always SImode, 12883 for 64bits use SImode when possible, otherwise DImode. 12884 Set count to number of bytes copied when known at compile time. */ 12885 if (!TARGET_64BIT 12886 || GET_MODE (count_exp) == SImode 12887 || x86_64_zext_immediate_operand (count_exp, VOIDmode)) 12888 counter_mode = SImode; 12889 else 12890 counter_mode = DImode; 12891 12892 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0)); 12893 if (destreg != XEXP (dst, 0)) 12894 dst = replace_equiv_address_nv (dst, destreg); 12895 12896 12897 /* When optimizing for size emit simple rep ; movsb instruction for 12898 counts not divisible by 4. The movl $N, %ecx; rep; stosb 12899 sequence is 7 bytes long, so if optimizing for size and count is 12900 small enough that some stosl, stosw and stosb instructions without 12901 rep are shorter, fall back into the next if. 
*/ 12902 12903 if ((!optimize || optimize_size) 12904 && (count == 0 12905 || ((count & 0x03) 12906 && (!optimize_size || (count & 0x03) + (count >> 2) > 7)))) 12907 { 12908 emit_insn (gen_cld ()); 12909 12910 countreg = ix86_zero_extend_to_Pmode (count_exp); 12911 zeroreg = copy_to_mode_reg (QImode, const0_rtx); 12912 destexp = gen_rtx_PLUS (Pmode, destreg, countreg); 12913 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp)); 12914 } 12915 else if (count != 0 12916 && (align >= 8 12917 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4) 12918 || optimize_size || count < (unsigned int) 64)) 12919 { 12920 int size = TARGET_64BIT && !optimize_size ? 8 : 4; 12921 unsigned HOST_WIDE_INT offset = 0; 12922 12923 emit_insn (gen_cld ()); 12924 12925 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx); 12926 if (count & ~(size - 1)) 12927 { 12928 unsigned HOST_WIDE_INT repcount; 12929 unsigned int max_nonrep; 12930 12931 repcount = count >> (size == 4 ? 2 : 3); 12932 if (!TARGET_64BIT) 12933 repcount &= 0x3fffffff; 12934 12935 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes. 12936 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN 12937 bytes. In both cases the latter seems to be faster for small 12938 values of N. */ 12939 max_nonrep = size == 4 ? 
7 : 4; 12940 if (!optimize_size) 12941 switch (ix86_tune) 12942 { 12943 case PROCESSOR_PENTIUM4: 12944 case PROCESSOR_NOCONA: 12945 max_nonrep = 3; 12946 break; 12947 default: 12948 break; 12949 } 12950 12951 if (repcount <= max_nonrep) 12952 while (repcount-- > 0) 12953 { 12954 rtx mem = adjust_automodify_address_nv (dst, 12955 GET_MODE (zeroreg), 12956 destreg, offset); 12957 emit_insn (gen_strset (destreg, mem, zeroreg)); 12958 offset += size; 12959 } 12960 else 12961 { 12962 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount)); 12963 countreg = ix86_zero_extend_to_Pmode (countreg); 12964 destexp = gen_rtx_ASHIFT (Pmode, countreg, 12965 GEN_INT (size == 4 ? 2 : 3)); 12966 destexp = gen_rtx_PLUS (Pmode, destexp, destreg); 12967 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, 12968 destexp)); 12969 offset = count & ~(size - 1); 12970 } 12971 } 12972 if (size == 8 && (count & 0x04)) 12973 { 12974 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg, 12975 offset); 12976 emit_insn (gen_strset (destreg, mem, 12977 gen_rtx_SUBREG (SImode, zeroreg, 0))); 12978 offset += 4; 12979 } 12980 if (count & 0x02) 12981 { 12982 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg, 12983 offset); 12984 emit_insn (gen_strset (destreg, mem, 12985 gen_rtx_SUBREG (HImode, zeroreg, 0))); 12986 offset += 2; 12987 } 12988 if (count & 0x01) 12989 { 12990 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg, 12991 offset); 12992 emit_insn (gen_strset (destreg, mem, 12993 gen_rtx_SUBREG (QImode, zeroreg, 0))); 12994 } 12995 } 12996 else 12997 { 12998 rtx countreg2; 12999 rtx label = NULL; 13000 /* Compute desired alignment of the string operation. */ 13001 int desired_alignment = (TARGET_PENTIUMPRO 13002 && (count == 0 || count >= (unsigned int) 260) 13003 ? 8 : UNITS_PER_WORD); 13004 13005 /* In case we don't know anything about the alignment, default to 13006 library version, since it is usually equally fast and result in 13007 shorter code. 
13008 13009 Also emit call when we know that the count is large and call overhead 13010 will not be important. */ 13011 if (!TARGET_INLINE_ALL_STRINGOPS 13012 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL)) 13013 return 0; 13014 13015 if (TARGET_SINGLE_STRINGOP) 13016 emit_insn (gen_cld ()); 13017 13018 countreg2 = gen_reg_rtx (Pmode); 13019 countreg = copy_to_mode_reg (counter_mode, count_exp); 13020 zeroreg = copy_to_mode_reg (Pmode, const0_rtx); 13021 /* Get rid of MEM_OFFSET, it won't be accurate. */ 13022 dst = change_address (dst, BLKmode, destreg); 13023 13024 if (count == 0 && align < desired_alignment) 13025 { 13026 label = gen_label_rtx (); 13027 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1), 13028 LEU, 0, counter_mode, 1, label); 13029 } 13030 if (align <= 1) 13031 { 13032 rtx label = ix86_expand_aligntest (destreg, 1); 13033 emit_insn (gen_strset (destreg, dst, 13034 gen_rtx_SUBREG (QImode, zeroreg, 0))); 13035 ix86_adjust_counter (countreg, 1); 13036 emit_label (label); 13037 LABEL_NUSES (label) = 1; 13038 } 13039 if (align <= 2) 13040 { 13041 rtx label = ix86_expand_aligntest (destreg, 2); 13042 emit_insn (gen_strset (destreg, dst, 13043 gen_rtx_SUBREG (HImode, zeroreg, 0))); 13044 ix86_adjust_counter (countreg, 2); 13045 emit_label (label); 13046 LABEL_NUSES (label) = 1; 13047 } 13048 if (align <= 4 && desired_alignment > 4) 13049 { 13050 rtx label = ix86_expand_aligntest (destreg, 4); 13051 emit_insn (gen_strset (destreg, dst, 13052 (TARGET_64BIT 13053 ? 
gen_rtx_SUBREG (SImode, zeroreg, 0) 13054 : zeroreg))); 13055 ix86_adjust_counter (countreg, 4); 13056 emit_label (label); 13057 LABEL_NUSES (label) = 1; 13058 } 13059 13060 if (label && desired_alignment > 4 && !TARGET_64BIT) 13061 { 13062 emit_label (label); 13063 LABEL_NUSES (label) = 1; 13064 label = NULL_RTX; 13065 } 13066 13067 if (!TARGET_SINGLE_STRINGOP) 13068 emit_insn (gen_cld ()); 13069 if (TARGET_64BIT) 13070 { 13071 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg), 13072 GEN_INT (3))); 13073 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3)); 13074 } 13075 else 13076 { 13077 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx)); 13078 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx); 13079 } 13080 destexp = gen_rtx_PLUS (Pmode, destexp, destreg); 13081 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp)); 13082 13083 if (label) 13084 { 13085 emit_label (label); 13086 LABEL_NUSES (label) = 1; 13087 } 13088 13089 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4)) 13090 emit_insn (gen_strset (destreg, dst, 13091 gen_rtx_SUBREG (SImode, zeroreg, 0))); 13092 if (TARGET_64BIT && (align <= 4 || count == 0)) 13093 { 13094 rtx label = ix86_expand_aligntest (countreg, 4); 13095 emit_insn (gen_strset (destreg, dst, 13096 gen_rtx_SUBREG (SImode, zeroreg, 0))); 13097 emit_label (label); 13098 LABEL_NUSES (label) = 1; 13099 } 13100 if (align > 2 && count != 0 && (count & 2)) 13101 emit_insn (gen_strset (destreg, dst, 13102 gen_rtx_SUBREG (HImode, zeroreg, 0))); 13103 if (align <= 2 || count == 0) 13104 { 13105 rtx label = ix86_expand_aligntest (countreg, 2); 13106 emit_insn (gen_strset (destreg, dst, 13107 gen_rtx_SUBREG (HImode, zeroreg, 0))); 13108 emit_label (label); 13109 LABEL_NUSES (label) = 1; 13110 } 13111 if (align > 1 && count != 0 && (count & 1)) 13112 emit_insn (gen_strset (destreg, dst, 13113 gen_rtx_SUBREG (QImode, zeroreg, 0))); 13114 if (align <= 1 || count == 0) 13115 { 13116 rtx label 
= ix86_expand_aligntest (countreg, 1); 13117 emit_insn (gen_strset (destreg, dst, 13118 gen_rtx_SUBREG (QImode, zeroreg, 0))); 13119 emit_label (label); 13120 LABEL_NUSES (label) = 1; 13121 } 13122 } 13123 return 1; 13124} 13125 13126/* Expand strlen. */ 13127int 13128ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align) 13129{ 13130 rtx addr, scratch1, scratch2, scratch3, scratch4; 13131 13132 /* The generic case of strlen expander is long. Avoid it's 13133 expanding unless TARGET_INLINE_ALL_STRINGOPS. */ 13134 13135 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 13136 && !TARGET_INLINE_ALL_STRINGOPS 13137 && !optimize_size 13138 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4)) 13139 return 0; 13140 13141 addr = force_reg (Pmode, XEXP (src, 0)); 13142 scratch1 = gen_reg_rtx (Pmode); 13143 13144 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 13145 && !optimize_size) 13146 { 13147 /* Well it seems that some optimizer does not combine a call like 13148 foo(strlen(bar), strlen(bar)); 13149 when the move and the subtraction is done here. It does calculate 13150 the length just once when these instructions are done inside of 13151 output_strlen_unroll(). But I think since &bar[strlen(bar)] is 13152 often used and I use one fewer register for the lifetime of 13153 output_strlen_unroll() this is better. */ 13154 13155 emit_move_insn (out, addr); 13156 13157 ix86_expand_strlensi_unroll_1 (out, src, align); 13158 13159 /* strlensi_unroll_1 returns the address of the zero at the end of 13160 the string, like memchr(), so compute the length by subtracting 13161 the start address. 
*/ 13162 if (TARGET_64BIT) 13163 emit_insn (gen_subdi3 (out, out, addr)); 13164 else 13165 emit_insn (gen_subsi3 (out, out, addr)); 13166 } 13167 else 13168 { 13169 rtx unspec; 13170 scratch2 = gen_reg_rtx (Pmode); 13171 scratch3 = gen_reg_rtx (Pmode); 13172 scratch4 = force_reg (Pmode, constm1_rtx); 13173 13174 emit_move_insn (scratch3, addr); 13175 eoschar = force_reg (QImode, eoschar); 13176 13177 emit_insn (gen_cld ()); 13178 src = replace_equiv_address_nv (src, scratch3); 13179 13180 /* If .md starts supporting :P, this can be done in .md. */ 13181 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align, 13182 scratch4), UNSPEC_SCAS); 13183 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec)); 13184 if (TARGET_64BIT) 13185 { 13186 emit_insn (gen_one_cmpldi2 (scratch2, scratch1)); 13187 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx)); 13188 } 13189 else 13190 { 13191 emit_insn (gen_one_cmplsi2 (scratch2, scratch1)); 13192 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx)); 13193 } 13194 } 13195 return 1; 13196} 13197 13198/* Expand the appropriate insns for doing strlen if not just doing 13199 repnz; scasb 13200 13201 out = result, initialized with the start address 13202 align_rtx = alignment of the address. 13203 scratch = scratch register, initialized with the startaddress when 13204 not aligned, otherwise undefined 13205 13206 This is just the body. It needs the initializations mentioned above and 13207 some address computing at the end. These things are done in i386.md. 
*/ 13208 13209static void 13210ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx) 13211{ 13212 int align; 13213 rtx tmp; 13214 rtx align_2_label = NULL_RTX; 13215 rtx align_3_label = NULL_RTX; 13216 rtx align_4_label = gen_label_rtx (); 13217 rtx end_0_label = gen_label_rtx (); 13218 rtx mem; 13219 rtx tmpreg = gen_reg_rtx (SImode); 13220 rtx scratch = gen_reg_rtx (SImode); 13221 rtx cmp; 13222 13223 align = 0; 13224 if (GET_CODE (align_rtx) == CONST_INT) 13225 align = INTVAL (align_rtx); 13226 13227 /* Loop to check 1..3 bytes for null to get an aligned pointer. */ 13228 13229 /* Is there a known alignment and is it less than 4? */ 13230 if (align < 4) 13231 { 13232 rtx scratch1 = gen_reg_rtx (Pmode); 13233 emit_move_insn (scratch1, out); 13234 /* Is there a known alignment and is it not 2? */ 13235 if (align != 2) 13236 { 13237 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */ 13238 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */ 13239 13240 /* Leave just the 3 lower bits. */ 13241 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3), 13242 NULL_RTX, 0, OPTAB_WIDEN); 13243 13244 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, 13245 Pmode, 1, align_4_label); 13246 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL, 13247 Pmode, 1, align_2_label); 13248 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL, 13249 Pmode, 1, align_3_label); 13250 } 13251 else 13252 { 13253 /* Since the alignment is 2, we have to check 2 or 0 bytes; 13254 check if is aligned to 4 - byte. */ 13255 13256 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx, 13257 NULL_RTX, 0, OPTAB_WIDEN); 13258 13259 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, 13260 Pmode, 1, align_4_label); 13261 } 13262 13263 mem = change_address (src, QImode, out); 13264 13265 /* Now compare the bytes. */ 13266 13267 /* Compare the first n unaligned byte on a byte per byte basis. 
*/ 13268 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, 13269 QImode, 1, end_0_label); 13270 13271 /* Increment the address. */ 13272 if (TARGET_64BIT) 13273 emit_insn (gen_adddi3 (out, out, const1_rtx)); 13274 else 13275 emit_insn (gen_addsi3 (out, out, const1_rtx)); 13276 13277 /* Not needed with an alignment of 2 */ 13278 if (align != 2) 13279 { 13280 emit_label (align_2_label); 13281 13282 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, 13283 end_0_label); 13284 13285 if (TARGET_64BIT) 13286 emit_insn (gen_adddi3 (out, out, const1_rtx)); 13287 else 13288 emit_insn (gen_addsi3 (out, out, const1_rtx)); 13289 13290 emit_label (align_3_label); 13291 } 13292 13293 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, 13294 end_0_label); 13295 13296 if (TARGET_64BIT) 13297 emit_insn (gen_adddi3 (out, out, const1_rtx)); 13298 else 13299 emit_insn (gen_addsi3 (out, out, const1_rtx)); 13300 } 13301 13302 /* Generate loop to check 4 bytes at a time. It is not a good idea to 13303 align this loop. It gives only huge programs, but does not help to 13304 speed up. */ 13305 emit_label (align_4_label); 13306 13307 mem = change_address (src, SImode, out); 13308 emit_move_insn (scratch, mem); 13309 if (TARGET_64BIT) 13310 emit_insn (gen_adddi3 (out, out, GEN_INT (4))); 13311 else 13312 emit_insn (gen_addsi3 (out, out, GEN_INT (4))); 13313 13314 /* This formula yields a nonzero result iff one of the bytes is zero. 13315 This saves three branches inside loop and many cycles. 
*/ 13316 13317 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101))); 13318 emit_insn (gen_one_cmplsi2 (scratch, scratch)); 13319 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch)); 13320 emit_insn (gen_andsi3 (tmpreg, tmpreg, 13321 gen_int_mode (0x80808080, SImode))); 13322 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 13323 align_4_label); 13324 13325 if (TARGET_CMOVE) 13326 { 13327 rtx reg = gen_reg_rtx (SImode); 13328 rtx reg2 = gen_reg_rtx (Pmode); 13329 emit_move_insn (reg, tmpreg); 13330 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16))); 13331 13332 /* If zero is not in the first two bytes, move two bytes forward. */ 13333 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); 13334 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 13335 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); 13336 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, 13337 gen_rtx_IF_THEN_ELSE (SImode, tmp, 13338 reg, 13339 tmpreg))); 13340 /* Emit lea manually to avoid clobbering of flags. */ 13341 emit_insn (gen_rtx_SET (SImode, reg2, 13342 gen_rtx_PLUS (Pmode, out, const2_rtx))); 13343 13344 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 13345 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); 13346 emit_insn (gen_rtx_SET (VOIDmode, out, 13347 gen_rtx_IF_THEN_ELSE (Pmode, tmp, 13348 reg2, 13349 out))); 13350 13351 } 13352 else 13353 { 13354 rtx end_2_label = gen_label_rtx (); 13355 /* Is zero in the first two bytes? */ 13356 13357 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); 13358 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 13359 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx); 13360 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 13361 gen_rtx_LABEL_REF (VOIDmode, end_2_label), 13362 pc_rtx); 13363 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); 13364 JUMP_LABEL (tmp) = end_2_label; 13365 13366 /* Not in the first two. Move two bytes forward. 
*/ 13367 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16))); 13368 if (TARGET_64BIT) 13369 emit_insn (gen_adddi3 (out, out, const2_rtx)); 13370 else 13371 emit_insn (gen_addsi3 (out, out, const2_rtx)); 13372 13373 emit_label (end_2_label); 13374 13375 } 13376 13377 /* Avoid branch in fixing the byte. */ 13378 tmpreg = gen_lowpart (QImode, tmpreg); 13379 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg)); 13380 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx); 13381 if (TARGET_64BIT) 13382 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp)); 13383 else 13384 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp)); 13385 13386 emit_label (end_0_label); 13387} 13388 13389void 13390ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, 13391 rtx callarg2 ATTRIBUTE_UNUSED, 13392 rtx pop, int sibcall) 13393{ 13394 rtx use = NULL, call; 13395 13396 if (pop == const0_rtx) 13397 pop = NULL; 13398 gcc_assert (!TARGET_64BIT || !pop); 13399 13400 if (TARGET_MACHO && !TARGET_64BIT) 13401 { 13402#if TARGET_MACHO 13403 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) 13404 fnaddr = machopic_indirect_call_target (fnaddr); 13405#endif 13406 } 13407 else 13408 { 13409 /* Static functions and indirect calls don't need the pic register. */ 13410 if (! TARGET_64BIT && flag_pic 13411 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF 13412 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))) 13413 use_reg (&use, pic_offset_table_rtx); 13414 } 13415 13416 if (TARGET_64BIT && INTVAL (callarg2) >= 0) 13417 { 13418 rtx al = gen_rtx_REG (QImode, 0); 13419 emit_move_insn (al, callarg2); 13420 use_reg (&use, al); 13421 } 13422 13423 if (! 
call_insn_operand (XEXP (fnaddr, 0), Pmode)) 13424 { 13425 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0)); 13426 fnaddr = gen_rtx_MEM (QImode, fnaddr); 13427 } 13428 if (sibcall && TARGET_64BIT 13429 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode)) 13430 { 13431 rtx addr; 13432 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0)); 13433 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */); 13434 emit_move_insn (fnaddr, addr); 13435 fnaddr = gen_rtx_MEM (QImode, fnaddr); 13436 } 13437 13438 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); 13439 if (retval) 13440 call = gen_rtx_SET (VOIDmode, retval, call); 13441 if (pop) 13442 { 13443 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop); 13444 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop); 13445 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop)); 13446 } 13447 13448 call = emit_call_insn (call); 13449 if (use) 13450 CALL_INSN_FUNCTION_USAGE (call) = use; 13451} 13452 13453 13454/* Clear stack slot assignments remembered from previous functions. 13455 This is called from INIT_EXPANDERS once before RTL is emitted for each 13456 function. */ 13457 13458static struct machine_function * 13459ix86_init_machine_status (void) 13460{ 13461 struct machine_function *f; 13462 13463 f = ggc_alloc_cleared (sizeof (struct machine_function)); 13464 f->use_fast_prologue_epilogue_nregs = -1; 13465 f->tls_descriptor_call_expanded_p = 0; 13466 13467 return f; 13468} 13469 13470/* Return a MEM corresponding to a stack slot with mode MODE. 13471 Allocate a new slot if necessary. 13472 13473 The RTL for a function can have several slots available: N is 13474 which slot to use. */ 13475 13476rtx 13477assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n) 13478{ 13479 struct stack_local_entry *s; 13480 13481 gcc_assert (n < MAX_386_STACK_LOCALS); 13482 13483 /* Virtual slot is valid only before vregs are instantiated. 
*/ 13484 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated); 13485 13486 for (s = ix86_stack_locals; s; s = s->next) 13487 if (s->mode == mode && s->n == n) 13488 return s->rtl; 13489 13490 s = (struct stack_local_entry *) 13491 ggc_alloc (sizeof (struct stack_local_entry)); 13492 s->n = n; 13493 s->mode = mode; 13494 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0); 13495 13496 s->next = ix86_stack_locals; 13497 ix86_stack_locals = s; 13498 return s->rtl; 13499} 13500 13501/* Construct the SYMBOL_REF for the tls_get_addr function. */ 13502 13503static GTY(()) rtx ix86_tls_symbol; 13504rtx 13505ix86_tls_get_addr (void) 13506{ 13507 13508 if (!ix86_tls_symbol) 13509 { 13510 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, 13511 (TARGET_ANY_GNU_TLS 13512 && !TARGET_64BIT) 13513 ? "___tls_get_addr" 13514 : "__tls_get_addr"); 13515 } 13516 13517 return ix86_tls_symbol; 13518} 13519 13520/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */ 13521 13522static GTY(()) rtx ix86_tls_module_base_symbol; 13523rtx 13524ix86_tls_module_base (void) 13525{ 13526 13527 if (!ix86_tls_module_base_symbol) 13528 { 13529 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode, 13530 "_TLS_MODULE_BASE_"); 13531 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol) 13532 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; 13533 } 13534 13535 return ix86_tls_module_base_symbol; 13536} 13537 13538/* Calculate the length of the memory address in the instruction 13539 encoding. Does not include the one-byte modrm, opcode, or prefix. 
*/ 13540 13541int 13542memory_address_length (rtx addr) 13543{ 13544 struct ix86_address parts; 13545 rtx base, index, disp; 13546 int len; 13547 int ok; 13548 13549 if (GET_CODE (addr) == PRE_DEC 13550 || GET_CODE (addr) == POST_INC 13551 || GET_CODE (addr) == PRE_MODIFY 13552 || GET_CODE (addr) == POST_MODIFY) 13553 return 0; 13554 13555 ok = ix86_decompose_address (addr, &parts); 13556 gcc_assert (ok); 13557 13558 if (parts.base && GET_CODE (parts.base) == SUBREG) 13559 parts.base = SUBREG_REG (parts.base); 13560 if (parts.index && GET_CODE (parts.index) == SUBREG) 13561 parts.index = SUBREG_REG (parts.index); 13562 13563 base = parts.base; 13564 index = parts.index; 13565 disp = parts.disp; 13566 len = 0; 13567 13568 /* Rule of thumb: 13569 - esp as the base always wants an index, 13570 - ebp as the base always wants a displacement. */ 13571 13572 /* Register Indirect. */ 13573 if (base && !index && !disp) 13574 { 13575 /* esp (for its index) and ebp (for its displacement) need 13576 the two-byte modrm form. */ 13577 if (addr == stack_pointer_rtx 13578 || addr == arg_pointer_rtx 13579 || addr == frame_pointer_rtx 13580 || addr == hard_frame_pointer_rtx) 13581 len = 1; 13582 } 13583 13584 /* Direct Addressing. */ 13585 else if (disp && !base && !index) 13586 len = 4; 13587 13588 else 13589 { 13590 /* Find the length of the displacement constant. */ 13591 if (disp) 13592 { 13593 if (base && satisfies_constraint_K (disp)) 13594 len = 1; 13595 else 13596 len = 4; 13597 } 13598 /* ebp always wants a displacement. */ 13599 else if (base == hard_frame_pointer_rtx) 13600 len = 1; 13601 13602 /* An index requires the two-byte modrm form.... */ 13603 if (index 13604 /* ...like esp, which always wants an index. */ 13605 || base == stack_pointer_rtx 13606 || base == arg_pointer_rtx 13607 || base == frame_pointer_rtx) 13608 len += 1; 13609 } 13610 13611 return len; 13612} 13613 13614/* Compute default value for "length_immediate" attribute. 
When SHORTFORM 13615 is set, expect that insn have 8bit immediate alternative. */ 13616int 13617ix86_attr_length_immediate_default (rtx insn, int shortform) 13618{ 13619 int len = 0; 13620 int i; 13621 extract_insn_cached (insn); 13622 for (i = recog_data.n_operands - 1; i >= 0; --i) 13623 if (CONSTANT_P (recog_data.operand[i])) 13624 { 13625 gcc_assert (!len); 13626 if (shortform && satisfies_constraint_K (recog_data.operand[i])) 13627 len = 1; 13628 else 13629 { 13630 switch (get_attr_mode (insn)) 13631 { 13632 case MODE_QI: 13633 len+=1; 13634 break; 13635 case MODE_HI: 13636 len+=2; 13637 break; 13638 case MODE_SI: 13639 len+=4; 13640 break; 13641 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */ 13642 case MODE_DI: 13643 len+=4; 13644 break; 13645 default: 13646 fatal_insn ("unknown insn mode", insn); 13647 } 13648 } 13649 } 13650 return len; 13651} 13652/* Compute default value for "length_address" attribute. */ 13653int 13654ix86_attr_length_address_default (rtx insn) 13655{ 13656 int i; 13657 13658 if (get_attr_type (insn) == TYPE_LEA) 13659 { 13660 rtx set = PATTERN (insn); 13661 13662 if (GET_CODE (set) == PARALLEL) 13663 set = XVECEXP (set, 0, 0); 13664 13665 gcc_assert (GET_CODE (set) == SET); 13666 13667 return memory_address_length (SET_SRC (set)); 13668 } 13669 13670 extract_insn_cached (insn); 13671 for (i = recog_data.n_operands - 1; i >= 0; --i) 13672 if (GET_CODE (recog_data.operand[i]) == MEM) 13673 { 13674 return memory_address_length (XEXP (recog_data.operand[i], 0)); 13675 break; 13676 } 13677 return 0; 13678} 13679 13680/* Return the maximum number of instructions a cpu can issue. 
*/ 13681 13682static int 13683ix86_issue_rate (void) 13684{ 13685 switch (ix86_tune) 13686 { 13687 case PROCESSOR_PENTIUM: 13688 case PROCESSOR_K6: 13689 return 2; 13690 13691 case PROCESSOR_PENTIUMPRO: 13692 case PROCESSOR_PENTIUM4: 13693 case PROCESSOR_ATHLON: 13694 case PROCESSOR_K8: 13695 case PROCESSOR_NOCONA: 13696 case PROCESSOR_GENERIC32: 13697 case PROCESSOR_GENERIC64: 13698 return 3; 13699 13700 default: 13701 return 1; 13702 } 13703} 13704 13705/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set 13706 by DEP_INSN and nothing set by DEP_INSN. */ 13707 13708static int 13709ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) 13710{ 13711 rtx set, set2; 13712 13713 /* Simplify the test for uninteresting insns. */ 13714 if (insn_type != TYPE_SETCC 13715 && insn_type != TYPE_ICMOV 13716 && insn_type != TYPE_FCMOV 13717 && insn_type != TYPE_IBR) 13718 return 0; 13719 13720 if ((set = single_set (dep_insn)) != 0) 13721 { 13722 set = SET_DEST (set); 13723 set2 = NULL_RTX; 13724 } 13725 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL 13726 && XVECLEN (PATTERN (dep_insn), 0) == 2 13727 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET 13728 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET) 13729 { 13730 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 13731 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 13732 } 13733 else 13734 return 0; 13735 13736 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG) 13737 return 0; 13738 13739 /* This test is true if the dependent insn reads the flags but 13740 not any other potentially set register. */ 13741 if (!reg_overlap_mentioned_p (set, PATTERN (insn))) 13742 return 0; 13743 13744 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn))) 13745 return 0; 13746 13747 return 1; 13748} 13749 13750/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory 13751 address with operands set by DEP_INSN. 
*/ 13752 13753static int 13754ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) 13755{ 13756 rtx addr; 13757 13758 if (insn_type == TYPE_LEA 13759 && TARGET_PENTIUM) 13760 { 13761 addr = PATTERN (insn); 13762 13763 if (GET_CODE (addr) == PARALLEL) 13764 addr = XVECEXP (addr, 0, 0); 13765 13766 gcc_assert (GET_CODE (addr) == SET); 13767 13768 addr = SET_SRC (addr); 13769 } 13770 else 13771 { 13772 int i; 13773 extract_insn_cached (insn); 13774 for (i = recog_data.n_operands - 1; i >= 0; --i) 13775 if (GET_CODE (recog_data.operand[i]) == MEM) 13776 { 13777 addr = XEXP (recog_data.operand[i], 0); 13778 goto found; 13779 } 13780 return 0; 13781 found:; 13782 } 13783 13784 return modified_in_p (addr, dep_insn); 13785} 13786 13787static int 13788ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) 13789{ 13790 enum attr_type insn_type, dep_insn_type; 13791 enum attr_memory memory; 13792 rtx set, set2; 13793 int dep_insn_code_number; 13794 13795 /* Anti and output dependencies have zero cost on all CPUs. */ 13796 if (REG_NOTE_KIND (link) != 0) 13797 return 0; 13798 13799 dep_insn_code_number = recog_memoized (dep_insn); 13800 13801 /* If we can't recognize the insns, we can't really do anything. */ 13802 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0) 13803 return cost; 13804 13805 insn_type = get_attr_type (insn); 13806 dep_insn_type = get_attr_type (dep_insn); 13807 13808 switch (ix86_tune) 13809 { 13810 case PROCESSOR_PENTIUM: 13811 /* Address Generation Interlock adds a cycle of latency. */ 13812 if (ix86_agi_dependent (insn, dep_insn, insn_type)) 13813 cost += 1; 13814 13815 /* ??? Compares pair with jump/setcc. */ 13816 if (ix86_flags_dependent (insn, dep_insn, insn_type)) 13817 cost = 0; 13818 13819 /* Floating point stores require value to be ready one cycle earlier. 
*/ 13820 if (insn_type == TYPE_FMOV 13821 && get_attr_memory (insn) == MEMORY_STORE 13822 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 13823 cost += 1; 13824 break; 13825 13826 case PROCESSOR_PENTIUMPRO: 13827 memory = get_attr_memory (insn); 13828 13829 /* INT->FP conversion is expensive. */ 13830 if (get_attr_fp_int_src (dep_insn)) 13831 cost += 5; 13832 13833 /* There is one cycle extra latency between an FP op and a store. */ 13834 if (insn_type == TYPE_FMOV 13835 && (set = single_set (dep_insn)) != NULL_RTX 13836 && (set2 = single_set (insn)) != NULL_RTX 13837 && rtx_equal_p (SET_DEST (set), SET_SRC (set2)) 13838 && GET_CODE (SET_DEST (set2)) == MEM) 13839 cost += 1; 13840 13841 /* Show ability of reorder buffer to hide latency of load by executing 13842 in parallel with previous instruction in case 13843 previous instruction is not needed to compute the address. */ 13844 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 13845 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 13846 { 13847 /* Claim moves to take one cycle, as core can issue one load 13848 at time and the next load can start cycle later. */ 13849 if (dep_insn_type == TYPE_IMOV 13850 || dep_insn_type == TYPE_FMOV) 13851 cost = 1; 13852 else if (cost > 1) 13853 cost--; 13854 } 13855 break; 13856 13857 case PROCESSOR_K6: 13858 memory = get_attr_memory (insn); 13859 13860 /* The esp dependency is resolved before the instruction is really 13861 finished. */ 13862 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) 13863 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) 13864 return 1; 13865 13866 /* INT->FP conversion is expensive. */ 13867 if (get_attr_fp_int_src (dep_insn)) 13868 cost += 5; 13869 13870 /* Show ability of reorder buffer to hide latency of load by executing 13871 in parallel with previous instruction in case 13872 previous instruction is not needed to compute the address. 
*/ 13873 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 13874 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 13875 { 13876 /* Claim moves to take one cycle, as core can issue one load 13877 at time and the next load can start cycle later. */ 13878 if (dep_insn_type == TYPE_IMOV 13879 || dep_insn_type == TYPE_FMOV) 13880 cost = 1; 13881 else if (cost > 2) 13882 cost -= 2; 13883 else 13884 cost = 1; 13885 } 13886 break; 13887 13888 case PROCESSOR_ATHLON: 13889 case PROCESSOR_K8: 13890 case PROCESSOR_GENERIC32: 13891 case PROCESSOR_GENERIC64: 13892 memory = get_attr_memory (insn); 13893 13894 /* Show ability of reorder buffer to hide latency of load by executing 13895 in parallel with previous instruction in case 13896 previous instruction is not needed to compute the address. */ 13897 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 13898 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 13899 { 13900 enum attr_unit unit = get_attr_unit (insn); 13901 int loadcost = 3; 13902 13903 /* Because of the difference between the length of integer and 13904 floating unit pipeline preparation stages, the memory operands 13905 for floating point are cheaper. 13906 13907 ??? For Athlon it the difference is most probably 2. */ 13908 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN) 13909 loadcost = 3; 13910 else 13911 loadcost = TARGET_ATHLON ? 2 : 0; 13912 13913 if (cost >= loadcost) 13914 cost -= loadcost; 13915 else 13916 cost = 0; 13917 } 13918 13919 default: 13920 break; 13921 } 13922 13923 return cost; 13924} 13925 13926/* How many alternative schedules to try. This should be as wide as the 13927 scheduling freedom in the DFA, but no wider. Making this value too 13928 large results extra work for the scheduler. 
*/ 13929 13930static int 13931ia32_multipass_dfa_lookahead (void) 13932{ 13933 if (ix86_tune == PROCESSOR_PENTIUM) 13934 return 2; 13935 13936 if (ix86_tune == PROCESSOR_PENTIUMPRO 13937 || ix86_tune == PROCESSOR_K6) 13938 return 1; 13939 13940 else 13941 return 0; 13942} 13943 13944 13945/* Compute the alignment given to a constant that is being placed in memory. 13946 EXP is the constant and ALIGN is the alignment that the object would 13947 ordinarily have. 13948 The value of this function is used instead of that alignment to align 13949 the object. */ 13950 13951int 13952ix86_constant_alignment (tree exp, int align) 13953{ 13954 if (TREE_CODE (exp) == REAL_CST) 13955 { 13956 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64) 13957 return 64; 13958 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128) 13959 return 128; 13960 } 13961 else if (!optimize_size && TREE_CODE (exp) == STRING_CST 13962 && !TARGET_NO_ALIGN_LONG_STRINGS 13963 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD) 13964 return BITS_PER_WORD; 13965 13966 return align; 13967} 13968 13969/* Compute the alignment for a static variable. 13970 TYPE is the data type, and ALIGN is the alignment that 13971 the object would ordinarily have. The value of this function is used 13972 instead of that alignment to align the object. */ 13973 13974int 13975ix86_data_alignment (tree type, int align) 13976{ 13977 int max_align = optimize_size ? BITS_PER_WORD : 256; 13978 13979 if (AGGREGATE_TYPE_P (type) 13980 && TYPE_SIZE (type) 13981 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 13982 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align 13983 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) 13984 && align < max_align) 13985 align = max_align; 13986 13987 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned 13988 to 16byte boundary. 
*/ 13989 if (TARGET_64BIT) 13990 { 13991 if (AGGREGATE_TYPE_P (type) 13992 && TYPE_SIZE (type) 13993 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 13994 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128 13995 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) 13996 return 128; 13997 } 13998 13999 if (TREE_CODE (type) == ARRAY_TYPE) 14000 { 14001 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) 14002 return 64; 14003 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) 14004 return 128; 14005 } 14006 else if (TREE_CODE (type) == COMPLEX_TYPE) 14007 { 14008 14009 if (TYPE_MODE (type) == DCmode && align < 64) 14010 return 64; 14011 if (TYPE_MODE (type) == XCmode && align < 128) 14012 return 128; 14013 } 14014 else if ((TREE_CODE (type) == RECORD_TYPE 14015 || TREE_CODE (type) == UNION_TYPE 14016 || TREE_CODE (type) == QUAL_UNION_TYPE) 14017 && TYPE_FIELDS (type)) 14018 { 14019 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) 14020 return 64; 14021 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) 14022 return 128; 14023 } 14024 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE 14025 || TREE_CODE (type) == INTEGER_TYPE) 14026 { 14027 if (TYPE_MODE (type) == DFmode && align < 64) 14028 return 64; 14029 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) 14030 return 128; 14031 } 14032 14033 return align; 14034} 14035 14036/* Compute the alignment for a local variable. 14037 TYPE is the data type, and ALIGN is the alignment that 14038 the object would ordinarily have. The value of this macro is used 14039 instead of that alignment to align the object. */ 14040 14041int 14042ix86_local_alignment (tree type, int align) 14043{ 14044 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned 14045 to 16byte boundary. 
*/ 14046 if (TARGET_64BIT) 14047 { 14048 if (AGGREGATE_TYPE_P (type) 14049 && TYPE_SIZE (type) 14050 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 14051 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16 14052 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) 14053 return 128; 14054 } 14055 if (TREE_CODE (type) == ARRAY_TYPE) 14056 { 14057 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) 14058 return 64; 14059 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) 14060 return 128; 14061 } 14062 else if (TREE_CODE (type) == COMPLEX_TYPE) 14063 { 14064 if (TYPE_MODE (type) == DCmode && align < 64) 14065 return 64; 14066 if (TYPE_MODE (type) == XCmode && align < 128) 14067 return 128; 14068 } 14069 else if ((TREE_CODE (type) == RECORD_TYPE 14070 || TREE_CODE (type) == UNION_TYPE 14071 || TREE_CODE (type) == QUAL_UNION_TYPE) 14072 && TYPE_FIELDS (type)) 14073 { 14074 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) 14075 return 64; 14076 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) 14077 return 128; 14078 } 14079 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE 14080 || TREE_CODE (type) == INTEGER_TYPE) 14081 { 14082 14083 if (TYPE_MODE (type) == DFmode && align < 64) 14084 return 64; 14085 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) 14086 return 128; 14087 } 14088 return align; 14089} 14090 14091/* Emit RTL insns to initialize the variable parts of a trampoline. 14092 FNADDR is an RTX for the address of the function's pure code. 14093 CXT is an RTX for the static chain value for the function. */ 14094void 14095x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt) 14096{ 14097 if (!TARGET_64BIT) 14098 { 14099 /* Compute offset from the end of the jmp to the target function. 
	 */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
			       plus_constant (tramp, 10),
			       NULL_RTX, 1, OPTAB_DIRECT);
      /* 0xb9 is "mov $imm32, %ecx": load the static chain.  */
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
		      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      /* 0xe9 is "jmp rel32": jump to the target function.  */
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
		      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load address using shorter movl instead of movabs.
	 We may want to support movq for kernel mode, but kernel does not use
	 trampolines at the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
	  /* 0x41 0xbb is "mov $imm32, %r11d".  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb41, HImode));
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
			  gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* 0x49 0xbb is "movabs $imm64, %r11".  */
	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
			  gen_int_mode (0xbb49, HImode));
	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
			  fnaddr);
	  offset += 10;
	}
      /* Load static chain using movabs to r10.
	 */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
		      cxt);
      offset += 10;
      /* Jump to the r11 (0x49 0xff 0xe3 is "jmp *%r11").  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
		      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
		      gen_int_mode (0xe3, QImode));
      offset += 3;
      /* Emitted code must fit in the trampoline area.  */
      gcc_assert (offset <= TRAMPOLINE_SIZE);
    }

#ifdef ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}

/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  /* SSE */
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_EMMS,
  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPNEPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,
  IX86_BUILTIN_CMPNESD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  /* SSE2 MMX */
  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,
  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,

  /* Prescott New Instructions (SSE3).  */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,

  /* Sentinel: number of builtins; must remain last.  */
  IX86_BUILTIN_MAX
};

/* Register builtin NAME with type TYPE and code CODE, but only when the
   ISA bits in MASK are enabled in target_flags (and 64bit-only builtins
   only on 64bit targets).  */
#define def_builtin(MASK, NAME, TYPE, CODE)				\
do {									\
  if ((MASK) & target_flags						\
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
				 NULL, NULL_TREE);			\
} while (0)

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.
 */
#define BUILTIN_DESC_SWAP_OPERANDS 1

/* Describes one machine-specific builtin: which ISA mask enables it,
   which insn pattern implements it, its source-level name, its enum
   code, the rtx comparison code it expands with (when applicable), and
   flag bits such as BUILTIN_DESC_SWAP_OPERANDS.  */
struct builtin_description
{
  const unsigned int mask;		/* ISA enable mask (MASK_SSE, ...).  */
  const enum insn_code icode;		/* Implementing insn pattern.  */
  const char *const name;		/* "__builtin_ia32_*" name.  */
  const enum ix86_builtins code;	/* IX86_BUILTIN_* enum value.  */
  const enum rtx_code comparison;	/* Comparison code, if a compare.  */
  const unsigned int flag;		/* BUILTIN_DESC_* flag bits.  */
};

/* (U)COMISS/(U)COMISD comparison builtins.  */
static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi,
"__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 }, 14628 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 }, 14629 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 }, 14630 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 }, 14631 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 }, 14632 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 }, 14633 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 }, 14634 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 }, 14635}; 14636 14637static const struct builtin_description bdesc_2arg[] = 14638{ 14639 /* SSE */ 14640 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 }, 14641 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 }, 14642 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 }, 14643 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 }, 14644 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 }, 14645 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 }, 14646 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 }, 14647 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 }, 14648 14649 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, 14650 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, 14651 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, 14652 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", 
IX86_BUILTIN_CMPGTPS, LT, 14653 BUILTIN_DESC_SWAP_OPERANDS }, 14654 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 14655 BUILTIN_DESC_SWAP_OPERANDS }, 14656 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, 14657 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 }, 14658 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 }, 14659 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 }, 14660 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, 14661 BUILTIN_DESC_SWAP_OPERANDS }, 14662 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, 14663 BUILTIN_DESC_SWAP_OPERANDS }, 14664 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 }, 14665 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, 14666 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, 14667 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 }, 14668 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 }, 14669 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 }, 14670 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 }, 14671 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 }, 14672 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, 14673 BUILTIN_DESC_SWAP_OPERANDS }, 14674 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, 
UNGT, 14675 BUILTIN_DESC_SWAP_OPERANDS }, 14676 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 }, 14677 14678 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 }, 14679 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 }, 14680 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, 14681 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, 14682 14683 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 }, 14684 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 }, 14685 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 }, 14686 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }, 14687 14688 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, 14689 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, 14690 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, 14691 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 }, 14692 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 }, 14693 14694 /* MMX */ 14695 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 }, 14696 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 }, 14697 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 }, 14698 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 }, 14699 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 }, 14700 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 }, 14701 { MASK_MMX, 
CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 }, 14702 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 }, 14703 14704 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 }, 14705 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 }, 14706 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 }, 14707 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 }, 14708 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 }, 14709 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 }, 14710 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 }, 14711 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 }, 14712 14713 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 }, 14714 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 }, 14715 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 }, 14716 14717 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 }, 14718 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 }, 14719 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 }, 14720 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 }, 14721 14722 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 }, 14723 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 }, 14724 14725 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 }, 14726 { MASK_MMX, 
CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 }, 14727 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 }, 14728 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 }, 14729 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 }, 14730 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 }, 14731 14732 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 }, 14733 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 }, 14734 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 }, 14735 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 }, 14736 14737 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 }, 14738 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 }, 14739 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 }, 14740 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 }, 14741 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 }, 14742 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 }, 14743 14744 /* Special. 
*/ 14745 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 }, 14746 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 }, 14747 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 }, 14748 14749 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, 14750 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, 14751 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, 14752 14753 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, 14754 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, 14755 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 }, 14756 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 }, 14757 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 }, 14758 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 }, 14759 14760 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 }, 14761 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 }, 14762 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 }, 14763 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 }, 14764 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 }, 14765 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 }, 14766 14767 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 }, 14768 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 }, 14769 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, 14770 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 }, 14771 14772 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, 14773 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }, 14774 14775 /* SSE2 */ 14776 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 }, 14777 { MASK_SSE2, CODE_FOR_subv2df3, 
"__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 }, 14778 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 }, 14779 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 }, 14780 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 }, 14781 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 }, 14782 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 }, 14783 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 }, 14784 14785 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 }, 14786 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 }, 14787 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 }, 14788 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 14789 BUILTIN_DESC_SWAP_OPERANDS }, 14790 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 14791 BUILTIN_DESC_SWAP_OPERANDS }, 14792 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 }, 14793 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 }, 14794 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 }, 14795 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 }, 14796 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, 14797 BUILTIN_DESC_SWAP_OPERANDS }, 14798 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, 14799 BUILTIN_DESC_SWAP_OPERANDS }, 14800 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", 
IX86_BUILTIN_CMPORDPD, ORDERED, 0 }, 14801 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 }, 14802 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 }, 14803 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 }, 14804 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 }, 14805 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 }, 14806 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 }, 14807 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 }, 14808 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 }, 14809 14810 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 }, 14811 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 }, 14812 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 }, 14813 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 }, 14814 14815 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 }, 14816 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 }, 14817 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 }, 14818 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 }, 14819 14820 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 }, 14821 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 }, 14822 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 }, 14823 14824 /* SSE2 MMX */ 
14825 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 }, 14826 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 }, 14827 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 }, 14828 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 }, 14829 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 }, 14830 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 }, 14831 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 }, 14832 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 }, 14833 14834 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 }, 14835 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 }, 14836 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 }, 14837 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 }, 14838 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 }, 14839 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 }, 14840 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 }, 14841 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 }, 14842 14843 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 }, 14844 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 }, 14845 14846 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 }, 14847 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, 
"__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 }, 14848 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 }, 14849 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 }, 14850 14851 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 }, 14852 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 }, 14853 14854 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 }, 14855 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 }, 14856 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 }, 14857 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 }, 14858 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 }, 14859 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 }, 14860 14861 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 }, 14862 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 }, 14863 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 }, 14864 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 }, 14865 14866 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 }, 14867 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 }, 14868 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 }, 14869 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 }, 14870 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, 
"__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 }, 14871 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 }, 14872 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 }, 14873 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 }, 14874 14875 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 }, 14876 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 }, 14877 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 }, 14878 14879 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 }, 14880 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 }, 14881 14882 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 }, 14883 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 }, 14884 14885 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 }, 14886 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 }, 14887 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 }, 14888 14889 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 }, 14890 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 }, 14891 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 }, 14892 14893 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 }, 14894 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 }, 14895 14896 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 }, 14897 14898 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 }, 14899 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 }, 14900 { MASK_SSE2, 
CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 }, 14901 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }, 14902 14903 /* SSE3 MMX */ 14904 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 }, 14905 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 }, 14906 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 }, 14907 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 }, 14908 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 }, 14909 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 } 14910}; 14911 14912static const struct builtin_description bdesc_1arg[] = 14913{ 14914 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 }, 14915 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 }, 14916 14917 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 }, 14918 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 }, 14919 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 }, 14920 14921 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, 14922 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, 14923 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 }, 14924 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, 14925 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }, 14926 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 }, 14927 14928 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 }, 14929 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 }, 14930 14931 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 }, 14932 14933 { MASK_SSE2, 
CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 }, 14934 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 }, 14935 14936 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 }, 14937 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 }, 14938 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 }, 14939 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 }, 14940 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 }, 14941 14942 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 }, 14943 14944 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 }, 14945 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 }, 14946 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 }, 14947 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 }, 14948 14949 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 }, 14950 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 }, 14951 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }, 14952 14953 /* SSE3 */ 14954 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 }, 14955 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 }, 14956}; 14957 14958static void 14959ix86_init_builtins (void) 14960{ 14961 if (TARGET_MMX) 14962 ix86_init_mmx_sse_builtins (); 14963} 14964 14965/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX 14966 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX 14967 builtins. 
*/ 14968static void 14969ix86_init_mmx_sse_builtins (void) 14970{ 14971 const struct builtin_description * d; 14972 size_t i; 14973 14974 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode); 14975 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); 14976 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode); 14977 tree V2DI_type_node 14978 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode); 14979 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode); 14980 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode); 14981 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode); 14982 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode); 14983 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode); 14984 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode); 14985 14986 tree pchar_type_node = build_pointer_type (char_type_node); 14987 tree pcchar_type_node = build_pointer_type ( 14988 build_type_variant (char_type_node, 1, 0)); 14989 tree pfloat_type_node = build_pointer_type (float_type_node); 14990 tree pcfloat_type_node = build_pointer_type ( 14991 build_type_variant (float_type_node, 1, 0)); 14992 tree pv2si_type_node = build_pointer_type (V2SI_type_node); 14993 tree pv2di_type_node = build_pointer_type (V2DI_type_node); 14994 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node); 14995 14996 /* Comparisons. */ 14997 tree int_ftype_v4sf_v4sf 14998 = build_function_type_list (integer_type_node, 14999 V4SF_type_node, V4SF_type_node, NULL_TREE); 15000 tree v4si_ftype_v4sf_v4sf 15001 = build_function_type_list (V4SI_type_node, 15002 V4SF_type_node, V4SF_type_node, NULL_TREE); 15003 /* MMX/SSE/integer conversions. 
*/ 15004 tree int_ftype_v4sf 15005 = build_function_type_list (integer_type_node, 15006 V4SF_type_node, NULL_TREE); 15007 tree int64_ftype_v4sf 15008 = build_function_type_list (long_long_integer_type_node, 15009 V4SF_type_node, NULL_TREE); 15010 tree int_ftype_v8qi 15011 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE); 15012 tree v4sf_ftype_v4sf_int 15013 = build_function_type_list (V4SF_type_node, 15014 V4SF_type_node, integer_type_node, NULL_TREE); 15015 tree v4sf_ftype_v4sf_int64 15016 = build_function_type_list (V4SF_type_node, 15017 V4SF_type_node, long_long_integer_type_node, 15018 NULL_TREE); 15019 tree v4sf_ftype_v4sf_v2si 15020 = build_function_type_list (V4SF_type_node, 15021 V4SF_type_node, V2SI_type_node, NULL_TREE); 15022 15023 /* Miscellaneous. */ 15024 tree v8qi_ftype_v4hi_v4hi 15025 = build_function_type_list (V8QI_type_node, 15026 V4HI_type_node, V4HI_type_node, NULL_TREE); 15027 tree v4hi_ftype_v2si_v2si 15028 = build_function_type_list (V4HI_type_node, 15029 V2SI_type_node, V2SI_type_node, NULL_TREE); 15030 tree v4sf_ftype_v4sf_v4sf_int 15031 = build_function_type_list (V4SF_type_node, 15032 V4SF_type_node, V4SF_type_node, 15033 integer_type_node, NULL_TREE); 15034 tree v2si_ftype_v4hi_v4hi 15035 = build_function_type_list (V2SI_type_node, 15036 V4HI_type_node, V4HI_type_node, NULL_TREE); 15037 tree v4hi_ftype_v4hi_int 15038 = build_function_type_list (V4HI_type_node, 15039 V4HI_type_node, integer_type_node, NULL_TREE); 15040 tree v4hi_ftype_v4hi_di 15041 = build_function_type_list (V4HI_type_node, 15042 V4HI_type_node, long_long_unsigned_type_node, 15043 NULL_TREE); 15044 tree v2si_ftype_v2si_di 15045 = build_function_type_list (V2SI_type_node, 15046 V2SI_type_node, long_long_unsigned_type_node, 15047 NULL_TREE); 15048 tree void_ftype_void 15049 = build_function_type (void_type_node, void_list_node); 15050 tree void_ftype_unsigned 15051 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE); 15052 
tree void_ftype_unsigned_unsigned 15053 = build_function_type_list (void_type_node, unsigned_type_node, 15054 unsigned_type_node, NULL_TREE); 15055 tree void_ftype_pcvoid_unsigned_unsigned 15056 = build_function_type_list (void_type_node, const_ptr_type_node, 15057 unsigned_type_node, unsigned_type_node, 15058 NULL_TREE); 15059 tree unsigned_ftype_void 15060 = build_function_type (unsigned_type_node, void_list_node); 15061 tree v2si_ftype_v4sf 15062 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE); 15063 /* Loads/stores. */ 15064 tree void_ftype_v8qi_v8qi_pchar 15065 = build_function_type_list (void_type_node, 15066 V8QI_type_node, V8QI_type_node, 15067 pchar_type_node, NULL_TREE); 15068 tree v4sf_ftype_pcfloat 15069 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE); 15070 /* @@@ the type is bogus */ 15071 tree v4sf_ftype_v4sf_pv2si 15072 = build_function_type_list (V4SF_type_node, 15073 V4SF_type_node, pv2si_type_node, NULL_TREE); 15074 tree void_ftype_pv2si_v4sf 15075 = build_function_type_list (void_type_node, 15076 pv2si_type_node, V4SF_type_node, NULL_TREE); 15077 tree void_ftype_pfloat_v4sf 15078 = build_function_type_list (void_type_node, 15079 pfloat_type_node, V4SF_type_node, NULL_TREE); 15080 tree void_ftype_pdi_di 15081 = build_function_type_list (void_type_node, 15082 pdi_type_node, long_long_unsigned_type_node, 15083 NULL_TREE); 15084 tree void_ftype_pv2di_v2di 15085 = build_function_type_list (void_type_node, 15086 pv2di_type_node, V2DI_type_node, NULL_TREE); 15087 /* Normal vector unops. */ 15088 tree v4sf_ftype_v4sf 15089 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); 15090 15091 /* Normal vector binops. 
*/ 15092 tree v4sf_ftype_v4sf_v4sf 15093 = build_function_type_list (V4SF_type_node, 15094 V4SF_type_node, V4SF_type_node, NULL_TREE); 15095 tree v8qi_ftype_v8qi_v8qi 15096 = build_function_type_list (V8QI_type_node, 15097 V8QI_type_node, V8QI_type_node, NULL_TREE); 15098 tree v4hi_ftype_v4hi_v4hi 15099 = build_function_type_list (V4HI_type_node, 15100 V4HI_type_node, V4HI_type_node, NULL_TREE); 15101 tree v2si_ftype_v2si_v2si 15102 = build_function_type_list (V2SI_type_node, 15103 V2SI_type_node, V2SI_type_node, NULL_TREE); 15104 tree di_ftype_di_di 15105 = build_function_type_list (long_long_unsigned_type_node, 15106 long_long_unsigned_type_node, 15107 long_long_unsigned_type_node, NULL_TREE); 15108 15109 tree v2si_ftype_v2sf 15110 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE); 15111 tree v2sf_ftype_v2si 15112 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE); 15113 tree v2si_ftype_v2si 15114 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE); 15115 tree v2sf_ftype_v2sf 15116 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE); 15117 tree v2sf_ftype_v2sf_v2sf 15118 = build_function_type_list (V2SF_type_node, 15119 V2SF_type_node, V2SF_type_node, NULL_TREE); 15120 tree v2si_ftype_v2sf_v2sf 15121 = build_function_type_list (V2SI_type_node, 15122 V2SF_type_node, V2SF_type_node, NULL_TREE); 15123 tree pint_type_node = build_pointer_type (integer_type_node); 15124 tree pdouble_type_node = build_pointer_type (double_type_node); 15125 tree pcdouble_type_node = build_pointer_type ( 15126 build_type_variant (double_type_node, 1, 0)); 15127 tree int_ftype_v2df_v2df 15128 = build_function_type_list (integer_type_node, 15129 V2DF_type_node, V2DF_type_node, NULL_TREE); 15130 15131 tree void_ftype_pcvoid 15132 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE); 15133 tree v4sf_ftype_v4si 15134 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE); 
15135 tree v4si_ftype_v4sf 15136 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE); 15137 tree v2df_ftype_v4si 15138 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE); 15139 tree v4si_ftype_v2df 15140 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE); 15141 tree v2si_ftype_v2df 15142 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE); 15143 tree v4sf_ftype_v2df 15144 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE); 15145 tree v2df_ftype_v2si 15146 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE); 15147 tree v2df_ftype_v4sf 15148 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE); 15149 tree int_ftype_v2df 15150 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE); 15151 tree int64_ftype_v2df 15152 = build_function_type_list (long_long_integer_type_node, 15153 V2DF_type_node, NULL_TREE); 15154 tree v2df_ftype_v2df_int 15155 = build_function_type_list (V2DF_type_node, 15156 V2DF_type_node, integer_type_node, NULL_TREE); 15157 tree v2df_ftype_v2df_int64 15158 = build_function_type_list (V2DF_type_node, 15159 V2DF_type_node, long_long_integer_type_node, 15160 NULL_TREE); 15161 tree v4sf_ftype_v4sf_v2df 15162 = build_function_type_list (V4SF_type_node, 15163 V4SF_type_node, V2DF_type_node, NULL_TREE); 15164 tree v2df_ftype_v2df_v4sf 15165 = build_function_type_list (V2DF_type_node, 15166 V2DF_type_node, V4SF_type_node, NULL_TREE); 15167 tree v2df_ftype_v2df_v2df_int 15168 = build_function_type_list (V2DF_type_node, 15169 V2DF_type_node, V2DF_type_node, 15170 integer_type_node, 15171 NULL_TREE); 15172 tree v2df_ftype_v2df_pcdouble 15173 = build_function_type_list (V2DF_type_node, 15174 V2DF_type_node, pcdouble_type_node, NULL_TREE); 15175 tree void_ftype_pdouble_v2df 15176 = build_function_type_list (void_type_node, 15177 pdouble_type_node, V2DF_type_node, NULL_TREE); 15178 tree void_ftype_pint_int 15179 
= build_function_type_list (void_type_node, 15180 pint_type_node, integer_type_node, NULL_TREE); 15181 tree void_ftype_v16qi_v16qi_pchar 15182 = build_function_type_list (void_type_node, 15183 V16QI_type_node, V16QI_type_node, 15184 pchar_type_node, NULL_TREE); 15185 tree v2df_ftype_pcdouble 15186 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE); 15187 tree v2df_ftype_v2df_v2df 15188 = build_function_type_list (V2DF_type_node, 15189 V2DF_type_node, V2DF_type_node, NULL_TREE); 15190 tree v16qi_ftype_v16qi_v16qi 15191 = build_function_type_list (V16QI_type_node, 15192 V16QI_type_node, V16QI_type_node, NULL_TREE); 15193 tree v8hi_ftype_v8hi_v8hi 15194 = build_function_type_list (V8HI_type_node, 15195 V8HI_type_node, V8HI_type_node, NULL_TREE); 15196 tree v4si_ftype_v4si_v4si 15197 = build_function_type_list (V4SI_type_node, 15198 V4SI_type_node, V4SI_type_node, NULL_TREE); 15199 tree v2di_ftype_v2di_v2di 15200 = build_function_type_list (V2DI_type_node, 15201 V2DI_type_node, V2DI_type_node, NULL_TREE); 15202 tree v2di_ftype_v2df_v2df 15203 = build_function_type_list (V2DI_type_node, 15204 V2DF_type_node, V2DF_type_node, NULL_TREE); 15205 tree v2df_ftype_v2df 15206 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE); 15207 tree v2di_ftype_v2di_int 15208 = build_function_type_list (V2DI_type_node, 15209 V2DI_type_node, integer_type_node, NULL_TREE); 15210 tree v4si_ftype_v4si_int 15211 = build_function_type_list (V4SI_type_node, 15212 V4SI_type_node, integer_type_node, NULL_TREE); 15213 tree v8hi_ftype_v8hi_int 15214 = build_function_type_list (V8HI_type_node, 15215 V8HI_type_node, integer_type_node, NULL_TREE); 15216 tree v4si_ftype_v8hi_v8hi 15217 = build_function_type_list (V4SI_type_node, 15218 V8HI_type_node, V8HI_type_node, NULL_TREE); 15219 tree di_ftype_v8qi_v8qi 15220 = build_function_type_list (long_long_unsigned_type_node, 15221 V8QI_type_node, V8QI_type_node, NULL_TREE); 15222 tree di_ftype_v2si_v2si 15223 = 
build_function_type_list (long_long_unsigned_type_node, 15224 V2SI_type_node, V2SI_type_node, NULL_TREE); 15225 tree v2di_ftype_v16qi_v16qi 15226 = build_function_type_list (V2DI_type_node, 15227 V16QI_type_node, V16QI_type_node, NULL_TREE); 15228 tree v2di_ftype_v4si_v4si 15229 = build_function_type_list (V2DI_type_node, 15230 V4SI_type_node, V4SI_type_node, NULL_TREE); 15231 tree int_ftype_v16qi 15232 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE); 15233 tree v16qi_ftype_pcchar 15234 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE); 15235 tree void_ftype_pchar_v16qi 15236 = build_function_type_list (void_type_node, 15237 pchar_type_node, V16QI_type_node, NULL_TREE); 15238 15239 tree float80_type; 15240 tree float128_type; 15241 tree ftype; 15242 15243 /* The __float80 type. */ 15244 if (TYPE_MODE (long_double_type_node) == XFmode) 15245 (*lang_hooks.types.register_builtin_type) (long_double_type_node, 15246 "__float80"); 15247 else 15248 { 15249 /* The __float80 type. */ 15250 float80_type = make_node (REAL_TYPE); 15251 TYPE_PRECISION (float80_type) = 80; 15252 layout_type (float80_type); 15253 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80"); 15254 } 15255 15256 if (TARGET_64BIT) 15257 { 15258 float128_type = make_node (REAL_TYPE); 15259 TYPE_PRECISION (float128_type) = 128; 15260 layout_type (float128_type); 15261 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128"); 15262 } 15263 15264 /* Add all builtins that are more or less simple operations on two 15265 operands. */ 15266 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) 15267 { 15268 /* Use one of the operands; the target can have a different mode for 15269 mask-generating compares. 
*/ 15270 enum machine_mode mode; 15271 tree type; 15272 15273 if (d->name == 0) 15274 continue; 15275 mode = insn_data[d->icode].operand[1].mode; 15276 15277 switch (mode) 15278 { 15279 case V16QImode: 15280 type = v16qi_ftype_v16qi_v16qi; 15281 break; 15282 case V8HImode: 15283 type = v8hi_ftype_v8hi_v8hi; 15284 break; 15285 case V4SImode: 15286 type = v4si_ftype_v4si_v4si; 15287 break; 15288 case V2DImode: 15289 type = v2di_ftype_v2di_v2di; 15290 break; 15291 case V2DFmode: 15292 type = v2df_ftype_v2df_v2df; 15293 break; 15294 case V4SFmode: 15295 type = v4sf_ftype_v4sf_v4sf; 15296 break; 15297 case V8QImode: 15298 type = v8qi_ftype_v8qi_v8qi; 15299 break; 15300 case V4HImode: 15301 type = v4hi_ftype_v4hi_v4hi; 15302 break; 15303 case V2SImode: 15304 type = v2si_ftype_v2si_v2si; 15305 break; 15306 case DImode: 15307 type = di_ftype_di_di; 15308 break; 15309 15310 default: 15311 gcc_unreachable (); 15312 } 15313 15314 /* Override for comparisons. */ 15315 if (d->icode == CODE_FOR_sse_maskcmpv4sf3 15316 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3) 15317 type = v4si_ftype_v4sf_v4sf; 15318 15319 if (d->icode == CODE_FOR_sse2_maskcmpv2df3 15320 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3) 15321 type = v2di_ftype_v2df_v2df; 15322 15323 def_builtin (d->mask, d->name, type, d->code); 15324 } 15325 15326 /* Add the remaining MMX insns with somewhat more complicated types. 
*/ 15327 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); 15328 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW); 15329 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD); 15330 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ); 15331 15332 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW); 15333 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD); 15334 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ); 15335 15336 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW); 15337 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD); 15338 15339 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW); 15340 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD); 15341 15342 /* comi/ucomi insns. 
*/ 15343 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) 15344 if (d->mask == MASK_SSE2) 15345 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code); 15346 else 15347 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code); 15348 15349 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB); 15350 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW); 15351 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB); 15352 15353 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); 15354 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); 15355 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); 15356 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); 15357 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); 15358 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS); 15359 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); 15360 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64); 15361 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); 15362 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); 15363 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64); 15364 15365 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); 15366 15367 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS); 15368 def_builtin (MASK_SSE, 
"__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); 15369 15370 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS); 15371 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS); 15372 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS); 15373 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS); 15374 15375 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS); 15376 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB); 15377 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS); 15378 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); 15379 15380 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); 15381 15382 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); 15383 15384 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS); 15385 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS); 15386 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS); 15387 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS); 15388 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS); 15389 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS); 15390 15391 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS); 15392 15393 /* Original 3DNow! 
*/ 15394 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS); 15395 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB); 15396 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID); 15397 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC); 15398 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD); 15399 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ); 15400 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE); 15401 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT); 15402 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX); 15403 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN); 15404 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL); 15405 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP); 15406 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1); 15407 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2); 15408 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT); 15409 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1); 15410 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB); 15411 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR); 15412 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD); 15413 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW); 15414 15415 /* 
3DNow! extension as used in the Athlon CPU. */ 15416 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW); 15417 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC); 15418 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC); 15419 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW); 15420 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF); 15421 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI); 15422 15423 /* SSE2 */ 15424 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU); 15425 15426 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD); 15427 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD); 15428 15429 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD); 15430 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD); 15431 15432 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD); 15433 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128); 15434 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI); 15435 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD); 15436 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ); 15437 15438 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD); 15439 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW); 15440 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", 
v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW); 15441 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128); 15442 15443 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD); 15444 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD); 15445 15446 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD); 15447 15448 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD); 15449 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS); 15450 15451 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ); 15452 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI); 15453 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS); 15454 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ); 15455 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI); 15456 15457 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD); 15458 15459 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI); 15460 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI); 15461 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64); 15462 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64); 15463 15464 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ); 15465 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD); 15466 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ); 
15467 15468 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD); 15469 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD); 15470 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS); 15471 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD); 15472 15473 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH); 15474 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE); 15475 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE); 15476 15477 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU); 15478 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU); 15479 15480 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ); 15481 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128); 15482 15483 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128); 15484 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128); 15485 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128); 15486 15487 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128); 15488 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128); 15489 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128); 15490 15491 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128); 15492 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128); 15493 
15494 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128); 15495 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128); 15496 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128); 15497 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128); 15498 15499 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128); 15500 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128); 15501 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128); 15502 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128); 15503 15504 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128); 15505 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128); 15506 15507 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128); 15508 15509 /* Prescott New Instructions. */ 15510 def_builtin (MASK_SSE3, "__builtin_ia32_monitor", 15511 void_ftype_pcvoid_unsigned_unsigned, 15512 IX86_BUILTIN_MONITOR); 15513 def_builtin (MASK_SSE3, "__builtin_ia32_mwait", 15514 void_ftype_unsigned_unsigned, 15515 IX86_BUILTIN_MWAIT); 15516 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup", 15517 v4sf_ftype_v4sf, 15518 IX86_BUILTIN_MOVSHDUP); 15519 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup", 15520 v4sf_ftype_v4sf, 15521 IX86_BUILTIN_MOVSLDUP); 15522 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu", 15523 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU); 15524 15525 /* Access to the vec_init patterns. 
*/ 15526 ftype = build_function_type_list (V2SI_type_node, integer_type_node, 15527 integer_type_node, NULL_TREE); 15528 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si", 15529 ftype, IX86_BUILTIN_VEC_INIT_V2SI); 15530 15531 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node, 15532 short_integer_type_node, 15533 short_integer_type_node, 15534 short_integer_type_node, NULL_TREE); 15535 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi", 15536 ftype, IX86_BUILTIN_VEC_INIT_V4HI); 15537 15538 ftype = build_function_type_list (V8QI_type_node, char_type_node, 15539 char_type_node, char_type_node, 15540 char_type_node, char_type_node, 15541 char_type_node, char_type_node, 15542 char_type_node, NULL_TREE); 15543 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi", 15544 ftype, IX86_BUILTIN_VEC_INIT_V8QI); 15545 15546 /* Access to the vec_extract patterns. */ 15547 ftype = build_function_type_list (double_type_node, V2DF_type_node, 15548 integer_type_node, NULL_TREE); 15549 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df", 15550 ftype, IX86_BUILTIN_VEC_EXT_V2DF); 15551 15552 ftype = build_function_type_list (long_long_integer_type_node, 15553 V2DI_type_node, integer_type_node, 15554 NULL_TREE); 15555 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di", 15556 ftype, IX86_BUILTIN_VEC_EXT_V2DI); 15557 15558 ftype = build_function_type_list (float_type_node, V4SF_type_node, 15559 integer_type_node, NULL_TREE); 15560 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf", 15561 ftype, IX86_BUILTIN_VEC_EXT_V4SF); 15562 15563 ftype = build_function_type_list (intSI_type_node, V4SI_type_node, 15564 integer_type_node, NULL_TREE); 15565 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si", 15566 ftype, IX86_BUILTIN_VEC_EXT_V4SI); 15567 15568 ftype = build_function_type_list (intHI_type_node, V8HI_type_node, 15569 integer_type_node, NULL_TREE); 15570 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi", 15571 ftype, IX86_BUILTIN_VEC_EXT_V8HI); 
  /* Extract element of V4HI (SSE or 3DNow!A required, per the mask).  */
  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
	       ftype, IX86_BUILTIN_VEC_EXT_V4HI);

  /* Extract element of V2SI (MMX).  */
  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
	       ftype, IX86_BUILTIN_VEC_EXT_V2SI);

  /* Extract element of V16QI (SSE2).  */
  ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype,
	       IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
	       ftype, IX86_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
	       ftype, IX86_BUILTIN_VEC_SET_V4HI);
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  MODE is the vector mode the caller needs; a
   const0_rtx X is replaced by the zero constant of that vector mode,
   anything else is returned unchanged.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.
 */

/* Expand a two-operand builtin: ICODE is the named insn pattern to
   generate, ARGLIST holds the two tree arguments, and TARGET is a
   suggestion for the result register (may be NULL or unsuitable).
   Returns the rtx holding the result, or 0 if pattern generation
   failed.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  /* Modes the insn pattern declares for the result and both inputs.  */
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  /* Guard against error-recovery const0_rtx in vector positions.  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Use a fresh pseudo unless TARGET is already usable as-is.  */
  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* An SImode value feeding a TImode operand: load it into the low
     element of a V4SI register and reinterpret that as TImode.  */
  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* The insn must want input operands in the same modes as the
     result (VOIDmode operands, e.g. constants, are accepted too).  */
  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      /* All three modes agree; let the common fixup canonicalize.  */
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      /* Mismatched modes: force both inputs into registers.  */
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of stores.
   ICODE's operand 0 is the memory destination (ARG0 is a pointer),
   operand 1 the value to store.  Always returns 0 -- a store builtin
   produces no value.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  /* Guard against error-recovery const0_rtx in the vector position.  */
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Turn the pointer argument into a MEM of the store's mode.  */
  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.
   If DO_LOAD is nonzero, ARG0 is a pointer and the operand is loaded
   from memory; otherwise ARG0 is used directly.  Returns the result
   rtx, or 0 if pattern generation failed.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
			  rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  /* Use a fresh pseudo unless TARGET is already usable as-is.  */
  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if ((optimize && !register_operand (op0, mode0))
	  || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  These patterns take two inputs; the single
   builtin argument is passed for both (see OP1 = OP0 below).  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  /* Use a fresh pseudo unless TARGET is already usable as-is.  */
  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* Guard against error-recovery const0_rtx in the vector position.  */
  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  /* Duplicate the single argument into both insn inputs.  */
  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.
*/ 15776 15777static rtx 15778ix86_expand_sse_compare (const struct builtin_description *d, tree arglist, 15779 rtx target) 15780{ 15781 rtx pat; 15782 tree arg0 = TREE_VALUE (arglist); 15783 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 15784 rtx op0 = expand_normal (arg0); 15785 rtx op1 = expand_normal (arg1); 15786 rtx op2; 15787 enum machine_mode tmode = insn_data[d->icode].operand[0].mode; 15788 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode; 15789 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode; 15790 enum rtx_code comparison = d->comparison; 15791 15792 if (VECTOR_MODE_P (mode0)) 15793 op0 = safe_vector_operand (op0, mode0); 15794 if (VECTOR_MODE_P (mode1)) 15795 op1 = safe_vector_operand (op1, mode1); 15796 15797 /* Swap operands if we have a comparison that isn't available in 15798 hardware. */ 15799 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) 15800 { 15801 rtx tmp = gen_reg_rtx (mode1); 15802 emit_move_insn (tmp, op1); 15803 op1 = op0; 15804 op0 = tmp; 15805 } 15806 15807 if (optimize || !target 15808 || GET_MODE (target) != tmode 15809 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode)) 15810 target = gen_reg_rtx (tmode); 15811 15812 if ((optimize && !register_operand (op0, mode0)) 15813 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0)) 15814 op0 = copy_to_mode_reg (mode0, op0); 15815 if ((optimize && !register_operand (op1, mode1)) 15816 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1)) 15817 op1 = copy_to_mode_reg (mode1, op1); 15818 15819 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); 15820 pat = GEN_FCN (d->icode) (target, op0, op1, op2); 15821 if (! pat) 15822 return 0; 15823 emit_insn (pat); 15824 return target; 15825} 15826 15827/* Subroutine of ix86_expand_builtin to take care of comi insns. 
*/ 15828 15829static rtx 15830ix86_expand_sse_comi (const struct builtin_description *d, tree arglist, 15831 rtx target) 15832{ 15833 rtx pat; 15834 tree arg0 = TREE_VALUE (arglist); 15835 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 15836 rtx op0 = expand_normal (arg0); 15837 rtx op1 = expand_normal (arg1); 15838 rtx op2; 15839 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; 15840 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; 15841 enum rtx_code comparison = d->comparison; 15842 15843 if (VECTOR_MODE_P (mode0)) 15844 op0 = safe_vector_operand (op0, mode0); 15845 if (VECTOR_MODE_P (mode1)) 15846 op1 = safe_vector_operand (op1, mode1); 15847 15848 /* Swap operands if we have a comparison that isn't available in 15849 hardware. */ 15850 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) 15851 { 15852 rtx tmp = op1; 15853 op1 = op0; 15854 op0 = tmp; 15855 } 15856 15857 target = gen_reg_rtx (SImode); 15858 emit_move_insn (target, const0_rtx); 15859 target = gen_rtx_SUBREG (QImode, target, 0); 15860 15861 if ((optimize && !register_operand (op0, mode0)) 15862 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0)) 15863 op0 = copy_to_mode_reg (mode0, op0); 15864 if ((optimize && !register_operand (op1, mode1)) 15865 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1)) 15866 op1 = copy_to_mode_reg (mode1, op1); 15867 15868 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); 15869 pat = GEN_FCN (d->icode) (op0, op1); 15870 if (! pat) 15871 return 0; 15872 emit_insn (pat); 15873 emit_insn (gen_rtx_SET (VOIDmode, 15874 gen_rtx_STRICT_LOW_PART (VOIDmode, target), 15875 gen_rtx_fmt_ee (comparison, QImode, 15876 SET_DEST (pat), 15877 const0_rtx))); 15878 15879 return SUBREG_REG (target); 15880} 15881 15882/* Return the integer constant in ARG. Constrain it to be in the range 15883 of the subparts of VEC_TYPE; issue an error if not. 
 */

/* On error a diagnostic is emitted and 0 -- always a valid element
   index -- is returned so expansion can continue.  */
static int
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that gives us license to emit
   these sorts of instructions.  */

/* TYPE is the vector type being initialized, ARGLIST supplies one
   initializer per element, TARGET is a suggested result register.
   Returns the rtx holding the initialized vector.  */
static rtx
ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));

  /* Lower each initializer argument to the vector's element mode.  */
  for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
    {
      rtx x = expand_normal (TREE_VALUE (arglist));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  /* The caller must have supplied exactly N_ELT arguments.  */
  gcc_assert (arglist == NULL);

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.
*/ 15937 15938static rtx 15939ix86_expand_vec_ext_builtin (tree arglist, rtx target) 15940{ 15941 enum machine_mode tmode, mode0; 15942 tree arg0, arg1; 15943 int elt; 15944 rtx op0; 15945 15946 arg0 = TREE_VALUE (arglist); 15947 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 15948 15949 op0 = expand_normal (arg0); 15950 elt = get_element_number (TREE_TYPE (arg0), arg1); 15951 15952 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); 15953 mode0 = TYPE_MODE (TREE_TYPE (arg0)); 15954 gcc_assert (VECTOR_MODE_P (mode0)); 15955 15956 op0 = force_reg (mode0, op0); 15957 15958 if (optimize || !target || !register_operand (target, tmode)) 15959 target = gen_reg_rtx (tmode); 15960 15961 ix86_expand_vector_extract (true, target, op0, elt); 15962 15963 return target; 15964} 15965 15966/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around 15967 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had 15968 a language-level syntax for referencing vector elements. */ 15969 15970static rtx 15971ix86_expand_vec_set_builtin (tree arglist) 15972{ 15973 enum machine_mode tmode, mode1; 15974 tree arg0, arg1, arg2; 15975 int elt; 15976 rtx op0, op1, target; 15977 15978 arg0 = TREE_VALUE (arglist); 15979 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 15980 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); 15981 15982 tmode = TYPE_MODE (TREE_TYPE (arg0)); 15983 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); 15984 gcc_assert (VECTOR_MODE_P (tmode)); 15985 15986 op0 = expand_expr (arg0, NULL_RTX, tmode, 0); 15987 op1 = expand_expr (arg1, NULL_RTX, mode1, 0); 15988 elt = get_element_number (TREE_TYPE (arg0), arg2); 15989 15990 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode) 15991 op1 = convert_modes (mode1, GET_MODE (op1), op1, true); 15992 15993 op0 = force_reg (tmode, op0); 15994 op1 = force_reg (mode1, op1); 15995 15996 /* OP0 is the source of these builtin functions and shouldn't be 15997 modified. 
     Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.

   Builtins with bespoke expansion are handled by the switch below;
   everything else falls through to the generic bdesc_2arg / bdesc_1arg /
   bdesc_comi table walks at the end.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_mmx_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sse_sfence ());
      return 0;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      /* OP0 is the destination address; wrap it in a MEM of the
	 store's data mode.  */
      op0 = force_reg (Pmode, op0);
      op0 = gen_rtx_MEM (mode1, op0);

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
	       : CODE_FOR_sse2_loadlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = force_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || !register_operand (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
	       : CODE_FOR_sse_storelps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      op1 = force_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return const0_rtx;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      /* LDMXCSR loads from memory, so spill the value to a stack slot.  */
      op0 = expand_normal (TREE_VALUE (arglist));
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if ((optimize && !register_operand (op1, mode1))
	  || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (optimize || target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    /* 128-bit shifts by an immediate count.  */
    case IX86_BUILTIN_PSLLWI128:
      icode = CODE_FOR_ashlv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSLLDI128:
      icode = CODE_FOR_ashlv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSLLQI128:
      icode = CODE_FOR_ashlv2di3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRAWI128:
      icode = CODE_FOR_ashrv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRADI128:
      icode = CODE_FOR_ashrv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLWI128:
      icode = CODE_FOR_lshrv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLDI128:
      icode = CODE_FOR_lshrv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLQI128:
      icode = CODE_FOR_lshrv2di3;
      goto do_pshifti;
    do_pshifti:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);

      if (GET_CODE (op1) != CONST_INT)
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      /* Out-of-range counts are forced to 255.  */
      if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
	op1 = GEN_INT (255);

      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_reg (op0);

      target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;

    /* 128-bit shifts by a count held in a vector register.  */
    case IX86_BUILTIN_PSLLW128:
      icode = CODE_FOR_ashlv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSLLD128:
      icode = CODE_FOR_ashlv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSLLQ128:
      icode = CODE_FOR_ashlv2di3;
      goto do_pshift;
    case IX86_BUILTIN_PSRAW128:
      icode = CODE_FOR_ashrv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSRAD128:
      icode = CODE_FOR_ashrv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLW128:
      icode = CODE_FOR_lshrv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLD128:
      icode = CODE_FOR_lshrv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLQ128:
      icode = CODE_FOR_lshrv2di3;
      goto do_pshift;
    do_pshift:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);

      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_reg (op0);

      /* The count operand is consumed as TImode.  */
      op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
	op1 = copy_to_reg (op1);

      target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
	       : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      /* The insn works on TImode; produce the result as V2DImode and
	 let the subreg bridge the two.  */
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
			     op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_mmx_femms ());
      return NULL_RTX;

    /* 3DNow! builtins map directly onto the generic unop/binop helpers.  */
    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
	op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);

    case IX86_BUILTIN_MONITOR:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (Pmode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      if (!TARGET_64BIT)
	emit_insn (gen_sse3_monitor (op0, op1, op2));
      else
	emit_insn (gen_sse3_monitor64 (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;

    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
				       target, 1);

    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
      return ix86_expand_vec_ext_builtin (arglist, target);

    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
      return ix86_expand_vec_set_builtin (arglist);

    default:
      break;
    }

  /* Builtins without special handling: look them up in the generic
     two-operand, one-operand and comparison tables.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_sse_maskcmpv4sf3
	    || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_sse2_maskcmpv2df3
	    || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  gcc_unreachable ();
}

/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (TARGET_RED_ZONE)
    {
      /* With a red zone we can store below the stack pointer without
	 adjusting it.  */
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      /* 64-bit without red zone: push the value (widened to DImode).  */
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (DImode,
				      gen_rtx_PRE_DEC (DImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      /* 32-bit: push as one or two SImode words.  */
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[1]));
	    emit_insn
	      (
	       gen_rtx_SET (VOIDmode,
			    gen_rtx_MEM (SImode,
					 gen_rtx_PRE_DEC (Pmode,
							  stack_pointer_rtx)),
			    operands[0]));
	  }
	  break;
	case HImode:
	  /* Store HImodes as SImodes.  */
	  operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (GET_MODE (operand),
				      gen_rtx_PRE_DEC (SImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}

/* Free operand from the memory.  Undoes the stack adjustment made by
   ix86_force_to_memory; a no-op when the red zone was used.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (class == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return class;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (class) ? class : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return class;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (TARGET_80387
	  && standard_80387_constant_p (x))
	{
	  /* Limit class to non-sse.  */
	  if (class == FLOAT_SSE_REGS)
	    return FLOAT_REGS;
	  if (class == FP_TOP_SSE_REGS)
	    return FP_TOP_REG;
	  if (class == FP_SECOND_SSE_REGS)
	    return FP_SECOND_REG;
	  if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
	    return class;
	}

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (class, Q_REGS))
	return class;
      if (reg_class_subset_p (Q_REGS, class))
	return Q_REGS;
      return NO_REGS;
    }

  return class;
}

/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */
enum reg_class
ix86_preferred_output_reload_class (rtx x, enum reg_class class)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  mode = GET_MODE (x);
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;

  if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
    {
      if (class == FP_TOP_SSE_REGS)
	return FP_TOP_REG;
      else if (class == FP_SECOND_SSE_REGS)
	return FP_SECOND_REG;
      else
	return FLOAT_CLASS_P (class) ? class : NO_REGS;
    }

  return class;
}

/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.
*/ 16752 16753int 16754ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2, 16755 enum machine_mode mode, int strict) 16756{ 16757 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1) 16758 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2) 16759 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) 16760 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2) 16761 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1) 16762 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)) 16763 { 16764 gcc_assert (!strict); 16765 return true; 16766 } 16767 16768 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)) 16769 return true; 16770 16771 /* ??? This is a lie. We do have moves between mmx/general, and for 16772 mmx/sse2. But by saying we need secondary memory we discourage the 16773 register allocator from using the mmx registers unless needed. */ 16774 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)) 16775 return true; 16776 16777 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) 16778 { 16779 /* SSE1 doesn't have any direct moves from other classes. */ 16780 if (!TARGET_SSE2) 16781 return true; 16782 16783 /* If the target says that inter-unit moves are more expensive 16784 than moving through memory, then don't generate them. */ 16785 if (!TARGET_INTER_UNIT_MOVES && !optimize_size) 16786 return true; 16787 16788 /* Between SSE and general, we have moves no larger than word size. */ 16789 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) 16790 return true; 16791 16792 /* ??? For the cost of one register reformat penalty, we could use 16793 the same instructions to move SFmode and DFmode data, but the 16794 relevant move patterns don't support those alternatives. */ 16795 if (mode == SFmode || mode == DFmode) 16796 return true; 16797 } 16798 16799 return false; 16800} 16801 16802/* Return true if the registers in CLASS cannot represent the change from 16803 modes FROM to TO. 
 */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			       enum reg_class class)
{
  /* An identity change is always representable.  */
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (class))
    return true;

  if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
	 disallow a change to these modes, reload will assume it's ok to
	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
	 the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
	return true;

      /* Vector registers do not support subreg with nonzero offsets, which
	 are otherwise valid for integer registers.  Since we can't see
	 whether we have a nonzero offset from here, prohibit all
	 nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
	return true;
    }

  return false;
}

/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

int
ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
			 enum reg_class class2)
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.
   */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      /* Charge the worse of load/store cost for each side of the
	 round trip through memory.  */
      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
		   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
		   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */

bool
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  */
      return (VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (regno < 4 || TARGET_64BIT)
	return 1;
      if (!TARGET_PARTIAL_REG_STALL)
	return 1;
      return reload_in_progress || reload_completed;
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return 1;
  else if (VALID_FP_MODE_P (mode))
    return 1;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return 1;

  return 0;
}

/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.
*/

/* Return true when values of MODE can be tied with the tieable integer
   modes, i.e. live in the general-purpose registers without penalty.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      /* HImode and SImode always live in any general register.  */
      return true;

    case QImode:
      /* In 64-bit mode every GPR has a byte-addressable low part; in
	 32-bit mode only %eax..%ebx do, and tying is only safe when
	 partial register stalls are not a tuning concern.  */
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      /* DImode fits in one GPR only on 64-bit targets.  */
      return TARGET_64BIT;

    default:
      return false;
    }
}

/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  /* Identical modes trivially tie.  */
  if (mode1 == mode2)
    return true;

  /* Any pair of GPR-friendly integer modes can share a register.  */
  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) >= 8
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);

  /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);

  return false;
}

/* Return the cost of moving data of mode M between a
   register and memory.
A value of 2 is the default; this cost is 17012 relative to those in `REGISTER_MOVE_COST'. 17013 17014 If moving between registers and memory is more expensive than 17015 between two registers, you should define this macro to express the 17016 relative cost. 17017 17018 Model also increased moving costs of QImode registers in non 17019 Q_REGS classes. 17020 */ 17021int 17022ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in) 17023{ 17024 if (FLOAT_CLASS_P (class)) 17025 { 17026 int index; 17027 switch (mode) 17028 { 17029 case SFmode: 17030 index = 0; 17031 break; 17032 case DFmode: 17033 index = 1; 17034 break; 17035 case XFmode: 17036 index = 2; 17037 break; 17038 default: 17039 return 100; 17040 } 17041 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index]; 17042 } 17043 if (SSE_CLASS_P (class)) 17044 { 17045 int index; 17046 switch (GET_MODE_SIZE (mode)) 17047 { 17048 case 4: 17049 index = 0; 17050 break; 17051 case 8: 17052 index = 1; 17053 break; 17054 case 16: 17055 index = 2; 17056 break; 17057 default: 17058 return 100; 17059 } 17060 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index]; 17061 } 17062 if (MMX_CLASS_P (class)) 17063 { 17064 int index; 17065 switch (GET_MODE_SIZE (mode)) 17066 { 17067 case 4: 17068 index = 0; 17069 break; 17070 case 8: 17071 index = 1; 17072 break; 17073 default: 17074 return 100; 17075 } 17076 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index]; 17077 } 17078 switch (GET_MODE_SIZE (mode)) 17079 { 17080 case 1: 17081 if (in) 17082 return (Q_CLASS_P (class) ? ix86_cost->int_load[0] 17083 : ix86_cost->movzbl_load); 17084 else 17085 return (Q_CLASS_P (class) ? ix86_cost->int_store[0] 17086 : ix86_cost->int_store[0] + 4); 17087 break; 17088 case 2: 17089 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1]; 17090 default: 17091 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. 
*/ 17092 if (mode == TFmode) 17093 mode = XFmode; 17094 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2]) 17095 * (((int) GET_MODE_SIZE (mode) 17096 + UNITS_PER_WORD - 1) / UNITS_PER_WORD)); 17097 } 17098} 17099 17100/* Compute a (partial) cost for rtx X. Return true if the complete 17101 cost has been computed, and false if subexpressions should be 17102 scanned. In either case, *TOTAL contains the cost result. */ 17103 17104static bool 17105ix86_rtx_costs (rtx x, int code, int outer_code, int *total) 17106{ 17107 enum machine_mode mode = GET_MODE (x); 17108 17109 switch (code) 17110 { 17111 case CONST_INT: 17112 case CONST: 17113 case LABEL_REF: 17114 case SYMBOL_REF: 17115 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode)) 17116 *total = 3; 17117 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode)) 17118 *total = 2; 17119 else if (flag_pic && SYMBOLIC_CONST (x) 17120 && (!TARGET_64BIT 17121 || (!GET_CODE (x) != LABEL_REF 17122 && (GET_CODE (x) != SYMBOL_REF 17123 || !SYMBOL_REF_LOCAL_P (x))))) 17124 *total = 1; 17125 else 17126 *total = 0; 17127 return true; 17128 17129 case CONST_DOUBLE: 17130 if (mode == VOIDmode) 17131 *total = 0; 17132 else 17133 switch (standard_80387_constant_p (x)) 17134 { 17135 case 1: /* 0.0 */ 17136 *total = 1; 17137 break; 17138 default: /* Other constants */ 17139 *total = 2; 17140 break; 17141 case 0: 17142 case -1: 17143 /* Start with (MEM (SYMBOL_REF)), since that's where 17144 it'll probably end up. Add a penalty for size. */ 17145 *total = (COSTS_N_INSNS (1) 17146 + (flag_pic != 0 && !TARGET_64BIT) 17147 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2)); 17148 break; 17149 } 17150 return true; 17151 17152 case ZERO_EXTEND: 17153 /* The zero extensions is often completely free on x86_64, so make 17154 it as cheap as possible. 
*/ 17155 if (TARGET_64BIT && mode == DImode 17156 && GET_MODE (XEXP (x, 0)) == SImode) 17157 *total = 1; 17158 else if (TARGET_ZERO_EXTEND_WITH_AND) 17159 *total = ix86_cost->add; 17160 else 17161 *total = ix86_cost->movzx; 17162 return false; 17163 17164 case SIGN_EXTEND: 17165 *total = ix86_cost->movsx; 17166 return false; 17167 17168 case ASHIFT: 17169 if (GET_CODE (XEXP (x, 1)) == CONST_INT 17170 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT)) 17171 { 17172 HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); 17173 if (value == 1) 17174 { 17175 *total = ix86_cost->add; 17176 return false; 17177 } 17178 if ((value == 2 || value == 3) 17179 && ix86_cost->lea <= ix86_cost->shift_const) 17180 { 17181 *total = ix86_cost->lea; 17182 return false; 17183 } 17184 } 17185 /* FALLTHRU */ 17186 17187 case ROTATE: 17188 case ASHIFTRT: 17189 case LSHIFTRT: 17190 case ROTATERT: 17191 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode) 17192 { 17193 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 17194 { 17195 if (INTVAL (XEXP (x, 1)) > 32) 17196 *total = ix86_cost->shift_const + COSTS_N_INSNS (2); 17197 else 17198 *total = ix86_cost->shift_const * 2; 17199 } 17200 else 17201 { 17202 if (GET_CODE (XEXP (x, 1)) == AND) 17203 *total = ix86_cost->shift_var * 2; 17204 else 17205 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2); 17206 } 17207 } 17208 else 17209 { 17210 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 17211 *total = ix86_cost->shift_const; 17212 else 17213 *total = ix86_cost->shift_var; 17214 } 17215 return false; 17216 17217 case MULT: 17218 if (FLOAT_MODE_P (mode)) 17219 { 17220 *total = ix86_cost->fmul; 17221 return false; 17222 } 17223 else 17224 { 17225 rtx op0 = XEXP (x, 0); 17226 rtx op1 = XEXP (x, 1); 17227 int nbits; 17228 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 17229 { 17230 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); 17231 for (nbits = 0; value != 0; value &= value - 1) 17232 nbits++; 17233 } 17234 else 17235 /* This is arbitrary. 
*/ 17236 nbits = 7; 17237 17238 /* Compute costs correctly for widening multiplication. */ 17239 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND) 17240 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2 17241 == GET_MODE_SIZE (mode)) 17242 { 17243 int is_mulwiden = 0; 17244 enum machine_mode inner_mode = GET_MODE (op0); 17245 17246 if (GET_CODE (op0) == GET_CODE (op1)) 17247 is_mulwiden = 1, op1 = XEXP (op1, 0); 17248 else if (GET_CODE (op1) == CONST_INT) 17249 { 17250 if (GET_CODE (op0) == SIGN_EXTEND) 17251 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode) 17252 == INTVAL (op1); 17253 else 17254 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode)); 17255 } 17256 17257 if (is_mulwiden) 17258 op0 = XEXP (op0, 0), mode = GET_MODE (op0); 17259 } 17260 17261 *total = (ix86_cost->mult_init[MODE_INDEX (mode)] 17262 + nbits * ix86_cost->mult_bit 17263 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code)); 17264 17265 return true; 17266 } 17267 17268 case DIV: 17269 case UDIV: 17270 case MOD: 17271 case UMOD: 17272 if (FLOAT_MODE_P (mode)) 17273 *total = ix86_cost->fdiv; 17274 else 17275 *total = ix86_cost->divide[MODE_INDEX (mode)]; 17276 return false; 17277 17278 case PLUS: 17279 if (FLOAT_MODE_P (mode)) 17280 *total = ix86_cost->fadd; 17281 else if (GET_MODE_CLASS (mode) == MODE_INT 17282 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode)) 17283 { 17284 if (GET_CODE (XEXP (x, 0)) == PLUS 17285 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT 17286 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT 17287 && CONSTANT_P (XEXP (x, 1))) 17288 { 17289 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); 17290 if (val == 2 || val == 4 || val == 8) 17291 { 17292 *total = ix86_cost->lea; 17293 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code); 17294 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), 17295 outer_code); 17296 *total += rtx_cost (XEXP (x, 1), outer_code); 17297 return true; 17298 } 17299 } 17300 else if 
(GET_CODE (XEXP (x, 0)) == MULT 17301 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT) 17302 { 17303 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); 17304 if (val == 2 || val == 4 || val == 8) 17305 { 17306 *total = ix86_cost->lea; 17307 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code); 17308 *total += rtx_cost (XEXP (x, 1), outer_code); 17309 return true; 17310 } 17311 } 17312 else if (GET_CODE (XEXP (x, 0)) == PLUS) 17313 { 17314 *total = ix86_cost->lea; 17315 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code); 17316 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code); 17317 *total += rtx_cost (XEXP (x, 1), outer_code); 17318 return true; 17319 } 17320 } 17321 /* FALLTHRU */ 17322 17323 case MINUS: 17324 if (FLOAT_MODE_P (mode)) 17325 { 17326 *total = ix86_cost->fadd; 17327 return false; 17328 } 17329 /* FALLTHRU */ 17330 17331 case AND: 17332 case IOR: 17333 case XOR: 17334 if (!TARGET_64BIT && mode == DImode) 17335 { 17336 *total = (ix86_cost->add * 2 17337 + (rtx_cost (XEXP (x, 0), outer_code) 17338 << (GET_MODE (XEXP (x, 0)) != DImode)) 17339 + (rtx_cost (XEXP (x, 1), outer_code) 17340 << (GET_MODE (XEXP (x, 1)) != DImode))); 17341 return true; 17342 } 17343 /* FALLTHRU */ 17344 17345 case NEG: 17346 if (FLOAT_MODE_P (mode)) 17347 { 17348 *total = ix86_cost->fchs; 17349 return false; 17350 } 17351 /* FALLTHRU */ 17352 17353 case NOT: 17354 if (!TARGET_64BIT && mode == DImode) 17355 *total = ix86_cost->add * 2; 17356 else 17357 *total = ix86_cost->add; 17358 return false; 17359 17360 case COMPARE: 17361 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT 17362 && XEXP (XEXP (x, 0), 1) == const1_rtx 17363 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT 17364 && XEXP (x, 1) == const0_rtx) 17365 { 17366 /* This kind of construct is implemented using test[bwl]. 17367 Treat it as if we had an AND. 
*/ 17368 *total = (ix86_cost->add 17369 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code) 17370 + rtx_cost (const1_rtx, outer_code)); 17371 return true; 17372 } 17373 return false; 17374 17375 case FLOAT_EXTEND: 17376 if (!TARGET_SSE_MATH 17377 || mode == XFmode 17378 || (mode == DFmode && !TARGET_SSE2)) 17379 /* For standard 80387 constants, raise the cost to prevent 17380 compress_float_constant() to generate load from memory. */ 17381 switch (standard_80387_constant_p (XEXP (x, 0))) 17382 { 17383 case -1: 17384 case 0: 17385 *total = 0; 17386 break; 17387 case 1: /* 0.0 */ 17388 *total = 1; 17389 break; 17390 default: 17391 *total = (x86_ext_80387_constants & TUNEMASK 17392 || optimize_size 17393 ? 1 : 0); 17394 } 17395 return false; 17396 17397 case ABS: 17398 if (FLOAT_MODE_P (mode)) 17399 *total = ix86_cost->fabs; 17400 return false; 17401 17402 case SQRT: 17403 if (FLOAT_MODE_P (mode)) 17404 *total = ix86_cost->fsqrt; 17405 return false; 17406 17407 case UNSPEC: 17408 if (XINT (x, 1) == UNSPEC_TP) 17409 *total = 0; 17410 return false; 17411 17412 default: 17413 return false; 17414 } 17415} 17416 17417#if TARGET_MACHO 17418 17419static int current_machopic_label_num; 17420 17421/* Given a symbol name and its associated stub, write out the 17422 definition of the stub. */ 17423 17424void 17425machopic_output_stub (FILE *file, const char *symb, const char *stub) 17426{ 17427 unsigned int length; 17428 char *binder_name, *symbol_name, lazy_ptr_name[32]; 17429 int label = ++current_machopic_label_num; 17430 17431 /* For 64-bit we shouldn't get here. */ 17432 gcc_assert (!TARGET_64BIT); 17433 17434 /* Lose our funky encoding stuff so it doesn't contaminate the stub. 
*/ 17435 symb = (*targetm.strip_name_encoding) (symb); 17436 17437 length = strlen (stub); 17438 binder_name = alloca (length + 32); 17439 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length); 17440 17441 length = strlen (symb); 17442 symbol_name = alloca (length + 32); 17443 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); 17444 17445 sprintf (lazy_ptr_name, "L%d$lz", label); 17446 17447 if (MACHOPIC_PURE) 17448 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]); 17449 else 17450 switch_to_section (darwin_sections[machopic_symbol_stub_section]); 17451 17452 fprintf (file, "%s:\n", stub); 17453 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 17454 17455 if (MACHOPIC_PURE) 17456 { 17457 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label); 17458 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label); 17459 fprintf (file, "\tjmp\t*%%edx\n"); 17460 } 17461 else 17462 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name); 17463 17464 fprintf (file, "%s:\n", binder_name); 17465 17466 if (MACHOPIC_PURE) 17467 { 17468 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label); 17469 fprintf (file, "\tpushl\t%%eax\n"); 17470 } 17471 else 17472 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name); 17473 17474 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n"); 17475 17476 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]); 17477 fprintf (file, "%s:\n", lazy_ptr_name); 17478 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 17479 fprintf (file, "\t.long %s\n", binder_name); 17480} 17481 17482void 17483darwin_x86_file_end (void) 17484{ 17485 darwin_file_end (); 17486 ix86_file_end (); 17487} 17488#endif /* TARGET_MACHO */ 17489 17490/* Order the registers for register allocator. */ 17491 17492void 17493x86_order_regs_for_local_alloc (void) 17494{ 17495 int pos = 0; 17496 int i; 17497 17498 /* First allocate the local general purpose registers. 
*/ 17499 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 17500 if (GENERAL_REGNO_P (i) && call_used_regs[i]) 17501 reg_alloc_order [pos++] = i; 17502 17503 /* Global general purpose registers. */ 17504 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 17505 if (GENERAL_REGNO_P (i) && !call_used_regs[i]) 17506 reg_alloc_order [pos++] = i; 17507 17508 /* x87 registers come first in case we are doing FP math 17509 using them. */ 17510 if (!TARGET_SSE_MATH) 17511 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) 17512 reg_alloc_order [pos++] = i; 17513 17514 /* SSE registers. */ 17515 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) 17516 reg_alloc_order [pos++] = i; 17517 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) 17518 reg_alloc_order [pos++] = i; 17519 17520 /* x87 registers. */ 17521 if (TARGET_SSE_MATH) 17522 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) 17523 reg_alloc_order [pos++] = i; 17524 17525 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) 17526 reg_alloc_order [pos++] = i; 17527 17528 /* Initialize the rest of array as we do not allocate some registers 17529 at all. */ 17530 while (pos < FIRST_PSEUDO_REGISTER) 17531 reg_alloc_order [pos++] = 0; 17532} 17533 17534/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in 17535 struct attribute_spec.handler. 
*/ 17536static tree 17537ix86_handle_struct_attribute (tree *node, tree name, 17538 tree args ATTRIBUTE_UNUSED, 17539 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 17540{ 17541 tree *type = NULL; 17542 if (DECL_P (*node)) 17543 { 17544 if (TREE_CODE (*node) == TYPE_DECL) 17545 type = &TREE_TYPE (*node); 17546 } 17547 else 17548 type = node; 17549 17550 if (!(type && (TREE_CODE (*type) == RECORD_TYPE 17551 || TREE_CODE (*type) == UNION_TYPE))) 17552 { 17553 warning (OPT_Wattributes, "%qs attribute ignored", 17554 IDENTIFIER_POINTER (name)); 17555 *no_add_attrs = true; 17556 } 17557 17558 else if ((is_attribute_p ("ms_struct", name) 17559 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type))) 17560 || ((is_attribute_p ("gcc_struct", name) 17561 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))) 17562 { 17563 warning (OPT_Wattributes, "%qs incompatible attribute ignored", 17564 IDENTIFIER_POINTER (name)); 17565 *no_add_attrs = true; 17566 } 17567 17568 return NULL_TREE; 17569} 17570 17571static bool 17572ix86_ms_bitfield_layout_p (tree record_type) 17573{ 17574 return (TARGET_MS_BITFIELD_LAYOUT && 17575 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type))) 17576 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)); 17577} 17578 17579/* Returns an expression indicating where the this parameter is 17580 located on entry to the FUNCTION. */ 17581 17582static rtx 17583x86_this_parameter (tree function) 17584{ 17585 tree type = TREE_TYPE (function); 17586 17587 if (TARGET_64BIT) 17588 { 17589 int n = aggregate_value_p (TREE_TYPE (type), type) != 0; 17590 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]); 17591 } 17592 17593 if (ix86_function_regparm (type, function) > 0) 17594 { 17595 tree parm; 17596 17597 parm = TYPE_ARG_TYPES (type); 17598 /* Figure out whether or not the function has a variable number of 17599 arguments. 
*/ 17600 for (; parm; parm = TREE_CHAIN (parm)) 17601 if (TREE_VALUE (parm) == void_type_node) 17602 break; 17603 /* If not, the this parameter is in the first argument. */ 17604 if (parm) 17605 { 17606 int regno = 0; 17607 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))) 17608 regno = 2; 17609 return gen_rtx_REG (SImode, regno); 17610 } 17611 } 17612 17613 if (aggregate_value_p (TREE_TYPE (type), type)) 17614 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8)); 17615 else 17616 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4)); 17617} 17618 17619/* Determine whether x86_output_mi_thunk can succeed. */ 17620 17621static bool 17622x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED, 17623 HOST_WIDE_INT delta ATTRIBUTE_UNUSED, 17624 HOST_WIDE_INT vcall_offset, tree function) 17625{ 17626 /* 64-bit can handle anything. */ 17627 if (TARGET_64BIT) 17628 return true; 17629 17630 /* For 32-bit, everything's fine if we have one free register. */ 17631 if (ix86_function_regparm (TREE_TYPE (function), function) < 3) 17632 return true; 17633 17634 /* Need a free register for vcall_offset. */ 17635 if (vcall_offset) 17636 return false; 17637 17638 /* Need a free register for GOT references. */ 17639 if (flag_pic && !(*targetm.binds_local_p) (function)) 17640 return false; 17641 17642 /* Otherwise ok. */ 17643 return true; 17644} 17645 17646/* Output the assembler code for a thunk function. THUNK_DECL is the 17647 declaration for the thunk function itself, FUNCTION is the decl for 17648 the target function. DELTA is an immediate constant offset to be 17649 added to THIS. If VCALL_OFFSET is nonzero, the word at 17650 *(*this + vcall_offset) should be added to THIS. 
*/ 17651 17652static void 17653x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, 17654 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta, 17655 HOST_WIDE_INT vcall_offset, tree function) 17656{ 17657 rtx xops[3]; 17658 rtx this = x86_this_parameter (function); 17659 rtx this_reg, tmp; 17660 17661 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well 17662 pull it in now and let DELTA benefit. */ 17663 if (REG_P (this)) 17664 this_reg = this; 17665 else if (vcall_offset) 17666 { 17667 /* Put the this parameter into %eax. */ 17668 xops[0] = this; 17669 xops[1] = this_reg = gen_rtx_REG (Pmode, 0); 17670 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); 17671 } 17672 else 17673 this_reg = NULL_RTX; 17674 17675 /* Adjust the this parameter by a fixed constant. */ 17676 if (delta) 17677 { 17678 xops[0] = GEN_INT (delta); 17679 xops[1] = this_reg ? this_reg : this; 17680 if (TARGET_64BIT) 17681 { 17682 if (!x86_64_general_operand (xops[0], DImode)) 17683 { 17684 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */); 17685 xops[1] = tmp; 17686 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops); 17687 xops[0] = tmp; 17688 xops[1] = this; 17689 } 17690 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); 17691 } 17692 else 17693 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); 17694 } 17695 17696 /* Adjust the this parameter by a value stored in the vtable. 
*/ 17697 if (vcall_offset) 17698 { 17699 if (TARGET_64BIT) 17700 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */); 17701 else 17702 { 17703 int tmp_regno = 2 /* ECX */; 17704 if (lookup_attribute ("fastcall", 17705 TYPE_ATTRIBUTES (TREE_TYPE (function)))) 17706 tmp_regno = 0 /* EAX */; 17707 tmp = gen_rtx_REG (SImode, tmp_regno); 17708 } 17709 17710 xops[0] = gen_rtx_MEM (Pmode, this_reg); 17711 xops[1] = tmp; 17712 if (TARGET_64BIT) 17713 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops); 17714 else 17715 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); 17716 17717 /* Adjust the this parameter. */ 17718 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset)); 17719 if (TARGET_64BIT && !memory_operand (xops[0], Pmode)) 17720 { 17721 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */); 17722 xops[0] = GEN_INT (vcall_offset); 17723 xops[1] = tmp2; 17724 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops); 17725 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2)); 17726 } 17727 xops[1] = this_reg; 17728 if (TARGET_64BIT) 17729 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); 17730 else 17731 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); 17732 } 17733 17734 /* If necessary, drop THIS back to its stack slot. 
*/ 17735 if (this_reg && this_reg != this) 17736 { 17737 xops[0] = this_reg; 17738 xops[1] = this; 17739 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops); 17740 } 17741 17742 xops[0] = XEXP (DECL_RTL (function), 0); 17743 if (TARGET_64BIT) 17744 { 17745 if (!flag_pic || (*targetm.binds_local_p) (function)) 17746 output_asm_insn ("jmp\t%P0", xops); 17747 else 17748 { 17749 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL); 17750 tmp = gen_rtx_CONST (Pmode, tmp); 17751 tmp = gen_rtx_MEM (QImode, tmp); 17752 xops[0] = tmp; 17753 output_asm_insn ("jmp\t%A0", xops); 17754 } 17755 } 17756 else 17757 { 17758 if (!flag_pic || (*targetm.binds_local_p) (function)) 17759 output_asm_insn ("jmp\t%P0", xops); 17760 else 17761#if TARGET_MACHO 17762 if (TARGET_MACHO) 17763 { 17764 rtx sym_ref = XEXP (DECL_RTL (function), 0); 17765 tmp = (gen_rtx_SYMBOL_REF 17766 (Pmode, 17767 machopic_indirection_name (sym_ref, /*stub_p=*/true))); 17768 tmp = gen_rtx_MEM (QImode, tmp); 17769 xops[0] = tmp; 17770 output_asm_insn ("jmp\t%0", xops); 17771 } 17772 else 17773#endif /* TARGET_MACHO */ 17774 { 17775 tmp = gen_rtx_REG (SImode, 2 /* ECX */); 17776 output_set_got (tmp, NULL_RTX); 17777 17778 xops[1] = tmp; 17779 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops); 17780 output_asm_insn ("jmp\t{*}%1", xops); 17781 } 17782 } 17783} 17784 17785static void 17786x86_file_start (void) 17787{ 17788 default_file_start (); 17789#if TARGET_MACHO 17790 darwin_file_start (); 17791#endif 17792 if (X86_FILE_START_VERSION_DIRECTIVE) 17793 fputs ("\t.version\t\"01.01\"\n", asm_out_file); 17794 if (X86_FILE_START_FLTUSED) 17795 fputs ("\t.global\t__fltused\n", asm_out_file); 17796 if (ix86_asm_dialect == ASM_INTEL) 17797 fputs ("\t.intel_syntax\n", asm_out_file); 17798} 17799 17800int 17801x86_field_alignment (tree field, int computed) 17802{ 17803 enum machine_mode mode; 17804 tree type = TREE_TYPE (field); 17805 17806 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) 17807 return 
computed; 17808 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE 17809 ? get_inner_array_type (type) : type); 17810 if (mode == DFmode || mode == DCmode 17811 || GET_MODE_CLASS (mode) == MODE_INT 17812 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) 17813 return MIN (32, computed); 17814 return computed; 17815} 17816 17817/* Output assembler code to FILE to increment profiler label # LABELNO 17818 for profiling a function entry. */ 17819void 17820x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) 17821{ 17822 if (TARGET_64BIT) 17823 if (flag_pic) 17824 { 17825#ifndef NO_PROFILE_COUNTERS 17826 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno); 17827#endif 17828 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME); 17829 } 17830 else 17831 { 17832#ifndef NO_PROFILE_COUNTERS 17833 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno); 17834#endif 17835 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); 17836 } 17837 else if (flag_pic) 17838 { 17839#ifndef NO_PROFILE_COUNTERS 17840 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n", 17841 LPREFIX, labelno, PROFILE_COUNT_REGISTER); 17842#endif 17843 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME); 17844 } 17845 else 17846 { 17847#ifndef NO_PROFILE_COUNTERS 17848 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno, 17849 PROFILE_COUNT_REGISTER); 17850#endif 17851 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); 17852 } 17853} 17854 17855/* We don't have exact information about the insn sizes, but we may assume 17856 quite safely that we are informed about all 1 byte insns and memory 17857 address sizes. This is enough to eliminate unnecessary padding in 17858 99% of cases. */ 17859 17860static int 17861min_insn_size (rtx insn) 17862{ 17863 int l = 0; 17864 17865 if (!INSN_P (insn) || !active_insn_p (insn)) 17866 return 0; 17867 17868 /* Discard alignments we've emit and jump instructions. 
*/ 17869 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE 17870 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN) 17871 return 0; 17872 if (GET_CODE (insn) == JUMP_INSN 17873 && (GET_CODE (PATTERN (insn)) == ADDR_VEC 17874 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)) 17875 return 0; 17876 17877 /* Important case - calls are always 5 bytes. 17878 It is common to have many calls in the row. */ 17879 if (GET_CODE (insn) == CALL_INSN 17880 && symbolic_reference_mentioned_p (PATTERN (insn)) 17881 && !SIBLING_CALL_P (insn)) 17882 return 5; 17883 if (get_attr_length (insn) <= 1) 17884 return 1; 17885 17886 /* For normal instructions we may rely on the sizes of addresses 17887 and the presence of symbol to require 4 bytes of encoding. 17888 This is not the case for jumps where references are PC relative. */ 17889 if (GET_CODE (insn) != JUMP_INSN) 17890 { 17891 l = get_attr_length_address (insn); 17892 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn))) 17893 l = 4; 17894 } 17895 if (l) 17896 return 1+l; 17897 else 17898 return 2; 17899} 17900 17901/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte 17902 window. */ 17903 17904static void 17905ix86_avoid_jump_misspredicts (void) 17906{ 17907 rtx insn, start = get_insns (); 17908 int nbytes = 0, njumps = 0; 17909 int isjump = 0; 17910 17911 /* Look for all minimal intervals of instructions containing 4 jumps. 17912 The intervals are bounded by START and INSN. NBYTES is the total 17913 size of instructions in the interval including INSN and not including 17914 START. When the NBYTES is smaller than 16 bytes, it is possible 17915 that the end of START and INSN ends up in the same 16byte page. 17916 17917 The smallest offset in the page INSN can start is the case where START 17918 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN). 17919 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN). 
17920 */ 17921 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 17922 { 17923 17924 nbytes += min_insn_size (insn); 17925 if (dump_file) 17926 fprintf(dump_file, "Insn %i estimated to %i bytes\n", 17927 INSN_UID (insn), min_insn_size (insn)); 17928 if ((GET_CODE (insn) == JUMP_INSN 17929 && GET_CODE (PATTERN (insn)) != ADDR_VEC 17930 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC) 17931 || GET_CODE (insn) == CALL_INSN) 17932 njumps++; 17933 else 17934 continue; 17935 17936 while (njumps > 3) 17937 { 17938 start = NEXT_INSN (start); 17939 if ((GET_CODE (start) == JUMP_INSN 17940 && GET_CODE (PATTERN (start)) != ADDR_VEC 17941 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC) 17942 || GET_CODE (start) == CALL_INSN) 17943 njumps--, isjump = 1; 17944 else 17945 isjump = 0; 17946 nbytes -= min_insn_size (start); 17947 } 17948 gcc_assert (njumps >= 0); 17949 if (dump_file) 17950 fprintf (dump_file, "Interval %i to %i has %i bytes\n", 17951 INSN_UID (start), INSN_UID (insn), nbytes); 17952 17953 if (njumps == 3 && isjump && nbytes < 16) 17954 { 17955 int padsize = 15 - nbytes + min_insn_size (insn); 17956 17957 if (dump_file) 17958 fprintf (dump_file, "Padding insn %i by %i bytes!\n", 17959 INSN_UID (insn), padsize); 17960 emit_insn_before (gen_align (GEN_INT (padsize)), insn); 17961 } 17962 } 17963} 17964 17965/* AMD Athlon works faster 17966 when RET is not destination of conditional jump or directly preceded 17967 by other jump instruction. We avoid the penalty by inserting NOP just 17968 before the RET instructions in such cases. 
*/ 17969static void 17970ix86_pad_returns (void) 17971{ 17972 edge e; 17973 edge_iterator ei; 17974 17975 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) 17976 { 17977 basic_block bb = e->src; 17978 rtx ret = BB_END (bb); 17979 rtx prev; 17980 bool replace = false; 17981 17982 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN 17983 || !maybe_hot_bb_p (bb)) 17984 continue; 17985 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev)) 17986 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL) 17987 break; 17988 if (prev && GET_CODE (prev) == CODE_LABEL) 17989 { 17990 edge e; 17991 edge_iterator ei; 17992 17993 FOR_EACH_EDGE (e, ei, bb->preds) 17994 if (EDGE_FREQUENCY (e) && e->src->index >= 0 17995 && !(e->flags & EDGE_FALLTHRU)) 17996 replace = true; 17997 } 17998 if (!replace) 17999 { 18000 prev = prev_active_insn (ret); 18001 if (prev 18002 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev)) 18003 || GET_CODE (prev) == CALL_INSN)) 18004 replace = true; 18005 /* Empty functions get branch mispredict even when the jump destination 18006 is not visible to us. */ 18007 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED) 18008 replace = true; 18009 } 18010 if (replace) 18011 { 18012 emit_insn_before (gen_return_internal_long (), ret); 18013 delete_insn (ret); 18014 } 18015 } 18016} 18017 18018/* Implement machine specific optimizations. We implement padding of returns 18019 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */ 18020static void 18021ix86_reorg (void) 18022{ 18023 if (TARGET_PAD_RETURNS && optimize && !optimize_size) 18024 ix86_pad_returns (); 18025 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size) 18026 ix86_avoid_jump_misspredicts (); 18027} 18028 18029/* Return nonzero when QImode register that must be represented via REX prefix 18030 is used. 
*/ 18031bool 18032x86_extended_QIreg_mentioned_p (rtx insn) 18033{ 18034 int i; 18035 extract_insn_cached (insn); 18036 for (i = 0; i < recog_data.n_operands; i++) 18037 if (REG_P (recog_data.operand[i]) 18038 && REGNO (recog_data.operand[i]) >= 4) 18039 return true; 18040 return false; 18041} 18042 18043/* Return nonzero when P points to register encoded via REX prefix. 18044 Called via for_each_rtx. */ 18045static int 18046extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED) 18047{ 18048 unsigned int regno; 18049 if (!REG_P (*p)) 18050 return 0; 18051 regno = REGNO (*p); 18052 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno); 18053} 18054 18055/* Return true when INSN mentions register that must be encoded using REX 18056 prefix. */ 18057bool 18058x86_extended_reg_mentioned_p (rtx insn) 18059{ 18060 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL); 18061} 18062 18063/* Generate an unsigned DImode/SImode to FP conversion. This is the same code 18064 optabs would emit if we didn't have TFmode patterns. 
*/

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  /* All integer arithmetic below must be carried out in the mode of the
     input (INMODE), not Pmode: the two differ whenever the input is
     narrower than a pointer, and mixing them produces ill-formed rtl.
     The in-tree expanders only invoke this with inmode == Pmode, so the
     previous use of Pmode here was a latent bug.  */
  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  /* Nonnegative values convert directly with a signed int->float.  */
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* High bit set: compute (in >> 1) | (in & 1) — halving while keeping
     the correct rounding bit — convert signed, then double the result.  */
  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}

/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.
*/

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
				   rtx target, rtx val)
{
  enum machine_mode smode, wsmode, wvmode;
  rtx x;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These modes have a direct VEC_DUPLICATE pattern.  */
      val = force_reg (GET_MODE_INNER (mode), val);
      x = gen_rtx_VEC_DUPLICATE (mode, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4HImode:
      if (!mmx_ok)
	return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
	{
	  /* pshufw is available: duplicate the low HImode word.  */
	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
	  return true;
	}
      else
	{
	  smode = HImode;
	  wsmode = SImode;
	  wvmode = V2SImode;
	  goto widen;
	}

    case V8QImode:
      if (!mmx_ok)
	return false;
      smode = QImode;
      wsmode = HImode;
      wvmode = V4HImode;
      goto widen;
    case V8HImode:
      if (TARGET_SSE2)
	{
	  rtx tmp1, tmp2;
	  /* Extend HImode to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
	  /* Insert the SImode value as low element of V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
				    CONST0_RTX (V4SImode),
				    const1_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
	  /* Cast the V4SImode vector back to a V8HImode vector.  */
	  tmp1 = gen_reg_rtx (V8HImode);
	  emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
	  /* Duplicate the low short through the whole low SImode word.  */
	  emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
	  /* Cast the V8HImode vector back to a V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
	  /* Replicate the low element of the V4SImode vector.  */
	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
	  /* Cast the V2SImode back to V8HImode, and store in target.  */
	  emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
	  return true;
	}
      smode = HImode;
      wsmode = SImode;
      wvmode = V4SImode;
      goto widen;
    case V16QImode:
      if (TARGET_SSE2)
	{
	  rtx tmp1, tmp2;
	  /* Extend QImode to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
	  /* Insert the SImode value as low element of V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
				    CONST0_RTX (V4SImode),
				    const1_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
	  /* Cast the V4SImode vector back to a V16QImode vector.  */
	  tmp1 = gen_reg_rtx (V16QImode);
	  emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
	  /* Duplicate the low byte through the whole low SImode word.  */
	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
	  /* Cast the V16QImode vector back to a V4SImode vector.  */
	  tmp2 = gen_reg_rtx (V4SImode);
	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
	  /* Replicate the low element of the V4SImode vector.  */
	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
	  /* Cast the V2SImode back to V16QImode, and store in target.  */
	  emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
	  return true;
	}
      smode = QImode;
      wsmode = HImode;
      wvmode = V8HImode;
      goto widen;
    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      val = convert_modes (wsmode, smode, val, true);
      x = expand_simple_binop (wsmode, ASHIFT, val,
			       GEN_INT (GET_MODE_BITSIZE (smode)),
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wvmode);
      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
	gcc_unreachable ();
      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}

/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
				     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      /* Only "VAR in element 0" has a direct concat-with-zero form.  */
      if (one_var != 0)
	return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      /* Build in a fresh pseudo unless TARGET is already a pseudo; the
	 shuffle below may need TARGET and the intermediate distinct.  */
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
	new_target = gen_reg_rtx (mode);
      else
	new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
	{
	  /* We need to shuffle the value to the correct position, so
	     create a new pseudo to store the intermediate result.  */

	  /* With SSE2, we can use the integer shuffle insns.  */
	  if (mode != V4SFmode && TARGET_SSE2)
	    {
	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
					    GEN_INT (1),
					    GEN_INT (one_var == 1 ? 0 : 1),
					    GEN_INT (one_var == 2 ? 0 : 1),
					    GEN_INT (one_var == 3 ? 0 : 1)));
	      if (target != new_target)
		emit_move_insn (target, new_target);
	      return true;
	    }

	  /* Otherwise convert the intermediate result to V4SFmode and
	     use the SSE1 shuffle instructions.  */
	  if (mode != V4SFmode)
	    {
	      tmp = gen_reg_rtx (V4SFmode);
	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
	    }
	  else
	    tmp = new_target;

	  /* shufps selector: move element 0 (VAR) to position ONE_VAR,
	     filling the rest from the zeroed lanes.  */
	  emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
				       GEN_INT (1),
				       GEN_INT (one_var == 1 ? 0 : 1),
				       GEN_INT (one_var == 2 ? 0+4 : 1+4),
				       GEN_INT (one_var == 3 ? 0+4 : 1+4)));

	  if (mode != V4SFmode)
	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
	  else if (tmp != target)
	    emit_move_insn (target, tmp);
	}
      else if (target != new_target)
	emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
	return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
	return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
						var, one_var))
	gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}

/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  /* Build the constant vector with a zero in the variable slot; it is
     loaded from the pool and the variable element stored over it.  */
  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
	 the general case.  */
      return false;

    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
	 the variable value with its adjacent constant value, and
	 promote to an HImode set.
*/
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
	{
	  /* Variable element is the high byte of the HImode pair.  */
	  var = convert_modes (HImode, QImode, var, true);
	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
	  x = GEN_INT (INTVAL (x) & 0xff);
	}
      else
	{
	  /* Variable element is the low byte; constant neighbor goes high.  */
	  var = convert_modes (HImode, QImode, var, true);
	  x = gen_int_mode (INTVAL (x) << 8, HImode);
	}
      if (x != const0_rtx)
	var = expand_simple_binop (HImode, IOR, var, x, var,
				   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}

/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals)
{
  enum machine_mode half_mode = GET_MODE_INNER (mode);
  rtx op0 = NULL, op1 = NULL;
  bool use_vec_concat = false;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      /* For the two element vectors, we always implement VEC_CONCAT.  */
      op0 = XVECEXP (vals, 0, 0);
      op1 = XVECEXP (vals, 0, 1);
      use_vec_concat = true;
      break;

    case V4SFmode:
      half_mode = V2SFmode;
      goto half;
    case V4SImode:
      half_mode = V2SImode;
      goto half;
    half:
      {
	rtvec v;

	/* For V4SF and V4SI, we implement a concat of two V2 vectors.
	   Recurse to load the two halves.  */

	op0 = gen_reg_rtx (half_mode);
	v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
	ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));

	op1 = gen_reg_rtx (half_mode);
	v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
	ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));

	use_vec_concat = true;
      }
      break;

    case V8HImode:
    case V16QImode:
    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

  if (use_vec_concat)
    {
      if (!register_operand (op0, half_mode))
	op0 = force_reg (half_mode, op0);
      if (!register_operand (op1, half_mode))
	op1 = force_reg (half_mode, op1);

      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
    }
  else
    {
      /* Narrow-element case: pack the elements into integer words with
	 shift/or, then assemble the words into the vector.  */
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
	{
	  rtx word = NULL_RTX;

	  /* Elements are accumulated high-to-low so the first one ends up
	     in the most significant position of the word.  */
	  for (j = 0; j < n_elt_per_word; ++j)
	    {
	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
	      elt = convert_modes (word_mode, inner_mode, elt, true);

	      if (j == 0)
		word = elt;
	      else
		{
		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
					      word, 1, OPTAB_LIB_WIDEN);
		  word = expand_simple_binop (word_mode, IOR, word, elt,
					      word, 1, OPTAB_LIB_WIDEN);
		}
	    }

	  words[i] = word;
	}

      if (n_words == 1)
	emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
	{
	  /* CLOBBER tells the register allocator the whole TMP is dead
	     before the partial (low/high part) writes.  */
	  rtx tmp = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
	  emit_move_insn (target, tmp);
	}
      else if (n_words == 4)
	{
	  rtx tmp = gen_reg_rtx (V4SImode);
	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
	  emit_move_insn (target, gen_lowpart (mode, tmp));
	}
      else
	gcc_unreachable ();
    }
}

/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  /* Classify the initializer: count non-constant elements, remember the
     last one, and detect the all-equal and all-zero-constant cases.  */
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
					    XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.
*/
  if (n_var == 1)
    {
      if (all_const_zero
	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
						  XVECEXP (vals, 0, one_var),
						  one_var))
	return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
	return;
    }

  /* Fall back to the fully general expansion.  */
  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}

/* Store scalar VAL into element ELT of vector TARGET.  MMX_OK permits
   the use of MMX instructions.  */
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_merge = false;
  rtx tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
	{
	  /* Extract the element we keep and re-concat with VAL in the
	     requested position.  */
	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	  if (elt == 0)
	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	  else
	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
	  return;
	}
      break;

    case V2DFmode:
    case V2DImode:
      {
	rtx op0, op1;

	/* For the two element vectors, we implement a VEC_CONCAT with
	   the extraction of the other element.  */

	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

	if (elt == 0)
	  op0 = val, op1 = tmp;
	else
	  op0 = tmp, op1 = val;

	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      switch (elt)
	{
	case 0:
	  use_vec_merge = true;
	  break;

	case 1:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* target = A A B B */
	  emit_insn (gen_sse_unpcklps (target, target, target));
	  /* target = X A B B */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* target = A X C D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (1), GEN_INT (0),
				       GEN_INT (2+4), GEN_INT (3+4)));
	  return;

	case 2:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (0+4), GEN_INT (3+4)));
	  return;

	case 3:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B C X */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (2+4), GEN_INT (0+4)));
	  return;

	default:
	  gcc_unreachable ();
	}
      break;

    case V4SImode:
      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
	{
	  use_vec_merge = true;
	  break;
	}

      if (TARGET_SSE2)
	{
	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.
*/

	  rtx order[4];

	  /* Identity permutation with lanes 0 and ELT exchanged; applying
	     it twice restores the original order.  */
	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
				  gen_lowpart (SFmode, val), elt);
	}
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      /* Fallback: bounce the vector through a stack slot and store the
	 element there.  */
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}

/* Extract element ELT of vector VEC into scalar TARGET.  MMX_OK permits
   the use of MMX instructions.  */
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      /* Shuffle the wanted element into lane 0 first; only lane 0 can be
	 extracted directly.  */
      switch (elt)
	{
	case 0:
	  tmp = vec;
	  break;

	case 1:
	case 3:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
				       GEN_INT (elt), GEN_INT (elt),
				       GEN_INT (elt+4), GEN_INT (elt+4)));
	  break;

	case 2:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_unpckhps (tmp, vec, vec));
	  break;

	default:
	  gcc_unreachable ();
	}
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      if (TARGET_SSE2)
	{
	  switch (elt)
	    {
	    case 0:
	      tmp = vec;
	      break;

	    case 1:
	    case 3:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					    GEN_INT (elt), GEN_INT (elt),
					    GEN_INT (elt), GEN_INT (elt)));
	      break;

	    case 2:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  vec = tmp;
	  use_vec_extr = true;
	  elt = 0;
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				      gen_lowpart (V4SFmode, vec), elt);
	  return;
	}
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.
*/
      if (inner_mode == HImode)
	{
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);
	}

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      /* Fallback: spill the vector to a stack slot and load the element.  */
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}

/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
   pattern to reduce; DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx tmp1, tmp2, tmp3;

  tmp1 = gen_reg_rtx (V4SFmode);
  tmp2 = gen_reg_rtx (V4SFmode);
  tmp3 = gen_reg_rtx (V4SFmode);

  /* Combine the high and low halves, then the two remaining lanes.  */
  emit_insn (gen_sse_movhlps (tmp1, in, in));
  emit_insn (fn (tmp2, tmp1, in));

  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
			       GEN_INT (1), GEN_INT (1),
			       GEN_INT (1+4), GEN_INT (1+4)));
  emit_insn (fn (dest, tmp2, tmp3));
}

/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}

/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}

/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
		      tree clobbers)
{
  /* Every asm implicitly clobbers the flags, the FP status word and the
     direction flag.  */
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
			clobbers);
  return clobbers;
}

/* Return true if this goes in large data/bss (x86-64 medium model).  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}

/* Target hook for encode_section_info: mark large-model data symbols so
   addressing of them uses the far (64-bit) form.  */
static void
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}

/* Worker function for REVERSE_CONDITION.
*/

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  /* FP comparisons must preserve NaN (unordered) behavior when reversed.  */
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
}

/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  /* If the source register dies here, pop it off the x87 stack.  */
  if (REG_P (operands[1])
      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
    {
      if (REGNO (operands[0]) == FIRST_STACK_REG)
	return output_387_ffreep (operands, 0);
      return "fstp\t%y0";
    }
  if (STACK_TOP_P (operands[0]))
    return "fld%z1\t%y1";
  return "fst\t%y0";
}

/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
    {
      /* fnstsw + sahf transfers the FP flags into EFLAGS.  */
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      /* Otherwise test bit 2 (C2) of the status word directly.  */
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
}

/* Output code to perform a log1p XFmode calculation.
*/

/* Expand log1p (OP1) into OP0 using the x87.  fyl2xp1 is used for small
   |x| (the 0.2928... cutoff is 1 - sqrt(2)/2, presumably fyl2xp1's
   documented accuracy range — confirm against the Intel SDM); otherwise
   compute fyl2x (x + 1).  Both multiply by ln(2) to convert log2 to ln.  */
void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
}

/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}

/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_fundamental_type (tree type)
{
  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}

/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
}

/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      /* PIC prefers pc-relative encodings; indirect for preemptible
	 globals.  */
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}

#include "gt-i386.h"