expmed.c revision 1.11
/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "predict.h"
#include "memmodel.h"
#include "tm_p.h"
#include "expmed.h"
#include "optabs.h"
#include "emit-rtl.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "dojump.h"
#include "explow.h"
#include "expr.h"
#include "langhooks.h"

struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif

static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   rtx, bool);
static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
				     unsigned HOST_WIDE_INT,
				     rtx, bool);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   rtx, bool);
static rtx extract_fixed_bit_field (machine_mode, rtx,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
				      unsigned HOST_WIDE_INT,
				      unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, int, bool);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode,
			     rtx_code_label *);
static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);

/* Return a constant integer mask value of mode MODE with BITSIZE ones
   followed by BITPOS zeros, or the complement of that if COMPLEMENT.
   The mask is truncated if necessary to the width of mode MODE.  The
   mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */

static inline rtx
mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
{
  return immed_wide_int_const
    (wi::shifted_mask (bitpos, bitsize, complement,
		       GET_MODE_PRECISION (mode)), mode);
}
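/* For instance, on a 32-bit target, mask_rtx (SImode, 8, 4, false)
   yields a constant with bit pattern 0xf00 (four ones shifted up by
   eight bits), and with COMPLEMENT true the pattern 0xfffff0ff (an
   illustrative example).  */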
/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)

struct init_expmed_rtl
{
  rtx reg;
  rtx plus;
  rtx neg;
  rtx mult;
  rtx sdiv;
  rtx udiv;
  rtx sdiv_32;
  rtx smod_32;
  rtx wide_mult;
  rtx wide_lshr;
  rtx wide_trunc;
  rtx shift;
  rtx shift_mult;
  rtx shift_add;
  rtx shift_sub0;
  rtx shift_sub1;
  rtx zext;
  rtx trunc;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
};

static void
init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
		      machine_mode from_mode, bool speed)
{
  int to_size, from_size;
  rtx which;

  to_size = GET_MODE_PRECISION (to_mode);
  from_size = GET_MODE_PRECISION (from_mode);

  /* Most partial integers have a precision less than the "full"
     integer it requires for storage.  If one does not, reduce the
     bit size by one for the comparison here.  */
  if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (to_size))
    to_size --;
  if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (from_size))
    from_size --;

  /* Assume cost of zero-extend and sign-extend is the same.  */
  which = (to_size < from_size ? all->trunc : all->zext);

  PUT_MODE (all->reg, from_mode);
  set_convert_cost (to_mode, from_mode, speed,
		    set_src_cost (which, to_mode, speed));
}

static void
init_expmed_one_mode (struct init_expmed_rtl *all,
		      machine_mode mode, int speed)
{
  int m, n, mode_bitsize;
  machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  PUT_MODE (all->reg, mode);
  PUT_MODE (all->plus, mode);
  PUT_MODE (all->neg, mode);
  PUT_MODE (all->mult, mode);
  PUT_MODE (all->sdiv, mode);
  PUT_MODE (all->udiv, mode);
  PUT_MODE (all->sdiv_32, mode);
  PUT_MODE (all->smod_32, mode);
  PUT_MODE (all->wide_trunc, mode);
  PUT_MODE (all->shift, mode);
  PUT_MODE (all->shift_mult, mode);
  PUT_MODE (all->shift_add, mode);
  PUT_MODE (all->shift_sub0, mode);
  PUT_MODE (all->shift_sub1, mode);
  PUT_MODE (all->zext, mode);
  PUT_MODE (all->trunc, mode);

  set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
  set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
  set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
  set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
  set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));

  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
				     <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
				     <= 4 * add_cost (speed, mode)));
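  /* The thresholds above are heuristic: a signed division by a power
     of two typically expands to a shift plus a small fix-up for
     negative operands, so it is treated as cheap when it costs no more
     than about two additions; the remainder sequence is somewhat
     longer, hence the factor of four.  */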
  set_shift_cost (speed, mode, 0, 0);
  {
    int cost = add_cost (speed, mode);
    set_shiftadd_cost (speed, mode, 0, cost);
    set_shiftsub0_cost (speed, mode, 0, cost);
    set_shiftsub1_cost (speed, mode, 0, cost);
  }

  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
    {
      XEXP (all->shift, 1) = all->cint[m];
      XEXP (all->shift_mult, 1) = all->pow2[m];

      set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
      set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
						       speed));
      set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
							speed));
      set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1,
							mode, speed));
    }

  if (SCALAR_INT_MODE_P (mode))
    {
      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
	   mode_from = (machine_mode)(mode_from + 1))
	init_expmed_one_conv (all, mode, mode_from, speed);
    }
  if (GET_MODE_CLASS (mode) == MODE_INT)
    {
      machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
	{
	  PUT_MODE (all->zext, wider_mode);
	  PUT_MODE (all->wide_mult, wider_mode);
	  PUT_MODE (all->wide_lshr, wider_mode);
	  XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);

	  set_mul_widen_cost (speed, wider_mode,
			      set_src_cost (all->wide_mult, wider_mode,
					    speed));
	  set_mul_highpart_cost (speed, mode,
				 set_src_cost (all->wide_trunc, mode, speed));
	}
    }
}

void
init_expmed (void)
{
  struct init_expmed_rtl all;
  machine_mode mode = QImode;
  int m, speed;

  memset (&all, 0, sizeof all);
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
      all.cint[m] = GEN_INT (m);
    }

  /* Avoid using hard regs in ways which may be unsupported.  */
  all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
  all.neg = gen_rtx_NEG (mode, all.reg);
  all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
  all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
  all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
  all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
  all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
  all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
  all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
  all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
  all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
  all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
  all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
  all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
  all.trunc = gen_rtx_TRUNCATE (mode, all.reg);

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));

      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
	   mode = (machine_mode)(mode + 1))
	init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_PARTIAL_INT != VOIDmode)
	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_VECTOR_INT != VOIDmode)
	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);
    }

  if (alg_hash_used_p ())
    {
      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
    }
  else
    set_alg_hash_used_p (true);
  default_rtl_profile ();

  ggc_free (all.trunc);
  ggc_free (all.shift_sub1);
  ggc_free (all.shift_sub0);
  ggc_free (all.shift_add);
  ggc_free (all.shift_mult);
  ggc_free (all.shift);
  ggc_free (all.wide_trunc);
  ggc_free (all.wide_lshr);
  ggc_free (all.wide_mult);
  ggc_free (all.zext);
  ggc_free (all.smod_32);
  ggc_free (all.sdiv_32);
  ggc_free (all.udiv);
  ggc_free (all.sdiv);
  ggc_free (all.mult);
  ggc_free (all.neg);
  ggc_free (all.plus);
  ggc_free (all.reg);
}

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}

/* Whether reverse storage order is supported on the target.  */
static int reverse_storage_order_supported = -1;

/* Check whether reverse storage order is supported on the target.  */

static void
check_reverse_storage_order_support (void)
{
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    {
      reverse_storage_order_supported = 0;
      sorry ("reverse scalar storage order");
    }
  else
    reverse_storage_order_supported = 1;
}

/* Whether reverse FP storage order is supported on the target.  */
static int reverse_float_storage_order_supported = -1;

/* Check whether reverse FP storage order is supported on the target.  */

static void
check_reverse_float_storage_order_support (void)
{
  if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    {
      reverse_float_storage_order_supported = 0;
      sorry ("reverse floating-point scalar storage order");
    }
  else
    reverse_float_storage_order_supported = 1;
}

/* Return an rtx representing the value of X with reverse storage order.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
flip_storage_order (enum machine_mode mode, rtx x)
{
  enum machine_mode int_mode;
  rtx result;

  if (mode == QImode)
    return x;

  if (COMPLEX_MODE_P (mode))
    {
      rtx real = read_complex_part (x, false);
      rtx imag = read_complex_part (x, true);

      real = flip_storage_order (GET_MODE_INNER (mode), real);
      imag = flip_storage_order (GET_MODE_INNER (mode), imag);

      return gen_rtx_CONCAT (mode, real, imag);
    }

  if (__builtin_expect (reverse_storage_order_supported < 0, 0))
    check_reverse_storage_order_support ();

  if (SCALAR_INT_MODE_P (mode))
    int_mode = mode;
  else
    {
      if (FLOAT_MODE_P (mode)
	  && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
	check_reverse_float_storage_order_support ();

      int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
      if (int_mode == BLKmode)
	{
	  sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
	  return x;
	}
      x = gen_lowpart (int_mode, x);
    }

  result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
  if (result == 0)
    result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);

  if (int_mode != mode)
    result = gen_lowpart (mode, result);

  return result;
}
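/* For instance, flipping an SImode value is a single 32-bit byte swap,
   turning 0x12345678 into 0x78563412; a DFmode value is punned to
   DImode, byte-swapped and punned back (an illustrative example).  */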
/* Adjust bitfield memory MEM so that it points to the first unit of mode
   MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
   If MODE is BLKmode, return a reference to every byte in the bitfield.
   Set *NEW_BITNUM to the bit position of the field within the new memory.  */

static rtx
narrow_bit_field_mem (rtx mem, machine_mode mode,
		      unsigned HOST_WIDE_INT bitsize,
		      unsigned HOST_WIDE_INT bitnum,
		      unsigned HOST_WIDE_INT *new_bitnum)
{
  if (mode == BLKmode)
    {
      *new_bitnum = bitnum % BITS_PER_UNIT;
      HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
      HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
			    / BITS_PER_UNIT);
      return adjust_bitfield_address_size (mem, mode, offset, size);
    }
  else
    {
      unsigned int unit = GET_MODE_BITSIZE (mode);
      *new_bitnum = bitnum % unit;
      HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
      return adjust_bitfield_address (mem, mode, offset);
    }
}
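/* As an illustration: with MODE == HImode and BITNUM == 21, the unit
   is 16 bits, so *NEW_BITNUM becomes 21 % 16 == 5 and the address is
   advanced by (21 - 5) / 8 == 2 bytes.  With MODE == BLKmode the same
   BITNUM gives *NEW_BITNUM == 21 % 8 == 5 and a 2-byte offset, with
   the size rounded up to whole bytes.  */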
/* The caller wants to perform insertion or extraction PATTERN on a
   bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
   BITREGION_START and BITREGION_END are as for store_bit_field
   and FIELDMODE is the natural mode of the field.

   Search for a mode that is compatible with the memory access
   restrictions and (where applicable) with a register insertion or
   extraction.  Return the new memory on success, storing the adjusted
   bit position in *NEW_BITNUM.  Return null otherwise.  */

static rtx
adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
			      rtx op0, HOST_WIDE_INT bitsize,
			      HOST_WIDE_INT bitnum,
			      unsigned HOST_WIDE_INT bitregion_start,
			      unsigned HOST_WIDE_INT bitregion_end,
			      machine_mode fieldmode,
			      unsigned HOST_WIDE_INT *new_bitnum)
{
  bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
				bitregion_end, MEM_ALIGN (op0),
				MEM_VOLATILE_P (op0));
  machine_mode best_mode;
  if (iter.next_mode (&best_mode))
    {
      /* We can use a memory in BEST_MODE.  See whether this is true for
	 any wider modes.  All other things being equal, we prefer to
	 use the widest mode possible because it tends to expose more
	 CSE opportunities.  */
      if (!iter.prefer_smaller_modes ())
	{
	  /* Limit the search to the mode required by the corresponding
	     register insertion or extraction instruction, if any.  */
	  machine_mode limit_mode = word_mode;
	  extraction_insn insn;
	  if (get_best_reg_extraction_insn (&insn, pattern,
					    GET_MODE_BITSIZE (best_mode),
					    fieldmode))
	    limit_mode = insn.field_mode;

	  machine_mode wider_mode;
	  while (iter.next_mode (&wider_mode)
		 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
	    best_mode = wider_mode;
	}
      return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
				   new_bitnum);
    }
  return NULL_RTX;
}

/* Return true if a bitfield of size BITSIZE at bit number BITNUM within
   a structure of mode STRUCT_MODE represents a lowpart subreg.  The subreg
   offset is then BITNUM / BITS_PER_UNIT.  */

static bool
lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
		     unsigned HOST_WIDE_INT bitsize,
		     machine_mode struct_mode)
{
  if (BYTES_BIG_ENDIAN)
    return (bitnum % BITS_PER_UNIT == 0
	    && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
		|| (bitnum + bitsize) % BITS_PER_WORD == 0));
  else
    return bitnum % BITS_PER_WORD == 0;
}
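/* E.g. on a little-endian 32-bit target, any field at BITNUM 32 is the
   lowpart of word 1, whereas on a big-endian target a field is a
   lowpart only if it ends on a word or structure boundary, such as a
   16-bit field at BITNUM 16 of an SImode structure (an illustrative
   example).  */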
/* Return true if -fstrict-volatile-bitfields applies to an access of OP0
   containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
   Return false if the access would touch memory outside the range
   BITREGION_START to BITREGION_END for conformance to the C++ memory
   model.  */

static bool
strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum,
			    machine_mode fieldmode,
			    unsigned HOST_WIDE_INT bitregion_start,
			    unsigned HOST_WIDE_INT bitregion_end)
{
  unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);

  /* -fstrict-volatile-bitfields must be enabled and we must have a
     volatile MEM.  */
  if (!MEM_P (op0)
      || !MEM_VOLATILE_P (op0)
      || flag_strict_volatile_bitfields <= 0)
    return false;

  /* Non-integral modes likely only happen with packed structures.
     Punt.  */
  if (!SCALAR_INT_MODE_P (fieldmode))
    return false;

  /* The bit size must not be larger than the field mode, and
     the field mode must not be larger than a word.  */
  if (bitsize > modesize || modesize > BITS_PER_WORD)
    return false;

  /* Check for cases of unaligned fields that must be split.  */
  if (bitnum % modesize + bitsize > modesize)
    return false;

  /* The memory must be sufficiently aligned for a MODESIZE access.
     This condition guarantees that the memory access will not
     touch anything after the end of the structure.  */
  if (MEM_ALIGN (op0) < modesize)
    return false;

  /* Check for cases where the C++ memory model applies.  */
  if (bitregion_end != 0
      && (bitnum - bitnum % modesize < bitregion_start
	  || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
    return false;

  return true;
}

/* Return true if OP is a memory and if a bitfield of size BITSIZE at
   bit number BITNUM can be treated as a simple value of mode MODE.  */

static bool
simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitnum, machine_mode mode)
{
  return (MEM_P (op0)
	  && bitnum % BITS_PER_UNIT == 0
	  && bitsize == GET_MODE_BITSIZE (mode)
	  && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
	      || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
		  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
}

/* Try to use instruction INSV to store VALUE into a field of OP0.
   BITSIZE and BITNUM are as for store_bit_field.  */

static bool
store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
			    unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum,
			    rtx value)
{
  struct expand_operand ops[4];
  rtx value1;
  rtx xop0 = op0;
  rtx_insn *last = get_last_insn ();
  bool copy_back = false;

  machine_mode op_mode = insv->field_mode;
  unsigned int unit = GET_MODE_BITSIZE (op_mode);
  if (bitsize == 0 || bitsize > unit)
    return false;

  if (MEM_P (xop0))
    /* Get a reference to the first byte of the field.  */
    xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
				 &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in OP_MODE.  */
      if (BYTES_BIG_ENDIAN)
	bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));

      /* If xop0 is a register, we need it in OP_MODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	/* We can't just change the mode, because this might clobber op0,
	   and we will need the original value of op0 if insv fails.  */
	xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
	xop0 = gen_lowpart_SUBREG (op_mode, xop0);
    }

  /* If the destination is a paradoxical subreg such that we need a
     truncate to the inner mode, perform the insertion on a temporary and
     truncate the result to the original destination.  Note that we can't
     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
     X) 0)) is (reg:N X).  */
  if (GET_CODE (xop0) == SUBREG
      && REG_P (SUBREG_REG (xop0))
      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
					 op_mode))
    {
      rtx tem = gen_reg_rtx (op_mode);
      emit_move_insn (tem, xop0);
      xop0 = tem;
      copy_back = true;
    }

  /* There is a similar overflow check at the start of store_bit_field_1,
     but that only handles the case where the field lies completely
     outside the register.  The field can also lie partly inside the
     register, in which case BITSIZE must be adjusted for the partial
     overflow.  Without this fix, pr48335-2.c is broken for big-endian
     on targets that have bit-insert instructions, such as arm and
     aarch64.  */
  if (bitsize + bitnum > unit && bitnum < unit)
    {
      warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
	       "destination object, data truncated into %wu-bit",
	       bitsize, unit - bitnum);
      bitsize = unit - bitnum;
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are inserting into.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  value1 = value;
  if (GET_MODE (value) != op_mode)
    {
      if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
	{
	  rtx tmp;
	  /* Optimization: Don't bother really extending VALUE
	     if it has all the bits we will actually use.  However,
	     if we must narrow it, be sure we do it correctly.  */

	  if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
	    {
	      tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
	      if (! tmp)
		tmp = simplify_gen_subreg (op_mode,
					   force_reg (GET_MODE (value),
						      value1),
					   GET_MODE (value), 0);
	    }
	  else
	    {
	      tmp = gen_lowpart_if_possible (op_mode, value1);
	      if (! tmp)
		tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
						       value1));
	    }
	  value1 = tmp;
	}
      else if (CONST_INT_P (value))
	value1 = gen_int_mode (INTVAL (value), op_mode);
      else
	/* The parse phase is supposed to make VALUE's data type
	   match that of the component reference, which is a type
	   at least as wide as the field; so VALUE should have
	   a mode that corresponds to that type.  */
	gcc_assert (CONSTANT_P (value));
    }

  create_fixed_operand (&ops[0], xop0);
  create_integer_operand (&ops[1], bitsize);
  create_integer_operand (&ops[2], bitnum);
  create_input_operand (&ops[3], value1, op_mode);
  if (maybe_expand_insn (insv->icode, 4, ops))
    {
      if (copy_back)
	convert_move (op0, xop0, true);
      return true;
    }
  delete_insns_since (last);
  return false;
}
/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.  */

static bool
store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		   unsigned HOST_WIDE_INT bitnum,
		   unsigned HOST_WIDE_INT bitregion_start,
		   unsigned HOST_WIDE_INT bitregion_end,
		   machine_mode fieldmode,
		   rtx value, bool reverse, bool fallback_p)
{
  rtx op0 = str_rtx;
  rtx orig_value;

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
	 meaningful at a much higher level; when structures are copied
	 between memory and regs, the higher-numbered regs
	 always get higher addresses.  */
      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
      int byte_offset = 0;

      /* Paradoxical subregs need special handling on big-endian machines.  */
      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
	{
	  int difference = inner_mode_size - outer_mode_size;

	  if (WORDS_BIG_ENDIAN)
	    byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
	  if (BYTES_BIG_ENDIAN)
	    byte_offset += difference % UNITS_PER_WORD;
	}
      else
	byte_offset = SUBREG_BYTE (op0);

      bitnum += byte_offset * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return true;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
      && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
    {
      struct expand_operand ops[3];
      machine_mode outermode = GET_MODE (op0);
      machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_set_optab, outermode);
      int pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_fixed_operand (&ops[0], op0);
      create_input_operand (&ops[1], value, innermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
	return true;
    }
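  /* The vec_set case above covers, e.g., a store to element 2 of a
     V4SImode register: FIELDMODE is SImode, BITSIZE is 32, BITNUM is
     64, and the position passed to the pattern is 64 / 32 == 2 (an
     illustrative example).  */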
  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.  */
  if (!MEM_P (op0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
	  || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
    {
      /* Use the subreg machinery either to narrow OP0 to the required
	 words or to cope with mode punning between equal-sized modes.
	 In the latter case, use subreg on the rhs side, not lhs.  */
      rtx sub;

      if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
	{
	  sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
	  if (sub)
	    {
	      if (reverse)
		sub = flip_storage_order (GET_MODE (op0), sub);
	      emit_move_insn (op0, sub);
	      return true;
	    }
	}
      else
	{
	  sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
				     bitnum / BITS_PER_UNIT);
	  if (sub)
	    {
	      if (reverse)
		value = flip_storage_order (fieldmode, value);
	      emit_move_insn (sub, value);
	      return true;
	    }
	}
    }

  /* If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */
  if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
    {
      op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
      if (reverse)
	value = flip_storage_order (fieldmode, value);
      emit_move_insn (op0, value);
      return true;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (MEM_P (op0))
	  op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
	else
	  {
	    gcc_assert (imode != BLKmode);
	    op0 = gen_lowpart (imode, op0);
	  }
      }
  }

  /* Storing an lsb-aligned field in a register
     can be done with a movstrict instruction.  */

  if (!MEM_P (op0)
      && !reverse
      && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
    {
      struct expand_operand ops[2];
      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
      rtx arg0 = op0;
      unsigned HOST_WIDE_INT subreg_off;

      if (GET_CODE (arg0) == SUBREG)
	{
	  /* Else we've got some float mode source being extracted into
	     a different float mode destination -- this combination of
	     subregs results in Severe Tire Damage.  */
	  gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
		      || GET_MODE_CLASS (fieldmode) == MODE_INT
		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
	  arg0 = SUBREG_REG (arg0);
	}

      subreg_off = bitnum / BITS_PER_UNIT;
      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
	{
	  arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);

	  create_fixed_operand (&ops[0], arg0);
	  /* Shrink the source operand to FIELDMODE.  */
	  create_convert_operand_to (&ops[1], value, fieldmode, false);
	  if (maybe_expand_insn (icode, 2, ops))
	    return true;
	}
    }
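  /* A movstrict pattern typically expands to RTL of the form
     (set (strict_low_part (subreg:HI (reg:SI N) 0)) ...), which
     changes only the low 16 bits of the destination register; the
     exact form is target-dependent (an illustrative sketch).  */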
  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.
	 However, only do that if the value is not BLKmode.  */

      const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx_insn *last;

      /* This is the mode we must force value to, so that there will be enough
	 subwords to extract.  Note that fieldmode will often (always?) be
	 VOIDmode, because that is what store_field uses to indicate that this
	 is a bit field, but passing VOIDmode to operand_subword_force
	 is not allowed.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
	fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  unsigned int wordnum = (backwards
				  ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
				  - i - 1
				  : i);
	  unsigned int bit_offset = (backwards ^ reverse
				     ? MAX ((int) bitsize - ((int) i + 1)
					    * BITS_PER_WORD,
					    0)
				     : (int) i * BITS_PER_WORD);
	  rtx value_word = operand_subword_force (value, wordnum, fieldmode);
	  unsigned HOST_WIDE_INT new_bitsize =
	    MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);

	  /* If the remaining chunk doesn't have full wordsize we have
	     to make sure that for big-endian machines the higher order
	     bits are used.  */
	  if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
	    value_word = simplify_expand_binop (word_mode, lshr_optab,
						value_word,
						GEN_INT (BITS_PER_WORD
							 - new_bitsize),
						NULL_RTX, true,
						OPTAB_LIB_WIDEN);

	  if (!store_bit_field_1 (op0, new_bitsize,
				  bitnum + bit_offset,
				  bitregion_start, bitregion_end,
				  word_mode,
				  value_word, reverse, fallback_p))
	    {
	      delete_insns_since (last);
	      return false;
	    }
	}
      return true;
    }
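  /* For example, a 48-bit field on a 32-bit target is stored in two
     iterations: I == 0 stores bits 0-31 and I == 1 stores the
     remaining bits, with NEW_BITSIZE reduced to 48 - 32 == 16 for the
     final, partial word (an illustrative example).  */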
  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */
  orig_value = value;
  if (GET_MODE (value) != VOIDmode
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    {
      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* If OP0 is a multi-word register, narrow it to the affected word.
     If the region spans two words, defer to store_split_bit_field.
     Don't do this if op0 is a single hard register wider than word
     such as a float or vector register.  */
  if (!MEM_P (op0)
      && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD
      && (!REG_P (op0)
	  || !HARD_REGISTER_P (op0)
	  || HARD_REGNO_NREGS (REGNO (op0), GET_MODE (op0)) != 1))
    {
      if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
	{
	  if (!fallback_p)
	    return false;

	  store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
				 bitregion_end, value, reverse);
	  return true;
	}
      op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
      gcc_assert (op0);
      bitnum %= BITS_PER_WORD;
    }

  /* From here on we can assume that the field to be stored in fits
     within a word.  If the destination is a register, it too fits
     in a word.  */

  extraction_insn insv;
  if (!MEM_P (op0)
      && !reverse
      && get_best_reg_extraction_insn (&insv, EP_insv,
				       GET_MODE_BITSIZE (GET_MODE (op0)),
				       fieldmode)
      && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
    return true;

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (MEM_P (op0) && !reverse)
    {
      if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
					fieldmode)
	  && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
	return true;

      rtx_insn *last = get_last_insn ();

      /* Try loading part of OP0 into a register, inserting the bitfield
	 into that, and then copying the result back to OP0.  */
      unsigned HOST_WIDE_INT bitpos;
      rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
					       bitregion_start, bitregion_end,
					       fieldmode, &bitpos);
      if (xop0)
	{
	  rtx tempreg = copy_to_reg (xop0);
	  if (store_bit_field_1 (tempreg, bitsize, bitpos,
				 bitregion_start, bitregion_end,
				 fieldmode, orig_value, reverse, false))
	    {
	      emit_move_insn (xop0, tempreg);
	      return true;
	    }
	  delete_insns_since (last);
	}
    }

  if (!fallback_p)
    return false;

  store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
			 bitregion_end, value, reverse);
  return true;
}
/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.

   BITREGION_START is bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0, if the C++ memory model does not apply,
   or we are not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

   If REVERSE is true, the store is to be done in reverse order.  */

void
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		 unsigned HOST_WIDE_INT bitnum,
		 unsigned HOST_WIDE_INT bitregion_start,
		 unsigned HOST_WIDE_INT bitregion_end,
		 machine_mode fieldmode,
		 rtx value, bool reverse)
{
  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
  if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
				  bitregion_start, bitregion_end))
    {
      /* Storing of a full word can be done with a simple store.
	 We know here that the field can be accessed with one single
	 instruction.  For targets that support unaligned memory,
	 an unaligned access may be necessary.  */
      if (bitsize == GET_MODE_BITSIZE (fieldmode))
	{
	  str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
					     bitnum / BITS_PER_UNIT);
	  if (reverse)
	    value = flip_storage_order (fieldmode, value);
	  gcc_assert (bitnum % BITS_PER_UNIT == 0);
	  emit_move_insn (str_rtx, value);
	}
      else
	{
	  rtx temp;

	  str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
					  &bitnum);
	  gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
	  temp = copy_to_reg (str_rtx);
	  if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
				  fieldmode, value, reverse, true))
	    gcc_unreachable ();

	  emit_move_insn (str_rtx, temp);
	}

      return;
    }

  /* Under the C++0x memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && bitregion_start > 0)
    {
      machine_mode bestmode;
      HOST_WIDE_INT offset, size;

      gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);

      offset = bitregion_start / BITS_PER_UNIT;
      bitnum -= bitregion_start;
      size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      bestmode = get_best_mode (bitsize, bitnum,
				bitregion_start, bitregion_end,
				MEM_ALIGN (str_rtx), VOIDmode,
				MEM_VOLATILE_P (str_rtx));
      str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
    }
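  /* For example, with BITREGION_START == 32 and BITNUM == 41, the
     address above is advanced by 32 / 8 == 4 bytes and BITNUM becomes
     41 - 32 == 9, so the later accesses stay within the bit region
     (illustrative values).  */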
  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
			  bitregion_start, bitregion_end,
			  fieldmode, value, reverse, true))
    gcc_unreachable ();
}

/* Use shifts and boolean operations to store VALUE into a bit field of
   width BITSIZE in OP0, starting at bit BITNUM.

   If REVERSE is true, the store is to be done in reverse order.  */

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitnum,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value, bool reverse)
{
  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (MEM_P (op0))
    {
      machine_mode mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
	mode = word_mode;
      mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
			    MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	{
	  /* The only way this should occur is if the field spans word
	     boundaries.  */
	  store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
				 bitregion_end, value, reverse);
	  return;
	}

      op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
    }

  store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
}

/* Helper function for store_fixed_bit_field, stores
   the bit field always using the MODE of OP0.  */

static void
store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitnum,
			 rtx value, bool reverse)
{
  machine_mode mode;
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  mode = GET_MODE (op0);
  gcc_assert (SCALAR_INT_MODE_P (mode));

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */

  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT v = UINTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= (HOST_WIDE_INT_1U << bitsize) - 1;

      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == (HOST_WIDE_INT_1U << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT
		   && v == HOST_WIDE_INT_M1U))
	all_one = 1;

      value = lshift_value (mode, v, bitnum);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
		      && bitnum + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
	value = convert_to_mode (mode, value, 1);

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitnum > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      bitnum, NULL_RTX, 1);
    }

  if (reverse)
    value = flip_storage_order (mode, value);

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
      if (reverse)
	mask = flip_storage_order (mode, mask);
      temp = expand_binop (mode, and_optab, temp, mask,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}
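/* The sequence emitted above amounts to the classical read-modify-write
   idiom; with BITSIZE == 8 and BITNUM == 4 in SImode it is roughly

     temp = op0 & ~(0xff << 4);
     op0 = temp | ((value & 0xff) << 4);

   where the AND is skipped when VALUE is all ones and the IOR when it
   is zero (an illustrative sketch, not the exact RTL).  */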
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   If REVERSE is true, the store is to be done in reverse order.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value, bool reverse)
{
  unsigned int unit, total_bits, bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If OP0 is a memory with a mode, then UNIT must not be larger than
     OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
     again, and we will mutually recurse forever.  */
  if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
    unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	value = gen_lowpart_common (word_mode,
				    force_reg (GET_MODE (value) != VOIDmode
					       ? GET_MODE (value)
					       : word_mode, value));
    }

  total_bits = GET_MODE_BITSIZE (GET_MODE (value));

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;
      rtx part, word;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* When region of bytes we can touch is restricted, decrease
	 UNIT close to the end of the region as needed.  If op0 is a REG
	 or SUBREG of REG, don't do this, as there can't be data races
	 on a register and we can expand shorter code in some cases.  */
      if (bitregion_end
	  && unit > BITS_PER_UNIT
	  && bitpos + bitsdone - thispos + unit > bitregion_end + 1
	  && !REG_P (op0)
	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
	{
	  unit = unit / 2;
	  continue;
	}

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
	{
	  /* Fetch successively less significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & ((HOST_WIDE_INT_1 << thissize) - 1));
	  /* Likewise, but the source is little-endian.  */
	  else if (reverse)
	    part = extract_fixed_bit_field (word_mode, value, thissize,
					    bitsize - bitsdone - thissize,
					    NULL_RTX, 1, false);
	  else
	    {
	      int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
	      /* The args are chosen so that the last part includes the
		 lsb.  Give extract_bit_field the value it needs (with
		 endianness compensation) to fetch the piece we want.  */
	      part = extract_fixed_bit_field (word_mode, value, thissize,
					      total_bits - bitsize + bitsdone,
					      NULL_RTX, 1, false);
	    }
	}
      else
	{
	  /* Fetch successively more significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> bitsdone)
			    & ((HOST_WIDE_INT_1 << thissize) - 1));
	  /* Likewise, but the source is big-endian.  */
	  else if (reverse)
	    part = extract_fixed_bit_field (word_mode, value, thissize,
					    total_bits - bitsdone - thissize,
					    NULL_RTX, 1, false);
	  else
	    part = extract_fixed_bit_field (word_mode, value, thissize,
					    bitsdone, NULL_RTX, 1, false);
	}

      /* If OP0 is a register, then handle OFFSET here.  */
      if (SUBREG_P (op0) || REG_P (op0))
	{
	  machine_mode op0_mode = GET_MODE (op0);
	  if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
	    word = offset ? const0_rtx : op0;
	  else
	    word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
					  GET_MODE (op0));
	  offset &= BITS_PER_WORD / unit - 1;
	}
      else
	word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
	 it is just an out-of-bounds access.  Ignore it.  */
      if (word != const0_rtx)
	store_fixed_bit_field (word, thissize, offset * unit + thispos,
			       bitregion_start, bitregion_end, part,
			       reverse);
      bitsdone += thissize;
    }
}
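/* For example, on a little-endian target, storing a 16-bit field at
   BITPOS 24 of a register pair with 32-bit words is done in two pieces:
   the first iteration stores the low 8 bits of VALUE at bit 24 of
   word 0, the second stores the high 8 bits at bit 0 of word 1 (an
   illustrative example).  */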
/* A subroutine of extract_bit_field_1 that converts return value X
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
   to extract_bit_field.  */

static rtx
convert_extracted_bit_field (rtx x, machine_mode mode,
			     machine_mode tmode, bool unsignedp)
{
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
    return x;

  /* If the x mode is not a scalar integral, first convert to the
     integer mode of that size and then access it as a floating-point
     value via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
    {
      machine_mode smode;

      smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
      x = convert_to_mode (smode, x, unsignedp);
      x = force_reg (smode, x);
      return gen_lowpart (tmode, x);
    }

  return convert_to_mode (tmode, x, unsignedp);
}

/* Try to use an ext(z)v pattern to extract a field from OP0.
   Return the extracted value on success, otherwise return null.
   EXT_MODE is the mode of the extraction and the other arguments
   are as for extract_bit_field.  */

static rtx
extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
			      unsigned HOST_WIDE_INT bitsize,
			      unsigned HOST_WIDE_INT bitnum,
			      int unsignedp, rtx target,
			      machine_mode mode, machine_mode tmode)
{
  struct expand_operand ops[4];
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  machine_mode ext_mode = extv->field_mode;
  unsigned unit = GET_MODE_BITSIZE (ext_mode);

  if (bitsize == 0 || unit < bitsize)
    return NULL_RTX;

  if (MEM_P (op0))
    /* Get a reference to the first byte of the field.  */
    op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
				&bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in EXT_MODE.  */
      if (BYTES_BIG_ENDIAN)
	bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));

      /* If op0 is a register, we need it in EXT_MODE to make it
	 acceptable to the format of ext(z)v.  */
      if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
	return NULL_RTX;
      if (REG_P (op0) && GET_MODE (op0) != ext_mode)
	op0 = gen_lowpart_SUBREG (ext_mode, op0);
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are extracting from.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  if (target == 0)
    target = spec_target = gen_reg_rtx (tmode);

  if (GET_MODE (target) != ext_mode)
    {
      /* Don't use LHS paradoxical subreg if explicit truncation is needed
	 between the mode of the extraction (word_mode) and the target
	 mode.  Instead, create a temporary and use convert_move to set
	 the target.  */
      if (REG_P (target)
	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
	{
	  target = gen_lowpart (ext_mode, target);
	  if (GET_MODE_PRECISION (ext_mode)
	      > GET_MODE_PRECISION (GET_MODE (spec_target)))
	    spec_target_subreg = target;
	}
      else
	target = gen_reg_rtx (ext_mode);
    }

  create_output_operand (&ops[0], target, ext_mode);
  create_fixed_operand (&ops[1], op0);
  create_integer_operand (&ops[2], bitsize);
  create_integer_operand (&ops[3], bitnum);
  if (maybe_expand_insn (extv->icode, 4, ops))
    {
      target = ops[0].value;
      if (target == spec_target)
	return target;
      if (target == spec_target_subreg)
	return spec_target;
      return convert_extracted_bit_field (target, mode, tmode, unsignedp);
    }
  return NULL_RTX;
}

/* A subroutine of extract_bit_field, with the same arguments.
   If FALLBACK_P is true, fall back to extract_fixed_bit_field
   if we can find no other means of implementing the operation.
   If FALLBACK_P is false, return NULL instead.  */

static rtx
extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		     unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
		     machine_mode mode, machine_mode tmode,
		     bool reverse, bool fallback_p)
{
  rtx op0 = str_rtx;
  machine_mode int_mode;
  machine_mode mode1;

  if (tmode == VOIDmode)
    tmode = mode;

  while (GET_CODE (op0) == SUBREG)
    {
      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* If we have an out-of-bounds access to a register, just return an
     uninitialized register of the required mode.  This can occur if the
     source code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return gen_reg_rtx (tmode);

  if (REG_P (op0)
      && mode == GET_MODE (op0)
      && bitnum == 0
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
    {
      if (reverse)
	op0 = flip_storage_order (mode, op0);
      /* We're trying to extract a full register from itself.  */
      return op0;
    }
  /* See if we can get a better vector mode before extracting.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && GET_MODE_INNER (GET_MODE (op0)) != tmode)
    {
      machine_mode new_mode;

      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
	new_mode = MIN_MODE_VECTOR_FLOAT;
      else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
	new_mode = MIN_MODE_VECTOR_FRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
	new_mode = MIN_MODE_VECTOR_UFRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
	new_mode = MIN_MODE_VECTOR_ACCUM;
      else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
	new_mode = MIN_MODE_VECTOR_UACCUM;
      else
	new_mode = MIN_MODE_VECTOR_INT;

      for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
	if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
	    && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
	    && targetm.vector_mode_supported_p (new_mode))
	  break;
      if (new_mode != VOIDmode)
	op0 = gen_lowpart (new_mode, op0);
    }

  /* Use vec_extract patterns for extracting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
	  == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
    {
      struct expand_operand ops[3];
      machine_mode outermode = GET_MODE (op0);
      machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_extract_optab, outermode);
      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_output_operand (&ops[0], target, innermode);
      create_input_operand (&ops[1], op0, outermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
	{
	  target = ops[0].value;
	  if (GET_MODE (target) != mode)
	    return gen_lowpart (tmode, target);
	  return target;
	}
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (MEM_P (op0))
	  op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
	else if (imode != BLKmode)
	  {
	    op0 = gen_lowpart (imode, op0);

	    /* If we got a SUBREG, force it into a register since we
	       aren't going to be able to do another SUBREG on it.  */
	    if (GET_CODE (op0) == SUBREG)
	      op0 = force_reg (imode, op0);
	  }
	else
	  {
	    HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
	    rtx mem = assign_stack_temp (GET_MODE (op0), size);
	    emit_move_insn (mem, op0);
	    op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
	  }
      }
  }

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* Get the mode of the field to use for atomic access or subreg
     conversion.  */
  mode1 = mode;
  if (SCALAR_INT_MODE_P (tmode))
    {
      machine_mode try_mode = mode_for_size (bitsize,
					     GET_MODE_CLASS (tmode), 0);
      if (try_mode != BLKmode)
	mode1 = try_mode;
    }
  gcc_assert (mode1 != BLKmode);
  /* Extraction of a full MODE1 value can be done with a subreg as long
     as the least significant bit of the value is the least significant
     bit of either OP0 or a word of OP0.  */
  if (!MEM_P (op0)
      && !reverse
      && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (mode1)
      && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
    {
      rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
				     bitnum / BITS_PER_UNIT);
      if (sub)
	return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
    }

  /* Extraction of a full MODE1 value can be done with a load as long as
     the field is on a byte boundary and is sufficiently aligned.  */
  if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
    {
      op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
      if (reverse)
	op0 = flip_storage_order (mode1, op0);
      return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.  */

      const bool backwards = WORDS_BIG_ENDIAN;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx_insn *last;

      if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
	target = gen_reg_rtx (mode);

      /* In case we're about to clobber a base register or something
	 (see gcc.c-torture/execute/20040625-1.c).  */
      if (reg_mentioned_p (target, str_rtx))
	target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_clobber (target);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  /* Word number in TARGET to use.  */
	  unsigned int wordnum
	    = (backwards
	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
	       : i);
	  /* Offset from start of field in OP0.  */
	  unsigned int bit_offset = (backwards ^ reverse
				     ? MAX ((int) bitsize - ((int) i + 1)
					    * BITS_PER_WORD,
					    0)
				     : (int) i * BITS_PER_WORD);
	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
	  rtx result_part
	    = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
					     bitsize - i * BITS_PER_WORD),
				   bitnum + bit_offset, 1, target_part,
				   mode, word_mode, reverse, fallback_p);

	  gcc_assert (target_part);
	  if (!result_part)
	    {
	      delete_insns_since (last);
	      return NULL;
	    }

	  if (result_part != target_part)
	    emit_move_insn (target_part, result_part);
	}

      if (unsignedp)
	{
	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
	     need to be zero'd out.  */
	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
	    {
	      unsigned int i, total_words;

	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
	      for (i = nwords; i < total_words; i++)
		emit_move_insn
		  (operand_subword (target,
				    backwards ? total_words - i - 1 : i,
				    1, VOIDmode),
		   const0_rtx);
	    }
	  return target;
	}

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
			     GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
			   GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
    }
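  /* The multi-word branch above handles, e.g., a signed 48-bit field
     extracted into DImode on a 32-bit target: both words are fetched,
     then the result is shifted left by 64 - 48 == 16 bits and
     arithmetically back right by 16 to replicate the sign bit (an
     illustrative example).  */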
*/ 1771 target = expand_shift (LSHIFT_EXPR, mode, target, 1772 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); 1773 return expand_shift (RSHIFT_EXPR, mode, target, 1774 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); 1775 } 1776 1777 /* If OP0 is a multi-word register, narrow it to the affected word. 1778 If the region spans two words, defer to extract_split_bit_field. */ 1779 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) 1780 { 1781 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD) 1782 { 1783 if (!fallback_p) 1784 return NULL_RTX; 1785 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp, 1786 reverse); 1787 return convert_extracted_bit_field (target, mode, tmode, unsignedp); 1788 } 1789 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0), 1790 bitnum / BITS_PER_WORD * UNITS_PER_WORD); 1791 bitnum %= BITS_PER_WORD; 1792 } 1793 1794 /* From here on we know the desired field is smaller than a word. 1795 If OP0 is a register, it too fits within a word. */ 1796 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv; 1797 extraction_insn extv; 1798 if (!MEM_P (op0) 1799 && !reverse 1800 /* ??? We could limit the structure size to the part of OP0 that 1801 contains the field, with appropriate checks for endianness 1802 and TRULY_NOOP_TRUNCATION. */ 1803 && get_best_reg_extraction_insn (&extv, pattern, 1804 GET_MODE_BITSIZE (GET_MODE (op0)), 1805 tmode)) 1806 { 1807 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum, 1808 unsignedp, target, mode, 1809 tmode); 1810 if (result) 1811 return result; 1812 } 1813 1814 /* If OP0 is a memory, try copying it to a register and seeing if a 1815 cheap register alternative is available. */ 1816 if (MEM_P (op0) && !reverse) 1817 { 1818 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum, 1819 tmode)) 1820 { 1821 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, 1822 bitnum, unsignedp, 1823 target, mode, 1824 tmode); 1825 if (result) 1826 return result; 1827 } 1828 1829 rtx_insn *last = get_last_insn (); 1830 1831 /* Try loading part of OP0 into a register and extracting the 1832 bitfield from that. */ 1833 unsigned HOST_WIDE_INT bitpos; 1834 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum, 1835 0, 0, tmode, &bitpos); 1836 if (xop0) 1837 { 1838 xop0 = copy_to_reg (xop0); 1839 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos, 1840 unsignedp, target, 1841 mode, tmode, reverse, false); 1842 if (result) 1843 return result; 1844 delete_insns_since (last); 1845 } 1846 } 1847 1848 if (!fallback_p) 1849 return NULL; 1850 1851 /* Find a correspondingly-sized integer field, so we can apply 1852 shifts and masks to it. */ 1853 int_mode = int_mode_for_mode (tmode); 1854 if (int_mode == BLKmode) 1855 int_mode = int_mode_for_mode (mode); 1856 /* Should probably push op0 out to memory and then do a load. */ 1857 gcc_assert (int_mode != BLKmode); 1858 1859 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target, 1860 unsignedp, reverse); 1861 1862 /* Complex values must be reversed piecewise, so we need to undo the global 1863 reversal, convert to the complex mode and reverse again.
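(The first flip, done in INT_MODE, undoes the whole-value reversal applied during extraction; the second flip, done after the conversion, performs the per-part reversal that a complex mode requires.)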
*/ 1864 if (reverse && COMPLEX_MODE_P (tmode)) 1865 { 1866 target = flip_storage_order (int_mode, target); 1867 target = convert_extracted_bit_field (target, mode, tmode, unsignedp); 1868 target = flip_storage_order (tmode, target); 1869 } 1870 else 1871 target = convert_extracted_bit_field (target, mode, tmode, unsignedp); 1872 1873 return target; 1874} 1875 1876/* Generate code to extract a byte-field from STR_RTX 1877 containing BITSIZE bits, starting at BITNUM, 1878 and put it in TARGET if possible (if TARGET is nonzero). 1879 Regardless of TARGET, we return the rtx for where the value is placed. 1880 1881 STR_RTX is the structure containing the byte (a REG or MEM). 1882 UNSIGNEDP is nonzero if this is an unsigned bit field. 1883 MODE is the natural mode of the field value once extracted. 1884 TMODE is the mode the caller would like the value to have; 1885 but the value may be returned with type MODE instead. 1886 1887 If REVERSE is true, the extraction is to be done in reverse order. 1888 1889 If a TARGET is specified and we can store in it at no extra cost, 1890 we do so, and return TARGET. 1891 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred 1892 if they are equally easy. */ 1893 1894rtx 1895extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, 1896 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, 1897 machine_mode mode, machine_mode tmode, bool reverse) 1898{ 1899 machine_mode mode1; 1900 1901 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */ 1902 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0) 1903 mode1 = GET_MODE (str_rtx); 1904 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0) 1905 mode1 = GET_MODE (target); 1906 else 1907 mode1 = tmode; 1908 1909 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0)) 1910 { 1911 /* Extraction of a full MODE1 value can be done with a simple load. 1912 We know here that the field can be accessed with one single 1913 instruction. For targets that support unaligned memory, 1914 an unaligned access may be necessary. */ 1915 if (bitsize == GET_MODE_BITSIZE (mode1)) 1916 { 1917 rtx result = adjust_bitfield_address (str_rtx, mode1, 1918 bitnum / BITS_PER_UNIT); 1919 if (reverse) 1920 result = flip_storage_order (mode1, result); 1921 gcc_assert (bitnum % BITS_PER_UNIT == 0); 1922 return convert_extracted_bit_field (result, mode, tmode, unsignedp); 1923 } 1924 1925 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum, 1926 &bitnum); 1927 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1)); 1928 str_rtx = copy_to_reg (str_rtx); 1929 } 1930 1931 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, 1932 target, mode, tmode, reverse, true); 1933} 1934 1935/* Use shifts and boolean operations to extract a field of BITSIZE bits 1936 from bit BITNUM of OP0. 1937 1938 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value). 1939 If REVERSE is true, the extraction is to be done in reverse order. 1940 1941 If TARGET is nonzero, attempts to store the value there 1942 and return TARGET, but this is not guaranteed. 1943 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. 
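For example, on a little-endian target an unsigned 3-bit field starting at bit 4 of a byte X is essentially (X >> 4) & 7; a signed field instead uses the pair of arithmetic shifts described below in place of the mask.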
*/ 1944 1945static rtx 1946extract_fixed_bit_field (machine_mode tmode, rtx op0, 1947 unsigned HOST_WIDE_INT bitsize, 1948 unsigned HOST_WIDE_INT bitnum, rtx target, 1949 int unsignedp, bool reverse) 1950{ 1951 if (MEM_P (op0)) 1952 { 1953 machine_mode mode 1954 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode, 1955 MEM_VOLATILE_P (op0)); 1956 1957 if (mode == VOIDmode) 1958 /* The only way this should occur is if the field spans word 1959 boundaries. */ 1960 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp, 1961 reverse); 1962 1963 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum); 1964 } 1965 1966 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum, 1967 target, unsignedp, reverse); 1968} 1969 1970/* Helper function for extract_fixed_bit_field, extracts 1971 the bit field always using the MODE of OP0. */ 1972 1973static rtx 1974extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, 1975 unsigned HOST_WIDE_INT bitsize, 1976 unsigned HOST_WIDE_INT bitnum, rtx target, 1977 int unsignedp, bool reverse) 1978{ 1979 machine_mode mode = GET_MODE (op0); 1980 gcc_assert (SCALAR_INT_MODE_P (mode)); 1981 1982 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode) 1983 for invalid input, such as extract equivalent of f5 from 1984 gcc.dg/pr48335-2.c. */ 1985 1986 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) 1987 /* BITNUM is the distance between our msb and that of OP0. 1988 Convert it to the distance from the lsb. */ 1989 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum; 1990 1991 /* Now BITNUM is always the distance between the field's lsb and that of OP0. 1992 We have reduced the big-endian case to the little-endian case. */ 1993 if (reverse) 1994 op0 = flip_storage_order (mode, op0); 1995 1996 if (unsignedp) 1997 { 1998 if (bitnum) 1999 { 2000 /* If the field does not already start at the lsb, 2001 shift it so it does. */ 2002 /* Maybe propagate the target for the shift. */ 2003 rtx subtarget = (target != 0 && REG_P (target) ? target : 0); 2004 if (tmode != mode) 2005 subtarget = 0; 2006 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1); 2007 } 2008 /* Convert the value to the desired mode. */ 2009 if (mode != tmode) 2010 op0 = convert_to_mode (tmode, op0, 1); 2011 2012 /* Unless the msb of the field used to be the msb when we shifted, 2013 mask out the upper bits. */ 2014 2015 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize) 2016 return expand_binop (GET_MODE (op0), and_optab, op0, 2017 mask_rtx (GET_MODE (op0), 0, bitsize, 0), 2018 target, 1, OPTAB_LIB_WIDEN); 2019 return op0; 2020 } 2021 2022 /* To extract a signed bit-field, first shift its msb to the msb of the word, 2023 then arithmetic-shift its lsb to the lsb of the word. */ 2024 op0 = force_reg (mode, op0); 2025 2026 /* Find the narrowest integer mode that contains the field. */ 2027 2028 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode; 2029 mode = GET_MODE_WIDER_MODE (mode)) 2030 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum) 2031 { 2032 op0 = convert_to_mode (mode, op0, 0); 2033 break; 2034 } 2035 2036 if (mode != tmode) 2037 target = 0; 2038 2039 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum)) 2040 { 2041 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum); 2042 /* Maybe propagate the target for the shift. */ 2043 rtx subtarget = (target != 0 && REG_P (target) ? 
target : 0); 2044 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1); 2045 } 2046 2047 return expand_shift (RSHIFT_EXPR, mode, op0, 2048 GET_MODE_BITSIZE (mode) - bitsize, target, 0); 2049} 2050 2051/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value 2052 VALUE << BITPOS. */ 2053 2054static rtx 2055lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value, 2056 int bitpos) 2057{ 2058 return immed_wide_int_const (wi::lshift (value, bitpos), mode); 2059} 2060 2061/* Extract a bit field that is split across two words 2062 and return an RTX for the result. 2063 2064 OP0 is the REG, SUBREG or MEM rtx for the first of the two words. 2065 BITSIZE is the field width; BITPOS, position of its first bit, in the word. 2066 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. 2067 2068 If REVERSE is true, the extraction is to be done in reverse order. */ 2069 2070static rtx 2071extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, 2072 unsigned HOST_WIDE_INT bitpos, int unsignedp, 2073 bool reverse) 2074{ 2075 unsigned int unit; 2076 unsigned int bitsdone = 0; 2077 rtx result = NULL_RTX; 2078 int first = 1; 2079 2080 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that 2081 much at a time. */ 2082 if (REG_P (op0) || GET_CODE (op0) == SUBREG) 2083 unit = BITS_PER_WORD; 2084 else 2085 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); 2086 2087 while (bitsdone < bitsize) 2088 { 2089 unsigned HOST_WIDE_INT thissize; 2090 rtx part, word; 2091 unsigned HOST_WIDE_INT thispos; 2092 unsigned HOST_WIDE_INT offset; 2093 2094 offset = (bitpos + bitsdone) / unit; 2095 thispos = (bitpos + bitsdone) % unit; 2096 2097 /* THISSIZE must not overrun a word boundary. Otherwise, 2098 extract_fixed_bit_field will call us again, and we will mutually 2099 recurse forever. */ 2100 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); 2101 thissize = MIN (thissize, unit - thispos); 2102 2103 /* If OP0 is a register, then handle OFFSET here. */ 2104 if (SUBREG_P (op0) || REG_P (op0)) 2105 { 2106 word = operand_subword_force (op0, offset, GET_MODE (op0)); 2107 offset = 0; 2108 } 2109 else 2110 word = op0; 2111 2112 /* Extract the parts in bit-counting order, 2113 whose meaning is determined by BYTES_PER_UNIT. 2114 OFFSET is in UNITs, and UNIT is in bits. */ 2115 part = extract_fixed_bit_field (word_mode, word, thissize, 2116 offset * unit + thispos, 0, 1, reverse); 2117 bitsdone += thissize; 2118 2119 /* Shift this part into place for the result. */ 2120 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) 2121 { 2122 if (bitsize != bitsdone) 2123 part = expand_shift (LSHIFT_EXPR, word_mode, part, 2124 bitsize - bitsdone, 0, 1); 2125 } 2126 else 2127 { 2128 if (bitsdone != thissize) 2129 part = expand_shift (LSHIFT_EXPR, word_mode, part, 2130 bitsdone - thissize, 0, 1); 2131 } 2132 2133 if (first) 2134 result = part; 2135 else 2136 /* Combine the parts with bitwise or. This works 2137 because we extracted each part as an unsigned bit field. */ 2138 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1, 2139 OPTAB_LIB_WIDEN); 2140 2141 first = 0; 2142 } 2143 2144 /* Unsigned bit field: we are done. */ 2145 if (unsignedp) 2146 return result; 2147 /* Signed bit field: sign-extend with two arithmetic shifts. 
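(As in extract_bit_field_1 above, but always in word_mode: shift left by BITS_PER_WORD - bitsize, then arithmetic-shift right by the same amount.)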
*/ 2148 result = expand_shift (LSHIFT_EXPR, word_mode, result, 2149 BITS_PER_WORD - bitsize, NULL_RTX, 0); 2150 return expand_shift (RSHIFT_EXPR, word_mode, result, 2151 BITS_PER_WORD - bitsize, NULL_RTX, 0); 2152} 2153 2154/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving 2155 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than 2156 MODE, fill the upper bits with zeros. Fail if the layout of either 2157 mode is unknown (as for CC modes) or if the extraction would involve 2158 unprofitable mode punning. Return the value on success, otherwise 2159 return null. 2160 2161 This is different from gen_lowpart* in these respects: 2162 2163 - the returned value must always be considered an rvalue 2164 2165 - when MODE is wider than SRC_MODE, the extraction involves 2166 a zero extension 2167 2168 - when MODE is smaller than SRC_MODE, the extraction involves 2169 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION). 2170 2171 In other words, this routine performs a computation, whereas the 2172 gen_lowpart* routines are conceptually lvalue or rvalue subreg 2173 operations. */ 2174 2175rtx 2176extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src) 2177{ 2178 machine_mode int_mode, src_int_mode; 2179 2180 if (mode == src_mode) 2181 return src; 2182 2183 if (CONSTANT_P (src)) 2184 { 2185 /* simplify_gen_subreg can't be used here, as if simplify_subreg 2186 fails, it will happily create (subreg (symbol_ref)) or similar 2187 invalid SUBREGs. */ 2188 unsigned int byte = subreg_lowpart_offset (mode, src_mode); 2189 rtx ret = simplify_subreg (mode, src, src_mode, byte); 2190 if (ret) 2191 return ret; 2192 2193 if (GET_MODE (src) == VOIDmode 2194 || !validate_subreg (mode, src_mode, src, byte)) 2195 return NULL_RTX; 2196 2197 src = force_reg (GET_MODE (src), src); 2198 return gen_rtx_SUBREG (mode, src, byte); 2199 } 2200 2201 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC) 2202 return NULL_RTX; 2203 2204 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode) 2205 && MODES_TIEABLE_P (mode, src_mode)) 2206 { 2207 rtx x = gen_lowpart_common (mode, src); 2208 if (x) 2209 return x; 2210 } 2211 2212 src_int_mode = int_mode_for_mode (src_mode); 2213 int_mode = int_mode_for_mode (mode); 2214 if (src_int_mode == BLKmode || int_mode == BLKmode) 2215 return NULL_RTX; 2216 2217 if (!MODES_TIEABLE_P (src_int_mode, src_mode)) 2218 return NULL_RTX; 2219 if (!MODES_TIEABLE_P (int_mode, mode)) 2220 return NULL_RTX; 2221 2222 src = gen_lowpart (src_int_mode, src); 2223 src = convert_modes (int_mode, src_int_mode, src, true); 2224 src = gen_lowpart (mode, src); 2225 return src; 2226} 2227 2228/* Add INC into TARGET. */ 2229 2230void 2231expand_inc (rtx target, rtx inc) 2232{ 2233 rtx value = expand_binop (GET_MODE (target), add_optab, 2234 target, inc, 2235 target, 0, OPTAB_LIB_WIDEN); 2236 if (value != target) 2237 emit_move_insn (target, value); 2238} 2239 2240/* Subtract DEC from TARGET. */ 2241 2242void 2243expand_dec (rtx target, rtx dec) 2244{ 2245 rtx value = expand_binop (GET_MODE (target), sub_optab, 2246 target, dec, 2247 target, 0, OPTAB_LIB_WIDEN); 2248 if (value != target) 2249 emit_move_insn (target, value); 2250} 2251 2252/* Output a shift instruction for expression code CODE, 2253 with SHIFTED being the rtx for the value to shift, 2254 and AMOUNT the rtx for the amount to shift by. 2255 Store the result in the rtx TARGET, if that is convenient. 2256 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. 
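CODE may also be LROTATE_EXPR or RROTATE_EXPR, in which case the value is rotated rather than shifted.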
2257 Return the rtx for where the value is. 2258 If that cannot be done, abort the compilation unless MAY_FAIL is true, 2259 in which case 0 is returned. */ 2260 2261static rtx 2262expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted, 2263 rtx amount, rtx target, int unsignedp, bool may_fail = false) 2264{ 2265 rtx op1, temp = 0; 2266 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR); 2267 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR); 2268 optab lshift_optab = ashl_optab; 2269 optab rshift_arith_optab = ashr_optab; 2270 optab rshift_uns_optab = lshr_optab; 2271 optab lrotate_optab = rotl_optab; 2272 optab rrotate_optab = rotr_optab; 2273 machine_mode op1_mode; 2274 machine_mode scalar_mode = mode; 2275 int attempt; 2276 bool speed = optimize_insn_for_speed_p (); 2277 2278 if (VECTOR_MODE_P (mode)) 2279 scalar_mode = GET_MODE_INNER (mode); 2280 op1 = amount; 2281 op1_mode = GET_MODE (op1); 2282 2283 /* Determine whether the shift/rotate amount is a vector or a scalar. If the 2284 shift amount is a vector, use the vector/vector shift patterns. */ 2285 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode)) 2286 { 2287 lshift_optab = vashl_optab; 2288 rshift_arith_optab = vashr_optab; 2289 rshift_uns_optab = vlshr_optab; 2290 lrotate_optab = vrotl_optab; 2291 rrotate_optab = vrotr_optab; 2292 } 2293 2294 /* We used to detect shift counts computed by NEGATE_EXPR 2295 and shift in the other direction, but that does not work 2296 on all machines. */ 2297 2298 if (SHIFT_COUNT_TRUNCATED) 2299 { 2300 if (CONST_INT_P (op1) 2301 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >= 2302 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode))) 2303 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1) 2304 % GET_MODE_BITSIZE (scalar_mode)); 2305 else if (GET_CODE (op1) == SUBREG 2306 && subreg_lowpart_p (op1) 2307 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1))) 2308 && SCALAR_INT_MODE_P (GET_MODE (op1))) 2309 op1 = SUBREG_REG (op1); 2310 } 2311 2312 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2, 2313 prefer left rotation; if op1 is from bitsize / 2 + 1 to 2314 bitsize - 1, use the other direction of rotate with a 1 .. bitsize / 2 - 1 2315 amount instead. */ 2316 if (rotate 2317 && CONST_INT_P (op1) 2318 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left, 2319 GET_MODE_BITSIZE (scalar_mode) - 1)) 2320 { 2321 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1)); 2322 left = !left; 2323 code = left ? LROTATE_EXPR : RROTATE_EXPR; 2324 } 2325 2326 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi. 2327 Note that this is not the case for bigger values. For instance, a rotation 2328 of 0x01020304 by 16 bits gives 0x03040102, which is different from 2329 0x04030201 (bswapsi). */ 2330 if (rotate 2331 && CONST_INT_P (op1) 2332 && INTVAL (op1) == BITS_PER_UNIT 2333 && GET_MODE_SIZE (scalar_mode) == 2 2334 && optab_handler (bswap_optab, mode) != CODE_FOR_nothing) 2335 return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp); 2336 2337 if (op1 == const0_rtx) 2338 return shifted; 2339 2340 /* Check whether it's cheaper to implement a left shift by a constant 2341 bit count as a sequence of additions.
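For example, x << 2 can be computed as two self-additions (t = x + x; t = t + t), which wins whenever two additions cost less than one shift by 2 on the target.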
*/ 2342 if (code == LSHIFT_EXPR 2343 && CONST_INT_P (op1) 2344 && INTVAL (op1) > 0 2345 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode) 2346 && INTVAL (op1) < MAX_BITS_PER_WORD 2347 && (shift_cost (speed, mode, INTVAL (op1)) 2348 > INTVAL (op1) * add_cost (speed, mode)) 2349 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST) 2350 { 2351 int i; 2352 for (i = 0; i < INTVAL (op1); i++) 2353 { 2354 temp = force_reg (mode, shifted); 2355 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX, 2356 unsignedp, OPTAB_LIB_WIDEN); 2357 } 2358 return shifted; 2359 } 2360 2361 for (attempt = 0; temp == 0 && attempt < 3; attempt++) 2362 { 2363 enum optab_methods methods; 2364 2365 if (attempt == 0) 2366 methods = OPTAB_DIRECT; 2367 else if (attempt == 1) 2368 methods = OPTAB_WIDEN; 2369 else 2370 methods = OPTAB_LIB_WIDEN; 2371 2372 if (rotate) 2373 { 2374 /* Widening does not work for rotation. */ 2375 if (methods == OPTAB_WIDEN) 2376 continue; 2377 else if (methods == OPTAB_LIB_WIDEN) 2378 { 2379 /* If we have been unable to open-code this by a rotation, 2380 do it as the IOR of two shifts. I.e., to rotate A 2381 by N bits, compute 2382 (A << N) | ((unsigned) A >> ((-N) & (C - 1))) 2383 where C is the bitsize of A. 2384 2385 It is theoretically possible that the target machine might 2386 not be able to perform either shift and hence we would 2387 be making two libcalls rather than just the one for the 2388 shift (similarly if IOR could not be done). We will allow 2389 this extremely unlikely lossage to avoid complicating the 2390 code below. */ 2391 2392 rtx subtarget = target == shifted ? 0 : target; 2393 rtx new_amount, other_amount; 2394 rtx temp1; 2395 2396 new_amount = op1; 2397 if (op1 == const0_rtx) 2398 return shifted; 2399 else if (CONST_INT_P (op1)) 2400 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode) 2401 - INTVAL (op1)); 2402 else 2403 { 2404 other_amount 2405 = simplify_gen_unary (NEG, GET_MODE (op1), 2406 op1, GET_MODE (op1)); 2407 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1; 2408 other_amount 2409 = simplify_gen_binary (AND, GET_MODE (op1), other_amount, 2410 gen_int_mode (mask, GET_MODE (op1))); 2411 } 2412 2413 shifted = force_reg (mode, shifted); 2414 2415 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR, 2416 mode, shifted, new_amount, 0, 1); 2417 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR, 2418 mode, shifted, other_amount, 2419 subtarget, 1); 2420 return expand_binop (mode, ior_optab, temp, temp1, target, 2421 unsignedp, methods); 2422 } 2423 2424 temp = expand_binop (mode, 2425 left ? lrotate_optab : rrotate_optab, 2426 shifted, op1, target, unsignedp, methods); 2427 } 2428 else if (unsignedp) 2429 temp = expand_binop (mode, 2430 left ? lshift_optab : rshift_uns_optab, 2431 shifted, op1, target, unsignedp, methods); 2432 2433 /* Do arithmetic shifts. 2434 Also, if we are going to widen the operand, we can just as well 2435 use an arithmetic right-shift instead of a logical one. */ 2436 if (temp == 0 && ! rotate 2437 && (! unsignedp || (! left && methods == OPTAB_WIDEN))) 2438 { 2439 enum optab_methods methods1 = methods; 2440 2441 /* If trying to widen a log shift to an arithmetic shift, 2442 don't accept an arithmetic shift of the same size. */ 2443 if (unsignedp) 2444 methods1 = OPTAB_MUST_WIDEN; 2445 2446 /* Arithmetic shift */ 2447 2448 temp = expand_binop (mode, 2449 left ? 
lshift_optab : rshift_arith_optab, 2450 shifted, op1, target, unsignedp, methods1); 2451 } 2452 2453 /* We used to try extzv here for logical right shifts, but that was 2454 only useful for one machine, the VAX, and caused poor code 2455 generation there for lshrdi3, so the code was deleted and a 2456 define_expand for lshrsi3 was added to vax.md. */ 2457 } 2458 2459 gcc_assert (temp != NULL_RTX || may_fail); 2460 return temp; 2461} 2462 2463/* Output a shift instruction for expression code CODE, 2464 with SHIFTED being the rtx for the value to shift, 2465 and AMOUNT the amount to shift by. 2466 Store the result in the rtx TARGET, if that is convenient. 2467 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. 2468 Return the rtx for where the value is. */ 2469 2470rtx 2471expand_shift (enum tree_code code, machine_mode mode, rtx shifted, 2472 int amount, rtx target, int unsignedp) 2473{ 2474 return expand_shift_1 (code, mode, 2475 shifted, GEN_INT (amount), target, unsignedp); 2476} 2477 2478/* Likewise, but return 0 if that cannot be done. */ 2479 2480static rtx 2481maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted, 2482 int amount, rtx target, int unsignedp) 2483{ 2484 return expand_shift_1 (code, mode, 2485 shifted, GEN_INT (amount), target, unsignedp, true); 2486} 2487 2488/* Output a shift instruction for expression code CODE, 2489 with SHIFTED being the rtx for the value to shift, 2490 and AMOUNT the tree for the amount to shift by. 2491 Store the result in the rtx TARGET, if that is convenient. 2492 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. 2493 Return the rtx for where the value is. */ 2494 2495rtx 2496expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted, 2497 tree amount, rtx target, int unsignedp) 2498{ 2499 return expand_shift_1 (code, mode, 2500 shifted, expand_normal (amount), target, unsignedp); 2501} 2502 2503 2504static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, 2505 const struct mult_cost *, machine_mode mode); 2506static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx, 2507 const struct algorithm *, enum mult_variant); 2508static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); 2509static rtx extract_high_half (machine_mode, rtx); 2510static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int); 2511static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx, 2512 int, int); 2513/* Compute and return the best algorithm for multiplying by T. 2514 The algorithm must cost less than COST_LIMIT. 2515 If retval.cost >= COST_LIMIT, no algorithm was found and all 2516 other fields of the returned struct are undefined. 2517 MODE is the machine mode of the multiplication. */ 2518 2519static void 2520synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, 2521 const struct mult_cost *cost_limit, machine_mode mode) 2522{ 2523 int m; 2524 struct algorithm *alg_in, *best_alg; 2525 struct mult_cost best_cost; 2526 struct mult_cost new_limit; 2527 int op_cost, op_latency; 2528 unsigned HOST_WIDE_INT orig_t = t; 2529 unsigned HOST_WIDE_INT q; 2530 int maxm, hash_index; 2531 bool cache_hit = false; 2532 enum alg_code cache_alg = alg_zero; 2533 bool speed = optimize_insn_for_speed_p (); 2534 machine_mode imode; 2535 struct alg_hash_entry *entry_ptr; 2536 2537 /* Indicate that no algorithm is yet found. If no algorithm 2538 is found, this value will be returned, indicating failure.
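(A cost one above the limit can never compare as cheaper than COST_LIMIT, so callers detect failure by comparing the returned cost against the limit they passed in.)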
*/ 2539 alg_out->cost.cost = cost_limit->cost + 1; 2540 alg_out->cost.latency = cost_limit->latency + 1; 2541 2542 if (cost_limit->cost < 0 2543 || (cost_limit->cost == 0 && cost_limit->latency <= 0)) 2544 return; 2545 2546 /* Be prepared for vector modes. */ 2547 imode = GET_MODE_INNER (mode); 2548 2549 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode)); 2550 2551 /* Restrict the bits of "t" to the multiplication's mode. */ 2552 t &= GET_MODE_MASK (imode); 2553 2554 /* t == 1 can be done in zero cost. */ 2555 if (t == 1) 2556 { 2557 alg_out->ops = 1; 2558 alg_out->cost.cost = 0; 2559 alg_out->cost.latency = 0; 2560 alg_out->op[0] = alg_m; 2561 return; 2562 } 2563 2564 /* t == 0 sometimes has a cost. If it does and it exceeds our limit, 2565 fail now. */ 2566 if (t == 0) 2567 { 2568 if (MULT_COST_LESS (cost_limit, zero_cost (speed))) 2569 return; 2570 else 2571 { 2572 alg_out->ops = 1; 2573 alg_out->cost.cost = zero_cost (speed); 2574 alg_out->cost.latency = zero_cost (speed); 2575 alg_out->op[0] = alg_zero; 2576 return; 2577 } 2578 } 2579 2580 /* We'll be needing a couple extra algorithm structures now. */ 2581 2582 alg_in = XALLOCA (struct algorithm); 2583 best_alg = XALLOCA (struct algorithm); 2584 best_cost = *cost_limit; 2585 2586 /* Compute the hash index. */ 2587 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES; 2588 2589 /* See if we already know what to do for T. */ 2590 entry_ptr = alg_hash_entry_ptr (hash_index); 2591 if (entry_ptr->t == t 2592 && entry_ptr->mode == mode 2593 && entry_ptr->speed == speed 2594 && entry_ptr->alg != alg_unknown) 2595 { 2596 cache_alg = entry_ptr->alg; 2597 2598 if (cache_alg == alg_impossible) 2599 { 2600 /* The cache tells us that it's impossible to synthesize 2601 multiplication by T within entry_ptr->cost. */ 2602 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit)) 2603 /* COST_LIMIT is at least as restrictive as the one 2604 recorded in the hash table, in which case we have no 2605 hope of synthesizing a multiplication. Just 2606 return. */ 2607 return; 2608 2609 /* If we get here, COST_LIMIT is less restrictive than the 2610 one recorded in the hash table, so we may be able to 2611 synthesize a multiplication. Proceed as if we didn't 2612 have the cache entry. */ 2613 } 2614 else 2615 { 2616 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost)) 2617 /* The cached algorithm shows that this multiplication 2618 requires more cost than COST_LIMIT. Just return. This 2619 way, we don't clobber this cache entry with 2620 alg_impossible but retain useful information. */ 2621 return; 2622 2623 cache_hit = true; 2624 2625 switch (cache_alg) 2626 { 2627 case alg_shift: 2628 goto do_alg_shift; 2629 2630 case alg_add_t_m2: 2631 case alg_sub_t_m2: 2632 goto do_alg_addsub_t_m2; 2633 2634 case alg_add_factor: 2635 case alg_sub_factor: 2636 goto do_alg_addsub_factor; 2637 2638 case alg_add_t2_m: 2639 goto do_alg_add_t2_m; 2640 2641 case alg_sub_t2_m: 2642 goto do_alg_sub_t2_m; 2643 2644 default: 2645 gcc_unreachable (); 2646 } 2647 } 2648 } 2649 2650 /* If we have a group of zero bits at the low-order part of T, try 2651 multiplying by the remaining bits and then doing a shift. */ 2652 2653 if ((t & 1) == 0) 2654 { 2655 do_alg_shift: 2656 m = ctz_or_zero (t); /* m = number of low zero bits */ 2657 if (m < maxm) 2658 { 2659 q = t >> m; 2660 /* The function expand_shift will choose between a shift and 2661 a sequence of additions, so the observed cost is given as 2662 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). 
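For example, t = 20 (binary 10100) has m = 2 low zero bits, so we recurse on q = 5 and record a final left shift by 2 on top of that algorithm.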
*/ 2663 op_cost = m * add_cost (speed, mode); 2664 if (shift_cost (speed, mode, m) < op_cost) 2665 op_cost = shift_cost (speed, mode, m); 2666 new_limit.cost = best_cost.cost - op_cost; 2667 new_limit.latency = best_cost.latency - op_cost; 2668 synth_mult (alg_in, q, &new_limit, mode); 2669 2670 alg_in->cost.cost += op_cost; 2671 alg_in->cost.latency += op_cost; 2672 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2673 { 2674 best_cost = alg_in->cost; 2675 std::swap (alg_in, best_alg); 2676 best_alg->log[best_alg->ops] = m; 2677 best_alg->op[best_alg->ops] = alg_shift; 2678 } 2679 2680 /* See if treating ORIG_T as a signed number yields a better 2681 sequence. Try this sequence only for a negative ORIG_T 2682 as it would be useless for a non-negative ORIG_T. */ 2683 if ((HOST_WIDE_INT) orig_t < 0) 2684 { 2685 /* Shift ORIG_T as follows because a right shift of a 2686 negative-valued signed type is implementation 2687 defined. */ 2688 q = ~(~orig_t >> m); 2689 /* The function expand_shift will choose between a shift 2690 and a sequence of additions, so the observed cost is 2691 given as MIN (m * add_cost(speed, mode), 2692 shift_cost(speed, mode, m)). */ 2693 op_cost = m * add_cost (speed, mode); 2694 if (shift_cost (speed, mode, m) < op_cost) 2695 op_cost = shift_cost (speed, mode, m); 2696 new_limit.cost = best_cost.cost - op_cost; 2697 new_limit.latency = best_cost.latency - op_cost; 2698 synth_mult (alg_in, q, &new_limit, mode); 2699 2700 alg_in->cost.cost += op_cost; 2701 alg_in->cost.latency += op_cost; 2702 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2703 { 2704 best_cost = alg_in->cost; 2705 std::swap (alg_in, best_alg); 2706 best_alg->log[best_alg->ops] = m; 2707 best_alg->op[best_alg->ops] = alg_shift; 2708 } 2709 } 2710 } 2711 if (cache_hit) 2712 goto done; 2713 } 2714 2715 /* If we have an odd number, add or subtract one. */ 2716 if ((t & 1) != 0) 2717 { 2718 unsigned HOST_WIDE_INT w; 2719 2720 do_alg_addsub_t_m2: 2721 for (w = 1; (w & t) != 0; w <<= 1) 2722 ; 2723 /* If T was -1, then W will be zero after the loop. This is another 2724 case where T ends with ...111. Handling this with (T + 1) and 2725 subtracting 1 produces slightly better code and makes algorithm 2726 selection much faster than treating it like the ...0111 case 2727 below. */ 2728 if (w == 0 2729 || (w > 2 2730 /* Reject the case where t is 3. 2731 Thus we prefer addition in that case. */ 2732 && t != 3)) 2733 { 2734 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */ 2735 2736 op_cost = add_cost (speed, mode); 2737 new_limit.cost = best_cost.cost - op_cost; 2738 new_limit.latency = best_cost.latency - op_cost; 2739 synth_mult (alg_in, t + 1, &new_limit, mode); 2740 2741 alg_in->cost.cost += op_cost; 2742 alg_in->cost.latency += op_cost; 2743 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2744 { 2745 best_cost = alg_in->cost; 2746 std::swap (alg_in, best_alg); 2747 best_alg->log[best_alg->ops] = 0; 2748 best_alg->op[best_alg->ops] = alg_sub_t_m2; 2749 } 2750 } 2751 else 2752 { 2753 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1.
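(That is, X * T is computed as X * (T - 1) + X; T - 1 then ends in ...00 or ...010, so the recursive step can continue with a shift.)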
*/ 2754 2755 op_cost = add_cost (speed, mode); 2756 new_limit.cost = best_cost.cost - op_cost; 2757 new_limit.latency = best_cost.latency - op_cost; 2758 synth_mult (alg_in, t - 1, &new_limit, mode); 2759 2760 alg_in->cost.cost += op_cost; 2761 alg_in->cost.latency += op_cost; 2762 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2763 { 2764 best_cost = alg_in->cost; 2765 std::swap (alg_in, best_alg); 2766 best_alg->log[best_alg->ops] = 0; 2767 best_alg->op[best_alg->ops] = alg_add_t_m2; 2768 } 2769 } 2770 2771 /* We may be able to calculate a * -7, a * -15, a * -31, etc. 2772 quickly with a - a * n for some appropriate constant n. */ 2773 m = exact_log2 (-orig_t + 1); 2774 if (m >= 0 && m < maxm) 2775 { 2776 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); 2777 /* If the target has a cheap shift-and-subtract insn, use 2778 that in preference to a shift insn followed by a sub insn. 2779 Assume that the shift-and-sub is "atomic" with a latency 2780 equal to its cost; otherwise assume that on superscalar 2781 hardware the shift may be executed concurrently with the 2782 earlier steps in the algorithm. */ 2783 if (shiftsub1_cost (speed, mode, m) <= op_cost) 2784 { 2785 op_cost = shiftsub1_cost (speed, mode, m); 2786 op_latency = op_cost; 2787 } 2788 else 2789 op_latency = add_cost (speed, mode); 2790 2791 new_limit.cost = best_cost.cost - op_cost; 2792 new_limit.latency = best_cost.latency - op_latency; 2793 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, 2794 &new_limit, mode); 2795 2796 alg_in->cost.cost += op_cost; 2797 alg_in->cost.latency += op_latency; 2798 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2799 { 2800 best_cost = alg_in->cost; 2801 std::swap (alg_in, best_alg); 2802 best_alg->log[best_alg->ops] = m; 2803 best_alg->op[best_alg->ops] = alg_sub_t_m2; 2804 } 2805 } 2806 2807 if (cache_hit) 2808 goto done; 2809 } 2810 2811 /* Look for factors of t of the form 2812 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)). 2813 If we find such a factor, we can multiply by t using an algorithm that 2814 multiplies by q, shifts the result by m, and adds/subtracts it to itself. 2815 2816 We search for large factors first and loop down, even if large factors 2817 are less probable than small ones; if we find a large factor we will find a 2818 good sequence quickly, and therefore be able to prune (by decreasing 2819 COST_LIMIT) the search. */ 2820 2821 do_alg_addsub_factor: 2822 for (m = floor_log2 (t - 1); m >= 2; m--) 2823 { 2824 unsigned HOST_WIDE_INT d; 2825 2826 d = (HOST_WIDE_INT_1U << m) + 1; 2827 if (t % d == 0 && t > d && m < maxm 2828 && (!cache_hit || cache_alg == alg_add_factor)) 2829 { 2830 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); 2831 if (shiftadd_cost (speed, mode, m) <= op_cost) 2832 op_cost = shiftadd_cost (speed, mode, m); 2833 2834 op_latency = op_cost; 2835 2836 2837 new_limit.cost = best_cost.cost - op_cost; 2838 new_limit.latency = best_cost.latency - op_latency; 2839 synth_mult (alg_in, t / d, &new_limit, mode); 2840 2841 alg_in->cost.cost += op_cost; 2842 alg_in->cost.latency += op_latency; 2843 if (alg_in->cost.latency < op_cost) 2844 alg_in->cost.latency = op_cost; 2845 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2846 { 2847 best_cost = alg_in->cost; 2848 std::swap (alg_in, best_alg); 2849 best_alg->log[best_alg->ops] = m; 2850 best_alg->op[best_alg->ops] = alg_add_factor; 2851 } 2852 /* Other factors will have been taken care of in the recursion.
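(The recursive synth_mult call on t / d itself looks for factors of the form 2^m + 1 and 2^m - 1, so there is no need to keep scanning smaller M here.)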
*/ 2853 break; 2854 } 2855 2856 d = (HOST_WIDE_INT_1U << m) - 1; 2857 if (t % d == 0 && t > d && m < maxm 2858 && (!cache_hit || cache_alg == alg_sub_factor)) 2859 { 2860 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); 2861 if (shiftsub0_cost (speed, mode, m) <= op_cost) 2862 op_cost = shiftsub0_cost (speed, mode, m); 2863 2864 op_latency = op_cost; 2865 2866 new_limit.cost = best_cost.cost - op_cost; 2867 new_limit.latency = best_cost.latency - op_latency; 2868 synth_mult (alg_in, t / d, &new_limit, mode); 2869 2870 alg_in->cost.cost += op_cost; 2871 alg_in->cost.latency += op_latency; 2872 if (alg_in->cost.latency < op_cost) 2873 alg_in->cost.latency = op_cost; 2874 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2875 { 2876 best_cost = alg_in->cost; 2877 std::swap (alg_in, best_alg); 2878 best_alg->log[best_alg->ops] = m; 2879 best_alg->op[best_alg->ops] = alg_sub_factor; 2880 } 2881 break; 2882 } 2883 } 2884 if (cache_hit) 2885 goto done; 2886 2887 /* Try shift-and-add (load effective address) instructions, 2888 i.e. do a*3, a*5, a*9. */ 2889 if ((t & 1) != 0) 2890 { 2891 do_alg_add_t2_m: 2892 q = t - 1; 2893 m = ctz_hwi (q); 2894 if (q && m < maxm) 2895 { 2896 op_cost = shiftadd_cost (speed, mode, m); 2897 new_limit.cost = best_cost.cost - op_cost; 2898 new_limit.latency = best_cost.latency - op_cost; 2899 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode); 2900 2901 alg_in->cost.cost += op_cost; 2902 alg_in->cost.latency += op_cost; 2903 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2904 { 2905 best_cost = alg_in->cost; 2906 std::swap (alg_in, best_alg); 2907 best_alg->log[best_alg->ops] = m; 2908 best_alg->op[best_alg->ops] = alg_add_t2_m; 2909 } 2910 } 2911 if (cache_hit) 2912 goto done; 2913 2914 do_alg_sub_t2_m: 2915 q = t + 1; 2916 m = ctz_hwi (q); 2917 if (q && m < maxm) 2918 { 2919 op_cost = shiftsub0_cost (speed, mode, m); 2920 new_limit.cost = best_cost.cost - op_cost; 2921 new_limit.latency = best_cost.latency - op_cost; 2922 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode); 2923 2924 alg_in->cost.cost += op_cost; 2925 alg_in->cost.latency += op_cost; 2926 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2927 { 2928 best_cost = alg_in->cost; 2929 std::swap (alg_in, best_alg); 2930 best_alg->log[best_alg->ops] = m; 2931 best_alg->op[best_alg->ops] = alg_sub_t2_m; 2932 } 2933 } 2934 if (cache_hit) 2935 goto done; 2936 } 2937 2938 done: 2939 /* If best_cost has not decreased, we have not found any algorithm. */ 2940 if (!CHEAPER_MULT_COST (&best_cost, cost_limit)) 2941 { 2942 /* We failed to find an algorithm. Record alg_impossible for 2943 this case (that is, <T, MODE, COST_LIMIT>) so that next time 2944 we are asked to find an algorithm for T within the same or 2945 lower COST_LIMIT, we can immediately return to the 2946 caller. */ 2947 entry_ptr->t = t; 2948 entry_ptr->mode = mode; 2949 entry_ptr->speed = speed; 2950 entry_ptr->alg = alg_impossible; 2951 entry_ptr->cost = *cost_limit; 2952 return; 2953 } 2954 2955 /* Cache the result. */ 2956 if (!cache_hit) 2957 { 2958 entry_ptr->t = t; 2959 entry_ptr->mode = mode; 2960 entry_ptr->speed = speed; 2961 entry_ptr->alg = best_alg->op[best_alg->ops]; 2962 entry_ptr->cost.cost = best_cost.cost; 2963 entry_ptr->cost.latency = best_cost.latency; 2964 } 2965 2966 /* If we are getting a too long sequence for `struct algorithm' 2967 to record, make this search fail. */ 2968 if (best_alg->ops == MAX_BITS_PER_WORD) 2969 return; 2970 2971 /* Copy the algorithm from temporary space to the space at alg_out. 
2972 We avoid using structure assignment because the majority of 2973 best_alg is normally undefined, and this is a critical function. */ 2974 alg_out->ops = best_alg->ops + 1; 2975 alg_out->cost = best_cost; 2976 memcpy (alg_out->op, best_alg->op, 2977 alg_out->ops * sizeof *alg_out->op); 2978 memcpy (alg_out->log, best_alg->log, 2979 alg_out->ops * sizeof *alg_out->log); 2980} 2981 2982/* Find the cheapest way of multiplying a value of mode MODE by VAL. 2983 Try three variations: 2984 2985 - a shift/add sequence based on VAL itself 2986 - a shift/add sequence based on -VAL, followed by a negation 2987 - a shift/add sequence based on VAL - 1, followed by an addition. 2988 2989 Return true if the cheapest of these costs less than MULT_COST, 2990 describing the algorithm in *ALG and final fixup in *VARIANT. */ 2991 2992bool 2993choose_mult_variant (machine_mode mode, HOST_WIDE_INT val, 2994 struct algorithm *alg, enum mult_variant *variant, 2995 int mult_cost) 2996{ 2997 struct algorithm alg2; 2998 struct mult_cost limit; 2999 int op_cost; 3000 bool speed = optimize_insn_for_speed_p (); 3001 3002 /* Fail quickly for impossible bounds. */ 3003 if (mult_cost < 0) 3004 return false; 3005 3006 /* Ensure that mult_cost provides a reasonable upper bound. 3007 Any constant multiplication can be performed with fewer 3008 than 2 * bits additions. */ 3009 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode); 3010 if (mult_cost > op_cost) 3011 mult_cost = op_cost; 3012 3013 *variant = basic_variant; 3014 limit.cost = mult_cost; 3015 limit.latency = mult_cost; 3016 synth_mult (alg, val, &limit, mode); 3017 3018 /* This works only if the inverted value actually fits in an 3019 `unsigned int'. */ 3020 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode)) 3021 { 3022 op_cost = neg_cost (speed, mode); 3023 if (MULT_COST_LESS (&alg->cost, mult_cost)) 3024 { 3025 limit.cost = alg->cost.cost - op_cost; 3026 limit.latency = alg->cost.latency - op_cost; 3027 } 3028 else 3029 { 3030 limit.cost = mult_cost - op_cost; 3031 limit.latency = mult_cost - op_cost; 3032 } 3033 3034 synth_mult (&alg2, -val, &limit, mode); 3035 alg2.cost.cost += op_cost; 3036 alg2.cost.latency += op_cost; 3037 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) 3038 *alg = alg2, *variant = negate_variant; 3039 } 3040 3041 /* This proves very useful for division-by-constant. */ 3042 op_cost = add_cost (speed, mode); 3043 if (MULT_COST_LESS (&alg->cost, mult_cost)) 3044 { 3045 limit.cost = alg->cost.cost - op_cost; 3046 limit.latency = alg->cost.latency - op_cost; 3047 } 3048 else 3049 { 3050 limit.cost = mult_cost - op_cost; 3051 limit.latency = mult_cost - op_cost; 3052 } 3053 3054 synth_mult (&alg2, val - 1, &limit, mode); 3055 alg2.cost.cost += op_cost; 3056 alg2.cost.latency += op_cost; 3057 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) 3058 *alg = alg2, *variant = add_variant; 3059 3060 return MULT_COST_LESS (&alg->cost, mult_cost); 3061} 3062 3063/* A subroutine of expand_mult, used for constant multiplications. 3064 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if 3065 convenient. Use the shift/add sequence described by ALG and apply 3066 the final fixup specified by VARIANT.
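For example, for VAL == 5 a typical ALG is { alg_m, alg_add_t2_m with log 2 }: ACCUM is initialized to OP0 and then set to (ACCUM << 2) + OP0, yielding OP0 * 5.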
*/ 3067 3068static rtx 3069expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val, 3070 rtx target, const struct algorithm *alg, 3071 enum mult_variant variant) 3072{ 3073 unsigned HOST_WIDE_INT val_so_far; 3074 rtx_insn *insn; 3075 rtx accum, tem; 3076 int opno; 3077 machine_mode nmode; 3078 3079 /* Avoid referencing memory over and over and invalid sharing 3080 on SUBREGs. */ 3081 op0 = force_reg (mode, op0); 3082 3083 /* ACCUM starts out either as OP0 or as a zero, depending on 3084 the first operation. */ 3085 3086 if (alg->op[0] == alg_zero) 3087 { 3088 accum = copy_to_mode_reg (mode, CONST0_RTX (mode)); 3089 val_so_far = 0; 3090 } 3091 else if (alg->op[0] == alg_m) 3092 { 3093 accum = copy_to_mode_reg (mode, op0); 3094 val_so_far = 1; 3095 } 3096 else 3097 gcc_unreachable (); 3098 3099 for (opno = 1; opno < alg->ops; opno++) 3100 { 3101 int log = alg->log[opno]; 3102 rtx shift_subtarget = optimize ? 0 : accum; 3103 rtx add_target 3104 = (opno == alg->ops - 1 && target != 0 && variant != add_variant 3105 && !optimize) 3106 ? target : 0; 3107 rtx accum_target = optimize ? 0 : accum; 3108 rtx accum_inner; 3109 3110 switch (alg->op[opno]) 3111 { 3112 case alg_shift: 3113 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0); 3114 /* REG_EQUAL note will be attached to the following insn. */ 3115 emit_move_insn (accum, tem); 3116 val_so_far <<= log; 3117 break; 3118 3119 case alg_add_t_m2: 3120 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0); 3121 accum = force_operand (gen_rtx_PLUS (mode, accum, tem), 3122 add_target ? add_target : accum_target); 3123 val_so_far += HOST_WIDE_INT_1U << log; 3124 break; 3125 3126 case alg_sub_t_m2: 3127 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0); 3128 accum = force_operand (gen_rtx_MINUS (mode, accum, tem), 3129 add_target ? add_target : accum_target); 3130 val_so_far -= HOST_WIDE_INT_1U << log; 3131 break; 3132 3133 case alg_add_t2_m: 3134 accum = expand_shift (LSHIFT_EXPR, mode, accum, 3135 log, shift_subtarget, 0); 3136 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), 3137 add_target ? add_target : accum_target); 3138 val_so_far = (val_so_far << log) + 1; 3139 break; 3140 3141 case alg_sub_t2_m: 3142 accum = expand_shift (LSHIFT_EXPR, mode, accum, 3143 log, shift_subtarget, 0); 3144 accum = force_operand (gen_rtx_MINUS (mode, accum, op0), 3145 add_target ? add_target : accum_target); 3146 val_so_far = (val_so_far << log) - 1; 3147 break; 3148 3149 case alg_add_factor: 3150 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0); 3151 accum = force_operand (gen_rtx_PLUS (mode, accum, tem), 3152 add_target ? add_target : accum_target); 3153 val_so_far += val_so_far << log; 3154 break; 3155 3156 case alg_sub_factor: 3157 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0); 3158 accum = force_operand (gen_rtx_MINUS (mode, tem, accum), 3159 (add_target 3160 ? add_target : (optimize ? 0 : tem))); 3161 val_so_far = (val_so_far << log) - val_so_far; 3162 break; 3163 3164 default: 3165 gcc_unreachable (); 3166 } 3167 3168 if (SCALAR_INT_MODE_P (mode)) 3169 { 3170 /* Write a REG_EQUAL note on the last insn so that we can cse 3171 multiplication sequences. Note that if ACCUM is a SUBREG, 3172 we've set the inner register and must properly indicate that. 
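(For the OP0 * 5 example above, the final add would carry a note equivalent to (mult:M OP0 (const_int 5)), so later passes can recognize the whole shift/add sequence as a multiplication.)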
*/ 3173 tem = op0, nmode = mode; 3174 accum_inner = accum; 3175 if (GET_CODE (accum) == SUBREG) 3176 { 3177 accum_inner = SUBREG_REG (accum); 3178 nmode = GET_MODE (accum_inner); 3179 tem = gen_lowpart (nmode, op0); 3180 } 3181 3182 /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG. 3183 In that case, only the low bits of accum would be guaranteed to 3184 be equal to the content of the REG_EQUAL note; the upper bits 3185 can be anything. */ 3186 if (!paradoxical_subreg_p (tem)) 3187 { 3188 insn = get_last_insn (); 3189 set_dst_reg_note (insn, REG_EQUAL, 3190 gen_rtx_MULT (nmode, tem, 3191 gen_int_mode (val_so_far, 3192 nmode)), 3193 accum_inner); 3194 } 3195 } 3196 } 3197 3198 if (variant == negate_variant) 3199 { 3200 val_so_far = -val_so_far; 3201 accum = expand_unop (mode, neg_optab, accum, target, 0); 3202 } 3203 else if (variant == add_variant) 3204 { 3205 val_so_far = val_so_far + 1; 3206 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target); 3207 } 3208 3209 /* Compare only the bits of val and val_so_far that are significant 3210 in the result mode, to avoid sign-/zero-extension confusion. */ 3211 nmode = GET_MODE_INNER (mode); 3212 val &= GET_MODE_MASK (nmode); 3213 val_so_far &= GET_MODE_MASK (nmode); 3214 gcc_assert (val == (HOST_WIDE_INT) val_so_far); 3215 3216 return accum; 3217} 3218 3219/* Perform a multiplication and return an rtx for the result. 3220 MODE is the mode of the value; OP0 and OP1 are what to multiply (rtx's); 3221 TARGET is a suggestion for where to store the result (an rtx). 3222 3223 We check specially for a constant integer as OP1. 3224 If you want this check for OP0 as well, then before calling 3225 you should swap the two operands if OP0 would be constant. */ 3226 3227rtx 3228expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target, 3229 int unsignedp) 3230{ 3231 enum mult_variant variant; 3232 struct algorithm algorithm; 3233 rtx scalar_op1; 3234 int max_cost; 3235 bool speed = optimize_insn_for_speed_p (); 3236 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp; 3237 3238 if (CONSTANT_P (op0)) 3239 std::swap (op0, op1); 3240 3241 /* For vectors, there are several simplifications that can be made if 3242 all elements of the vector constant are identical. */ 3243 scalar_op1 = unwrap_const_vec_duplicate (op1); 3244 3245 if (INTEGRAL_MODE_P (mode)) 3246 { 3247 rtx fake_reg; 3248 HOST_WIDE_INT coeff; 3249 bool is_neg; 3250 int mode_bitsize; 3251 3252 if (op1 == CONST0_RTX (mode)) 3253 return op1; 3254 if (op1 == CONST1_RTX (mode)) 3255 return op0; 3256 if (op1 == CONSTM1_RTX (mode)) 3257 return expand_unop (mode, do_trapv ? negv_optab : neg_optab, 3258 op0, target, 0); 3259 3260 if (do_trapv) 3261 goto skip_synth; 3262 3263 /* If mode is an integer vector mode, check whether the backend supports 3264 vector lshift (by scalar or vector) at all. If not, we can't use 3265 a synthesized multiply. */ 3266 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT 3267 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing 3268 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing) 3269 goto skip_synth; 3270 3271 /* These are the operations that are potentially turned into 3272 a sequence of shifts and additions. */ 3273 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode); 3274 3275 /* synth_mult does an `unsigned int' multiply. As long as the mode is 3276 less than or equal in size to `unsigned int', this doesn't matter.
3277 If the mode is larger than `unsigned int', then synth_mult works 3278 only if the constant value exactly fits in an `unsigned int' without 3279 any truncation. This means that multiplying by negative values does 3280 not work; results are off by 2^32 on a 32-bit machine. */ 3281 if (CONST_INT_P (scalar_op1)) 3282 { 3283 coeff = INTVAL (scalar_op1); 3284 is_neg = coeff < 0; 3285 } 3286#if TARGET_SUPPORTS_WIDE_INT 3287 else if (CONST_WIDE_INT_P (scalar_op1)) 3288#else 3289 else if (CONST_DOUBLE_AS_INT_P (scalar_op1)) 3290#endif 3291 { 3292 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode)); 3293 /* Perfect power of 2 (other than 1, which is handled above). */ 3294 if (shift > 0) 3295 return expand_shift (LSHIFT_EXPR, mode, op0, 3296 shift, target, unsignedp); 3297 else 3298 goto skip_synth; 3299 } 3300 else 3301 goto skip_synth; 3302 3303 /* We used to test optimize here, on the grounds that it's better to 3304 produce a smaller program when -O is not used. But this causes 3305 such a terrible slowdown sometimes that it seems better to always 3306 use synth_mult. */ 3307 3308 /* Special case powers of two. */ 3309 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff) 3310 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)) 3311 return expand_shift (LSHIFT_EXPR, mode, op0, 3312 floor_log2 (coeff), target, unsignedp); 3313 3314 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); 3315 3316 /* Attempt to handle multiplication of DImode values by negative 3317 coefficients, by performing the multiplication by a positive 3318 multiplier and then inverting the result. */ 3319 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT) 3320 { 3321 /* It's safe to use -coeff even for INT_MIN, as the 3322 result is interpreted as an unsigned coefficient. 3323 Exclude cost of op0 from max_cost to match the cost 3324 calculation of the synth_mult. */ 3325 coeff = -(unsigned HOST_WIDE_INT) coeff; 3326 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), 3327 mode, speed) 3328 - neg_cost (speed, mode)); 3329 if (max_cost <= 0) 3330 goto skip_synth; 3331 3332 /* Special case powers of two. */ 3333 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)) 3334 { 3335 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0, 3336 floor_log2 (coeff), target, unsignedp); 3337 return expand_unop (mode, neg_optab, temp, target, 0); 3338 } 3339 3340 if (choose_mult_variant (mode, coeff, &algorithm, &variant, 3341 max_cost)) 3342 { 3343 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX, 3344 &algorithm, variant); 3345 return expand_unop (mode, neg_optab, temp, target, 0); 3346 } 3347 goto skip_synth; 3348 } 3349 3350 /* Exclude cost of op0 from max_cost to match the cost 3351 calculation of the synth_mult. */ 3352 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed); 3353 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) 3354 return expand_mult_const (mode, op0, coeff, target, 3355 &algorithm, variant); 3356 } 3357 skip_synth: 3358 3359 /* Expand x*2.0 as x+x. */ 3360 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1) 3361 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2)) 3362 { 3363 op0 = force_reg (GET_MODE (op0), op0); 3364 return expand_binop (mode, add_optab, op0, op0, 3365 target, unsignedp, OPTAB_LIB_WIDEN); 3366 } 3367 3368 /* This used to use umul_optab if unsigned, but for non-widening multiply 3369 there is no difference between signed and unsigned. */ 3370 op0 = expand_binop (mode, do_trapv ?
smulv_optab : smul_optab, 3371 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN); 3372 gcc_assert (op0); 3373 return op0; 3374} 3375 3376/* Return a cost estimate for multiplying a register by the given 3377 COEFFicient in the given MODE and SPEED. */ 3378 3379int 3380mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed) 3381{ 3382 int max_cost; 3383 struct algorithm algorithm; 3384 enum mult_variant variant; 3385 3386 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); 3387 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), 3388 mode, speed); 3389 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) 3390 return algorithm.cost.cost; 3391 else 3392 return max_cost; 3393} 3394 3395/* Perform a widening multiplication and return an rtx for the result. 3396 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); 3397 TARGET is a suggestion for where to store the result (an rtx). 3398 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab 3399 or smul_widen_optab. 3400 3401 We check specially for a constant integer as OP1, comparing the 3402 cost of a widening multiply against the cost of a sequence of shifts 3403 and adds. */ 3404 3405rtx 3406expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target, 3407 int unsignedp, optab this_optab) 3408{ 3409 bool speed = optimize_insn_for_speed_p (); 3410 rtx cop1; 3411 3412 if (CONST_INT_P (op1) 3413 && GET_MODE (op0) != VOIDmode 3414 && (cop1 = convert_modes (mode, GET_MODE (op0), op1, 3415 this_optab == umul_widen_optab)) 3416 && CONST_INT_P (cop1) 3417 && (INTVAL (cop1) >= 0 3418 || HWI_COMPUTABLE_MODE_P (mode))) 3419 { 3420 HOST_WIDE_INT coeff = INTVAL (cop1); 3421 int max_cost; 3422 enum mult_variant variant; 3423 struct algorithm algorithm; 3424 3425 if (coeff == 0) 3426 return CONST0_RTX (mode); 3427 3428 /* Special case powers of two. */ 3429 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)) 3430 { 3431 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab); 3432 return expand_shift (LSHIFT_EXPR, mode, op0, 3433 floor_log2 (coeff), target, unsignedp); 3434 } 3435 3436 /* Exclude cost of op0 from max_cost to match the cost 3437 calculation of the synth_mult. */ 3438 max_cost = mul_widen_cost (speed, mode); 3439 if (choose_mult_variant (mode, coeff, &algorithm, &variant, 3440 max_cost)) 3441 { 3442 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab); 3443 return expand_mult_const (mode, op0, coeff, target, 3444 &algorithm, variant); 3445 } 3446 } 3447 return expand_binop (mode, this_optab, op0, op1, target, 3448 unsignedp, OPTAB_LIB_WIDEN); 3449} 3450 3451/* Choose a minimal N + 1 bit approximation to 1/D that can be used to 3452 replace division by D, and put the least significant N bits of the result 3453 in *MULTIPLIER_PTR and return the most significant bit. 3454 3455 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the 3456 needed precision is in PRECISION (should be <= N). 3457 3458 PRECISION should be as small as possible so this function can choose 3459 multiplier more freely. 3460 3461 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that 3462 is to be used for a final right shift is placed in *POST_SHIFT_PTR. 3463 3464 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR), 3465 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. 
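For example, for N == 32 and D == 3 this yields the multiplier 0xAAAAAAAB with *POST_SHIFT_PTR == 1: taking the high 32 bits of the 64-bit product x * 0xAAAAAAAB and shifting right once gives x / 3 for every 32-bit unsigned x.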
*/ 3466 3467unsigned HOST_WIDE_INT 3468choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision, 3469 unsigned HOST_WIDE_INT *multiplier_ptr, 3470 int *post_shift_ptr, int *lgup_ptr) 3471{ 3472 int lgup, post_shift; 3473 int pow, pow2; 3474 3475 /* lgup = ceil(log2(divisor)); */ 3476 lgup = ceil_log2 (d); 3477 3478 gcc_assert (lgup <= n); 3479 3480 pow = n + lgup; 3481 pow2 = n + lgup - precision; 3482 3483 /* mlow = 2^(N + lgup)/d */ 3484 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT); 3485 wide_int mlow = wi::udiv_trunc (val, d); 3486 3487 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */ 3488 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT); 3489 wide_int mhigh = wi::udiv_trunc (val, d); 3490 3491 /* If precision == N, then mlow, mhigh exceed 2^N 3492 (but they do not exceed 2^(N+1)). */ 3493 3494 /* Reduce to lowest terms. */ 3495 for (post_shift = lgup; post_shift > 0; post_shift--) 3496 { 3497 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1, 3498 HOST_BITS_PER_WIDE_INT); 3499 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1, 3500 HOST_BITS_PER_WIDE_INT); 3501 if (ml_lo >= mh_lo) 3502 break; 3503 3504 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT); 3505 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT); 3506 } 3507 3508 *post_shift_ptr = post_shift; 3509 *lgup_ptr = lgup; 3510 if (n < HOST_BITS_PER_WIDE_INT) 3511 { 3512 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1; 3513 *multiplier_ptr = mhigh.to_uhwi () & mask; 3514 return mhigh.to_uhwi () >= mask; 3515 } 3516 else 3517 { 3518 *multiplier_ptr = mhigh.to_uhwi (); 3519 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1); 3520 } 3521} 3522 3523/* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is 3524 congruent to 1 (mod 2**N). */ 3525 3526static unsigned HOST_WIDE_INT 3527invert_mod2n (unsigned HOST_WIDE_INT x, int n) 3528{ 3529 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */ 3530 3531 /* The algorithm notes that the choice y = x satisfies 3532 x*y == 1 mod 2^3, since x is assumed odd. 3533 Each iteration doubles the number of bits of significance in y. */ 3534 3535 unsigned HOST_WIDE_INT mask; 3536 unsigned HOST_WIDE_INT y = x; 3537 int nbit = 3; 3538 3539 mask = (n == HOST_BITS_PER_WIDE_INT 3540 ? HOST_WIDE_INT_M1U 3541 : (HOST_WIDE_INT_1U << n) - 1); 3542 3543 while (nbit < n) 3544 { 3545 y = y * (2 - x*y) & mask; /* Modulo 2^N */ 3546 nbit *= 2; 3547 } 3548 return y; 3549} 3550 3551/* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness 3552 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the 3553 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product 3554 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to 3555 become signed. 3556 3557 The result is put in TARGET if that is convenient. 3558 3559 MODE is the mode of operation. */ 3560 3561rtx 3562expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0, 3563 rtx op1, rtx target, int unsignedp) 3564{ 3565 rtx tem; 3566 enum rtx_code adj_code = unsignedp ? 
PLUS : MINUS; 3567 3568 tem = expand_shift (RSHIFT_EXPR, mode, op0, 3569 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0); 3570 tem = expand_and (mode, tem, op1, NULL_RTX); 3571 adj_operand 3572 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), 3573 adj_operand); 3574 3575 tem = expand_shift (RSHIFT_EXPR, mode, op1, 3576 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0); 3577 tem = expand_and (mode, tem, op0, NULL_RTX); 3578 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), 3579 target); 3580 3581 return target; 3582} 3583 3584/* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */ 3585 3586static rtx 3587extract_high_half (machine_mode mode, rtx op) 3588{ 3589 machine_mode wider_mode; 3590 3591 if (mode == word_mode) 3592 return gen_highpart (mode, op); 3593 3594 gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); 3595 3596 wider_mode = GET_MODE_WIDER_MODE (mode); 3597 op = expand_shift (RSHIFT_EXPR, wider_mode, op, 3598 GET_MODE_BITSIZE (mode), 0, 1); 3599 return convert_modes (mode, wider_mode, op, 0); 3600} 3601 3602/* Like expmed_mult_highpart, but only consider using a multiplication 3603 optab. OP1 is an rtx for the constant operand. */ 3604 3605static rtx 3606expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1, 3607 rtx target, int unsignedp, int max_cost) 3608{ 3609 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode); 3610 machine_mode wider_mode; 3611 optab moptab; 3612 rtx tem; 3613 int size; 3614 bool speed = optimize_insn_for_speed_p (); 3615 3616 gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); 3617 3618 wider_mode = GET_MODE_WIDER_MODE (mode); 3619 size = GET_MODE_BITSIZE (mode); 3620 3621 /* Firstly, try using a multiplication insn that only generates the needed 3622 high part of the product, and in the sign flavor of unsignedp. */ 3623 if (mul_highpart_cost (speed, mode) < max_cost) 3624 { 3625 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab; 3626 tem = expand_binop (mode, moptab, op0, narrow_op1, target, 3627 unsignedp, OPTAB_DIRECT); 3628 if (tem) 3629 return tem; 3630 } 3631 3632 /* Secondly, same as above, but use sign flavor opposite of unsignedp. 3633 Need to adjust the result after the multiplication. */ 3634 if (size - 1 < BITS_PER_WORD 3635 && (mul_highpart_cost (speed, mode) 3636 + 2 * shift_cost (speed, mode, size-1) 3637 + 4 * add_cost (speed, mode) < max_cost)) 3638 { 3639 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab; 3640 tem = expand_binop (mode, moptab, op0, narrow_op1, target, 3641 unsignedp, OPTAB_DIRECT); 3642 if (tem) 3643 /* We used the wrong signedness. Adjust the result. */ 3644 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1, 3645 tem, unsignedp); 3646 } 3647 3648 /* Try widening multiplication. */ 3649 moptab = unsignedp ? umul_widen_optab : smul_widen_optab; 3650 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing 3651 && mul_widen_cost (speed, wider_mode) < max_cost) 3652 { 3653 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0, 3654 unsignedp, OPTAB_WIDEN); 3655 if (tem) 3656 return extract_high_half (mode, tem); 3657 } 3658 3659 /* Try widening the mode and perform a non-widening multiplication. 
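   (E.g. for SImode operands on a target with DImode arithmetic, the full 64-bit product is formed in DImode and extract_high_half then takes its upper 32 bits with a 32-bit right shift.)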
*/ 3660 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing 3661 && size - 1 < BITS_PER_WORD 3662 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1) 3663 < max_cost)) 3664 { 3665 rtx_insn *insns; 3666 rtx wop0, wop1; 3667 3668 /* We need to widen the operands, for example to ensure the 3669 constant multiplier is correctly sign or zero extended. 3670 Use a sequence to clean up any instructions emitted by 3671 the conversions if things don't work out. */ 3672 start_sequence (); 3673 wop0 = convert_modes (wider_mode, mode, op0, unsignedp); 3674 wop1 = convert_modes (wider_mode, mode, op1, unsignedp); 3675 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0, 3676 unsignedp, OPTAB_WIDEN); 3677 insns = get_insns (); 3678 end_sequence (); 3679 3680 if (tem) 3681 { 3682 emit_insn (insns); 3683 return extract_high_half (mode, tem); 3684 } 3685 } 3686 3687 /* Try widening multiplication of opposite signedness, and adjust. */ 3688 moptab = unsignedp ? smul_widen_optab : umul_widen_optab; 3689 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing 3690 && size - 1 < BITS_PER_WORD 3691 && (mul_widen_cost (speed, wider_mode) 3692 + 2 * shift_cost (speed, mode, size-1) 3693 + 4 * add_cost (speed, mode) < max_cost)) 3694 { 3695 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 3696 NULL_RTX, ! unsignedp, OPTAB_WIDEN); 3697 if (tem != 0) 3698 { 3699 tem = extract_high_half (mode, tem); 3700 /* We used the wrong signedness. Adjust the result. */ 3701 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1, 3702 target, unsignedp); 3703 } 3704 } 3705 3706 return 0; 3707} 3708 3709/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant), 3710 putting the high half of the result in TARGET if that is convenient, 3711 and return where the result is. If the operation cannot be performed, 3712 0 is returned. 3713 3714 MODE is the mode of operation and result. 3715 3716 UNSIGNEDP nonzero means unsigned multiply. 3717 3718 MAX_COST is the total allowed cost for the expanded RTL. */ 3719 3720static rtx 3721expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1, 3722 rtx target, int unsignedp, int max_cost) 3723{ 3724 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode); 3725 unsigned HOST_WIDE_INT cnst1; 3726 int extra_cost; 3727 bool sign_adjust = false; 3728 enum mult_variant variant; 3729 struct algorithm alg; 3730 rtx tem; 3731 bool speed = optimize_insn_for_speed_p (); 3732 3733 gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); 3734 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */ 3735 gcc_assert (HWI_COMPUTABLE_MODE_P (mode)); 3736 3737 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode); 3738 3739 /* We can't optimize modes wider than BITS_PER_WORD. 3740 ??? We might be able to perform double-word arithmetic if 3741 mode == word_mode, however all the cost calculations in 3742 synth_mult etc. assume single-word operations. */ 3743 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD) 3744 return expmed_mult_highpart_optab (mode, op0, op1, target, 3745 unsignedp, max_cost); 3746 3747 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1); 3748 3749 /* Check whether we try to multiply by a negative constant. */ 3750 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1)) 3751 { 3752 sign_adjust = true; 3753 extra_cost += add_cost (speed, mode); 3754 } 3755 3756 /* See whether shift/add multiplication is cheap enough.
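   (E.g. a highpart multiply by 9 can be synthesized in wider_mode as (x << 3) + x followed by extract_high_half, which may beat a full multiply insn on targets with slow multipliers.)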
*/ 3757 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant, 3758 max_cost - extra_cost)) 3759 { 3760 /* See whether the specialized multiplication optabs are 3761 cheaper than the shift/add version. */ 3762 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp, 3763 alg.cost.cost + extra_cost); 3764 if (tem) 3765 return tem; 3766 3767 tem = convert_to_mode (wider_mode, op0, unsignedp); 3768 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant); 3769 tem = extract_high_half (mode, tem); 3770 3771 /* Adjust result for signedness. */ 3772 if (sign_adjust) 3773 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem); 3774 3775 return tem; 3776 } 3777 return expmed_mult_highpart_optab (mode, op0, op1, target, 3778 unsignedp, max_cost); 3779} 3780 3781 3782/* Expand signed modulus of OP0 by a power of two D in mode MODE. */ 3783 3784static rtx 3785expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d) 3786{ 3787 rtx result, temp, shift; 3788 rtx_code_label *label; 3789 int logd; 3790 int prec = GET_MODE_PRECISION (mode); 3791 3792 logd = floor_log2 (d); 3793 result = gen_reg_rtx (mode); 3794 3795 /* Avoid conditional branches when they're expensive. */ 3796 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2 3797 && optimize_insn_for_speed_p ()) 3798 { 3799 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx, 3800 mode, 0, -1); 3801 if (signmask) 3802 { 3803 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1; 3804 signmask = force_reg (mode, signmask); 3805 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd); 3806 3807 /* Use the rtx_cost of a LSHIFTRT instruction to determine 3808 which instruction sequence to use. If logical right shifts 3809 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise 3810 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */ 3811 3812 temp = gen_rtx_LSHIFTRT (mode, result, shift); 3813 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing 3814 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ()) 3815 > COSTS_N_INSNS (2))) 3816 { 3817 temp = expand_binop (mode, xor_optab, op0, signmask, 3818 NULL_RTX, 1, OPTAB_LIB_WIDEN); 3819 temp = expand_binop (mode, sub_optab, temp, signmask, 3820 NULL_RTX, 1, OPTAB_LIB_WIDEN); 3821 temp = expand_binop (mode, and_optab, temp, 3822 gen_int_mode (masklow, mode), 3823 NULL_RTX, 1, OPTAB_LIB_WIDEN); 3824 temp = expand_binop (mode, xor_optab, temp, signmask, 3825 NULL_RTX, 1, OPTAB_LIB_WIDEN); 3826 temp = expand_binop (mode, sub_optab, temp, signmask, 3827 NULL_RTX, 1, OPTAB_LIB_WIDEN); 3828 } 3829 else 3830 { 3831 signmask = expand_binop (mode, lshr_optab, signmask, shift, 3832 NULL_RTX, 1, OPTAB_LIB_WIDEN); 3833 signmask = force_reg (mode, signmask); 3834 3835 temp = expand_binop (mode, add_optab, op0, signmask, 3836 NULL_RTX, 1, OPTAB_LIB_WIDEN); 3837 temp = expand_binop (mode, and_optab, temp, 3838 gen_int_mode (masklow, mode), 3839 NULL_RTX, 1, OPTAB_LIB_WIDEN); 3840 temp = expand_binop (mode, sub_optab, temp, signmask, 3841 NULL_RTX, 1, OPTAB_LIB_WIDEN); 3842 } 3843 return temp; 3844 } 3845 } 3846 3847 /* Mask contains the mode's signbit and the significant bits of the 3848 modulus. By including the signbit in the operation, many targets 3849 can avoid an explicit compare operation in the following comparison 3850 against zero.
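   For example, for x % 16 in a 32-bit mode, logd is 4 and the mask is 0x8000000F; a nonnegative AND result is already the correct remainder, while a negative result takes the adjustment path below.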
*/ 3851 wide_int mask = wi::mask (logd, false, prec); 3852 mask = wi::set_bit (mask, prec - 1); 3853 3854 temp = expand_binop (mode, and_optab, op0, 3855 immed_wide_int_const (mask, mode), 3856 result, 1, OPTAB_LIB_WIDEN); 3857 if (temp != result) 3858 emit_move_insn (result, temp); 3859 3860 label = gen_label_rtx (); 3861 do_cmp_and_jump (result, const0_rtx, GE, mode, label); 3862 3863 temp = expand_binop (mode, sub_optab, result, const1_rtx, result, 3864 0, OPTAB_LIB_WIDEN); 3865 3866 mask = wi::mask (logd, true, prec); 3867 temp = expand_binop (mode, ior_optab, temp, 3868 immed_wide_int_const (mask, mode), 3869 result, 1, OPTAB_LIB_WIDEN); 3870 temp = expand_binop (mode, add_optab, temp, const1_rtx, result, 3871 0, OPTAB_LIB_WIDEN); 3872 if (temp != result) 3873 emit_move_insn (result, temp); 3874 emit_label (label); 3875 return result; 3876} 3877 3878/* Expand signed division of OP0 by a power of two D in mode MODE. 3879 This routine is only called for positive values of D. */ 3880 3881static rtx 3882expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d) 3883{ 3884 rtx temp; 3885 rtx_code_label *label; 3886 int logd; 3887 3888 logd = floor_log2 (d); 3889 3890 if (d == 2 3891 && BRANCH_COST (optimize_insn_for_speed_p (), 3892 false) >= 1) 3893 { 3894 temp = gen_reg_rtx (mode); 3895 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1); 3896 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX, 3897 0, OPTAB_LIB_WIDEN); 3898 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0); 3899 } 3900 3901 if (HAVE_conditional_move 3902 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2) 3903 { 3904 rtx temp2; 3905 3906 start_sequence (); 3907 temp2 = copy_to_mode_reg (mode, op0); 3908 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode), 3909 NULL_RTX, 0, OPTAB_LIB_WIDEN); 3910 temp = force_reg (mode, temp); 3911 3912 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */ 3913 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx, 3914 mode, temp, temp2, mode, 0); 3915 if (temp2) 3916 { 3917 rtx_insn *seq = get_insns (); 3918 end_sequence (); 3919 emit_insn (seq); 3920 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0); 3921 } 3922 end_sequence (); 3923 } 3924 3925 if (BRANCH_COST (optimize_insn_for_speed_p (), 3926 false) >= 2) 3927 { 3928 int ushift = GET_MODE_BITSIZE (mode) - logd; 3929 3930 temp = gen_reg_rtx (mode); 3931 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1); 3932 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD 3933 || shift_cost (optimize_insn_for_speed_p (), mode, ushift) 3934 > COSTS_N_INSNS (1)) 3935 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode), 3936 NULL_RTX, 0, OPTAB_LIB_WIDEN); 3937 else 3938 temp = expand_shift (RSHIFT_EXPR, mode, temp, 3939 ushift, NULL_RTX, 1); 3940 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX, 3941 0, OPTAB_LIB_WIDEN); 3942 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0); 3943 } 3944 3945 label = gen_label_rtx (); 3946 temp = copy_to_mode_reg (mode, op0); 3947 do_cmp_and_jump (temp, const0_rtx, GE, mode, label); 3948 expand_inc (temp, gen_int_mode (d - 1, mode)); 3949 emit_label (label); 3950 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0); 3951} 3952 3953/* Emit the code to divide OP0 by OP1, putting the result in TARGET 3954 if that is convenient, and returning where the result is. 
3955 You may request either the quotient or the remainder as the result; 3956 specify REM_FLAG nonzero to get the remainder. 3957 3958 CODE is the expression code for which kind of division this is; 3959 it controls how rounding is done. MODE is the machine mode to use. 3960 UNSIGNEDP nonzero means do unsigned division. */ 3961 3962/* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI 3963 and then correct it by or'ing in missing high bits 3964 if result of ANDI is nonzero. 3965 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result. 3966 This could optimize to a bfexts instruction. 3967 But C doesn't use these operations, so their optimizations are 3968 left for later. */ 3969/* ??? For modulo, we don't actually need the highpart of the first product, 3970 the low part will do nicely. And for small divisors, the second multiply 3971 can also be a low-part only multiply or even be completely left out. 3972 E.g. to calculate the remainder of a division by 3 with a 32 bit 3973 multiply, multiply with 0x55555556 and extract the upper two bits; 3974 the result is exact for inputs up to 0x1fffffff. 3975 The input range can be reduced by using cross-sum rules. 3976 For odd divisors >= 3, the following table gives right shift counts 3977 so that if a number is shifted by an integer multiple of the given 3978 amount, the remainder stays the same: 3979 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20, 3980 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0, 3981 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0, 3982 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33, 3983 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12 3984 3985 Cross-sum rules for even numbers can be derived by leaving as many bits 3986 to the right alone as the divisor has zeros to the right. 3987 E.g. if x is an unsigned 32 bit number: 3988 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28 3989 */ 3990 3991rtx 3992expand_divmod (int rem_flag, enum tree_code code, machine_mode mode, 3993 rtx op0, rtx op1, rtx target, int unsignedp) 3994{ 3995 machine_mode compute_mode; 3996 rtx tquotient; 3997 rtx quotient = 0, remainder = 0; 3998 rtx_insn *last; 3999 int size; 4000 rtx_insn *insn; 4001 optab optab1, optab2; 4002 int op1_is_constant, op1_is_pow2 = 0; 4003 int max_cost, extra_cost; 4004 static HOST_WIDE_INT last_div_const = 0; 4005 bool speed = optimize_insn_for_speed_p (); 4006 4007 op1_is_constant = CONST_INT_P (op1); 4008 if (op1_is_constant) 4009 { 4010 wide_int ext_op1 = rtx_mode_t (op1, mode); 4011 op1_is_pow2 = (wi::popcount (ext_op1) == 1 4012 || (! unsignedp 4013 && wi::popcount (wi::neg (ext_op1)) == 1)); 4014 } 4015 4016 /* 4017 This is the structure of expand_divmod: 4018 4019 First comes code to fix up the operands so we can perform the operations 4020 correctly and efficiently. 4021 4022 Second comes a switch statement with code specific for each rounding mode. 4023 For some special operands this code emits all RTL for the desired 4024 operation, for other cases, it generates only a quotient and stores it in 4025 QUOTIENT. The case for trunc division/remainder might leave quotient = 0, 4026 to indicate that it has not done anything. 4027 4028 Last comes code that finishes the operation. If QUOTIENT is set and 4029 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If 4030 QUOTIENT is not set, it is computed using trunc rounding. 
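   (E.g. with OP0 = 7 and OP1 = 3 under trunc rounding, QUOTIENT is 2 and the remainder is finished as 7 - 2*3 = 1.)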
4031 4032 We try to generate special code for division and remainder when OP1 is a 4033 constant. If |OP1| = 2**n we can use shifts and some other fast 4034 operations. For other values of OP1, we compute a carefully selected 4035 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0 4036 by m. 4037 4038 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper 4039 half of the product. Different strategies for generating the product are 4040 implemented in expmed_mult_highpart. 4041 4042 If what we actually want is the remainder, we generate that by another 4043 by-constant multiplication and a subtraction. */ 4044 4045 /* We shouldn't be called with OP1 == const1_rtx, but some of the 4046 code below will malfunction if we are, so check here and handle 4047 the special case if so. */ 4048 if (op1 == const1_rtx) 4049 return rem_flag ? const0_rtx : op0; 4050 4051 /* When dividing by -1, we could get an overflow. 4052 negv_optab can handle overflows. */ 4053 if (! unsignedp && op1 == constm1_rtx) 4054 { 4055 if (rem_flag) 4056 return const0_rtx; 4057 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT 4058 ? negv_optab : neg_optab, op0, target, 0); 4059 } 4060 4061 if (target 4062 /* Don't use the function value register as a target 4063 since we have to read it as well as write it, 4064 and function-inlining gets confused by this. */ 4065 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target)) 4066 /* Don't clobber an operand while doing a multi-step calculation. */ 4067 || ((rem_flag || op1_is_constant) 4068 && (reg_mentioned_p (target, op0) 4069 || (MEM_P (op0) && MEM_P (target)))) 4070 || reg_mentioned_p (target, op1) 4071 || (MEM_P (op1) && MEM_P (target)))) 4072 target = 0; 4073 4074 /* Get the mode in which to perform this computation. Normally it will 4075 be MODE, but sometimes we can't do the desired operation in MODE. 4076 If so, pick a wider mode in which we can do the operation. Convert 4077 to that mode at the start to avoid repeated conversions. 4078 4079 First see what operations we need. These depend on the expression 4080 we are evaluating. (We assume that divxx3 insns exist under the 4081 same conditions that modxx3 insns and that these insns don't normally 4082 fail. If these assumptions are not correct, we may generate less 4083 efficient code in some cases.) 4084 4085 Then see if we find a mode in which we can open-code that operation 4086 (either a division, modulus, or shift). Finally, check for the smallest 4087 mode for which we can do the operation with a library call. */ 4088 4089 /* We might want to refine this now that we have division-by-constant 4090 optimization. Since expmed_mult_highpart tries so many variants, it is 4091 not straightforward to generalize this. Maybe we should make an array 4092 of possible modes in init_expmed? Save this for GCC 2.7. */ 4093 4094 optab1 = (op1_is_pow2 4095 ? (unsignedp ? lshr_optab : ashr_optab) 4096 : (unsignedp ? udiv_optab : sdiv_optab)); 4097 optab2 = (op1_is_pow2 ? optab1 4098 : (unsignedp ? 
udivmod_optab : sdivmod_optab)); 4099 4100 for (compute_mode = mode; compute_mode != VOIDmode; 4101 compute_mode = GET_MODE_WIDER_MODE (compute_mode)) 4102 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing 4103 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing) 4104 break; 4105 4106 if (compute_mode == VOIDmode) 4107 for (compute_mode = mode; compute_mode != VOIDmode; 4108 compute_mode = GET_MODE_WIDER_MODE (compute_mode)) 4109 if (optab_libfunc (optab1, compute_mode) 4110 || optab_libfunc (optab2, compute_mode)) 4111 break; 4112 4113 /* If we still couldn't find a mode, use MODE, but expand_binop will 4114 probably die. */ 4115 if (compute_mode == VOIDmode) 4116 compute_mode = mode; 4117 4118 if (target && GET_MODE (target) == compute_mode) 4119 tquotient = target; 4120 else 4121 tquotient = gen_reg_rtx (compute_mode); 4122 4123 size = GET_MODE_BITSIZE (compute_mode); 4124#if 0 4125 /* It should be possible to restrict the precision to GET_MODE_BITSIZE 4126 (mode), and thereby get better code when OP1 is a constant. Do that 4127 later. It will require going over all usages of SIZE below. */ 4128 size = GET_MODE_BITSIZE (mode); 4129#endif 4130 4131 /* Only deduct something for a REM if the last divide done was 4132 for a different constant. Then set the constant of the last 4133 divide. */ 4134 max_cost = (unsignedp 4135 ? udiv_cost (speed, compute_mode) 4136 : sdiv_cost (speed, compute_mode)); 4137 if (rem_flag && ! (last_div_const != 0 && op1_is_constant 4138 && INTVAL (op1) == last_div_const)) 4139 max_cost -= (mul_cost (speed, compute_mode) 4140 + add_cost (speed, compute_mode)); 4141 4142 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0; 4143 4144 /* Now convert to the best mode to use. */ 4145 if (compute_mode != mode) 4146 { 4147 op0 = convert_modes (compute_mode, mode, op0, unsignedp); 4148 op1 = convert_modes (compute_mode, mode, op1, unsignedp); 4149 4150 /* convert_modes may have placed op1 into a register, so we 4151 must recompute the following. */ 4152 op1_is_constant = CONST_INT_P (op1); 4153 if (op1_is_constant) 4154 { 4155 wide_int ext_op1 = rtx_mode_t (op1, compute_mode); 4156 op1_is_pow2 = (wi::popcount (ext_op1) == 1 4157 || (! unsignedp 4158 && wi::popcount (wi::neg (ext_op1)) == 1)); 4159 } 4160 else 4161 op1_is_pow2 = 0; 4162 } 4163 4164 /* If one of the operands is a volatile MEM, copy it into a register. */ 4165 4166 if (MEM_P (op0) && MEM_VOLATILE_P (op0)) 4167 op0 = force_reg (compute_mode, op0); 4168 if (MEM_P (op1) && MEM_VOLATILE_P (op1)) 4169 op1 = force_reg (compute_mode, op1); 4170 4171 /* If we need the remainder or if OP1 is constant, we need to 4172 put OP0 in a register in case it has any queued subexpressions. */ 4173 if (rem_flag || op1_is_constant) 4174 op0 = force_reg (compute_mode, op0); 4175 4176 last = get_last_insn (); 4177 4178 /* Promote floor rounding to trunc rounding for unsigned operations. 
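   (This is safe because an unsigned quotient is never negative, so floor and trunc rounding agree, and an exact unsigned division by a power of two is just a logical shift.)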
*/ 4179 if (unsignedp) 4180 { 4181 if (code == FLOOR_DIV_EXPR) 4182 code = TRUNC_DIV_EXPR; 4183 if (code == FLOOR_MOD_EXPR) 4184 code = TRUNC_MOD_EXPR; 4185 if (code == EXACT_DIV_EXPR && op1_is_pow2) 4186 code = TRUNC_DIV_EXPR; 4187 } 4188 4189 if (op1 != const0_rtx) 4190 switch (code) 4191 { 4192 case TRUNC_MOD_EXPR: 4193 case TRUNC_DIV_EXPR: 4194 if (op1_is_constant) 4195 { 4196 if (unsignedp) 4197 { 4198 unsigned HOST_WIDE_INT mh, ml; 4199 int pre_shift, post_shift; 4200 int dummy; 4201 wide_int wd = rtx_mode_t (op1, compute_mode); 4202 unsigned HOST_WIDE_INT d = wd.to_uhwi (); 4203 4204 if (wi::popcount (wd) == 1) 4205 { 4206 pre_shift = floor_log2 (d); 4207 if (rem_flag) 4208 { 4209 unsigned HOST_WIDE_INT mask 4210 = (HOST_WIDE_INT_1U << pre_shift) - 1; 4211 remainder 4212 = expand_binop (compute_mode, and_optab, op0, 4213 gen_int_mode (mask, compute_mode), 4214 remainder, 1, 4215 OPTAB_LIB_WIDEN); 4216 if (remainder) 4217 return gen_lowpart (mode, remainder); 4218 } 4219 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0, 4220 pre_shift, tquotient, 1); 4221 } 4222 else if (size <= HOST_BITS_PER_WIDE_INT) 4223 { 4224 if (d >= (HOST_WIDE_INT_1U << (size - 1))) 4225 { 4226 /* Most significant bit of divisor is set; emit an scc 4227 insn. */ 4228 quotient = emit_store_flag_force (tquotient, GEU, op0, op1, 4229 compute_mode, 1, 1); 4230 } 4231 else 4232 { 4233 /* Find a suitable multiplier and right shift count 4234 instead of multiplying with D. */ 4235 4236 mh = choose_multiplier (d, size, size, 4237 &ml, &post_shift, &dummy); 4238 4239 /* If the suggested multiplier is more than SIZE bits, 4240 we can do better for even divisors, using an 4241 initial right shift. */ 4242 if (mh != 0 && (d & 1) == 0) 4243 { 4244 pre_shift = ctz_or_zero (d); 4245 mh = choose_multiplier (d >> pre_shift, size, 4246 size - pre_shift, 4247 &ml, &post_shift, &dummy); 4248 gcc_assert (!mh); 4249 } 4250 else 4251 pre_shift = 0; 4252 4253 if (mh != 0) 4254 { 4255 rtx t1, t2, t3, t4; 4256 4257 if (post_shift - 1 >= BITS_PER_WORD) 4258 goto fail1; 4259 4260 extra_cost 4261 = (shift_cost (speed, compute_mode, post_shift - 1) 4262 + shift_cost (speed, compute_mode, 1) 4263 + 2 * add_cost (speed, compute_mode)); 4264 t1 = expmed_mult_highpart 4265 (compute_mode, op0, 4266 gen_int_mode (ml, compute_mode), 4267 NULL_RTX, 1, max_cost - extra_cost); 4268 if (t1 == 0) 4269 goto fail1; 4270 t2 = force_operand (gen_rtx_MINUS (compute_mode, 4271 op0, t1), 4272 NULL_RTX); 4273 t3 = expand_shift (RSHIFT_EXPR, compute_mode, 4274 t2, 1, NULL_RTX, 1); 4275 t4 = force_operand (gen_rtx_PLUS (compute_mode, 4276 t1, t3), 4277 NULL_RTX); 4278 quotient = expand_shift 4279 (RSHIFT_EXPR, compute_mode, t4, 4280 post_shift - 1, tquotient, 1); 4281 } 4282 else 4283 { 4284 rtx t1, t2; 4285 4286 if (pre_shift >= BITS_PER_WORD 4287 || post_shift >= BITS_PER_WORD) 4288 goto fail1; 4289 4290 t1 = expand_shift 4291 (RSHIFT_EXPR, compute_mode, op0, 4292 pre_shift, NULL_RTX, 1); 4293 extra_cost 4294 = (shift_cost (speed, compute_mode, pre_shift) 4295 + shift_cost (speed, compute_mode, post_shift)); 4296 t2 = expmed_mult_highpart 4297 (compute_mode, t1, 4298 gen_int_mode (ml, compute_mode), 4299 NULL_RTX, 1, max_cost - extra_cost); 4300 if (t2 == 0) 4301 goto fail1; 4302 quotient = expand_shift 4303 (RSHIFT_EXPR, compute_mode, t2, 4304 post_shift, tquotient, 1); 4305 } 4306 } 4307 } 4308 else /* Too wide mode to use tricky code */ 4309 break; 4310 4311 insn = get_last_insn (); 4312 if (insn != last) 4313 set_dst_reg_note (insn, REG_EQUAL, 4314 
gen_rtx_UDIV (compute_mode, op0, op1), 4315 quotient); 4316 } 4317 else /* TRUNC_DIV, signed */ 4318 { 4319 unsigned HOST_WIDE_INT ml; 4320 int lgup, post_shift; 4321 rtx mlr; 4322 HOST_WIDE_INT d = INTVAL (op1); 4323 unsigned HOST_WIDE_INT abs_d; 4324 4325 /* Not prepared to handle division/remainder by 4326 0xffffffffffffffff8000000000000000 etc. */ 4327 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT) 4328 break; 4329 4330 /* Since d might be INT_MIN, we have to cast to 4331 unsigned HOST_WIDE_INT before negating to avoid 4332 undefined signed overflow. */ 4333 abs_d = (d >= 0 4334 ? (unsigned HOST_WIDE_INT) d 4335 : - (unsigned HOST_WIDE_INT) d); 4336 4337 /* n rem d = n rem -d */ 4338 if (rem_flag && d < 0) 4339 { 4340 d = abs_d; 4341 op1 = gen_int_mode (abs_d, compute_mode); 4342 } 4343 4344 if (d == 1) 4345 quotient = op0; 4346 else if (d == -1) 4347 quotient = expand_unop (compute_mode, neg_optab, op0, 4348 tquotient, 0); 4349 else if (size <= HOST_BITS_PER_WIDE_INT 4350 && abs_d == HOST_WIDE_INT_1U << (size - 1)) 4351 { 4352 /* This case is not handled correctly below. */ 4353 quotient = emit_store_flag (tquotient, EQ, op0, op1, 4354 compute_mode, 1, 1); 4355 if (quotient == 0) 4356 goto fail1; 4357 } 4358 else if (EXACT_POWER_OF_2_OR_ZERO_P (d) 4359 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0) 4360 && (rem_flag 4361 ? smod_pow2_cheap (speed, compute_mode) 4362 : sdiv_pow2_cheap (speed, compute_mode)) 4363 /* We assume that cheap metric is true if the 4364 optab has an expander for this mode. */ 4365 && ((optab_handler ((rem_flag ? smod_optab 4366 : sdiv_optab), 4367 compute_mode) 4368 != CODE_FOR_nothing) 4369 || (optab_handler (sdivmod_optab, 4370 compute_mode) 4371 != CODE_FOR_nothing))) 4372 ; 4373 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)) 4374 { 4375 if (rem_flag) 4376 { 4377 remainder = expand_smod_pow2 (compute_mode, op0, d); 4378 if (remainder) 4379 return gen_lowpart (mode, remainder); 4380 } 4381 4382 if (sdiv_pow2_cheap (speed, compute_mode) 4383 && ((optab_handler (sdiv_optab, compute_mode) 4384 != CODE_FOR_nothing) 4385 || (optab_handler (sdivmod_optab, compute_mode) 4386 != CODE_FOR_nothing))) 4387 quotient = expand_divmod (0, TRUNC_DIV_EXPR, 4388 compute_mode, op0, 4389 gen_int_mode (abs_d, 4390 compute_mode), 4391 NULL_RTX, 0); 4392 else 4393 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d); 4394 4395 /* We have computed OP0 / abs(OP1). If OP1 is negative, 4396 negate the quotient. 
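   (Trunc rounding satisfies OP0 / OP1 == -(OP0 / -OP1); e.g. 20 / -4 == -(20 / 4) == -5.)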
*/ 4397 if (d < 0) 4398 { 4399 insn = get_last_insn (); 4400 if (insn != last 4401 && abs_d < (HOST_WIDE_INT_1U 4402 << (HOST_BITS_PER_WIDE_INT - 1))) 4403 set_dst_reg_note (insn, REG_EQUAL, 4404 gen_rtx_DIV (compute_mode, op0, 4405 gen_int_mode 4406 (abs_d, 4407 compute_mode)), 4408 quotient); 4409 4410 quotient = expand_unop (compute_mode, neg_optab, 4411 quotient, quotient, 0); 4412 } 4413 } 4414 else if (size <= HOST_BITS_PER_WIDE_INT) 4415 { 4416 choose_multiplier (abs_d, size, size - 1, 4417 &ml, &post_shift, &lgup); 4418 if (ml < HOST_WIDE_INT_1U << (size - 1)) 4419 { 4420 rtx t1, t2, t3; 4421 4422 if (post_shift >= BITS_PER_WORD 4423 || size - 1 >= BITS_PER_WORD) 4424 goto fail1; 4425 4426 extra_cost = (shift_cost (speed, compute_mode, post_shift) 4427 + shift_cost (speed, compute_mode, size - 1) 4428 + add_cost (speed, compute_mode)); 4429 t1 = expmed_mult_highpart 4430 (compute_mode, op0, gen_int_mode (ml, compute_mode), 4431 NULL_RTX, 0, max_cost - extra_cost); 4432 if (t1 == 0) 4433 goto fail1; 4434 t2 = expand_shift 4435 (RSHIFT_EXPR, compute_mode, t1, 4436 post_shift, NULL_RTX, 0); 4437 t3 = expand_shift 4438 (RSHIFT_EXPR, compute_mode, op0, 4439 size - 1, NULL_RTX, 0); 4440 if (d < 0) 4441 quotient 4442 = force_operand (gen_rtx_MINUS (compute_mode, 4443 t3, t2), 4444 tquotient); 4445 else 4446 quotient 4447 = force_operand (gen_rtx_MINUS (compute_mode, 4448 t2, t3), 4449 tquotient); 4450 } 4451 else 4452 { 4453 rtx t1, t2, t3, t4; 4454 4455 if (post_shift >= BITS_PER_WORD 4456 || size - 1 >= BITS_PER_WORD) 4457 goto fail1; 4458 4459 ml |= HOST_WIDE_INT_M1U << (size - 1); 4460 mlr = gen_int_mode (ml, compute_mode); 4461 extra_cost = (shift_cost (speed, compute_mode, post_shift) 4462 + shift_cost (speed, compute_mode, size - 1) 4463 + 2 * add_cost (speed, compute_mode)); 4464 t1 = expmed_mult_highpart (compute_mode, op0, mlr, 4465 NULL_RTX, 0, 4466 max_cost - extra_cost); 4467 if (t1 == 0) 4468 goto fail1; 4469 t2 = force_operand (gen_rtx_PLUS (compute_mode, 4470 t1, op0), 4471 NULL_RTX); 4472 t3 = expand_shift 4473 (RSHIFT_EXPR, compute_mode, t2, 4474 post_shift, NULL_RTX, 0); 4475 t4 = expand_shift 4476 (RSHIFT_EXPR, compute_mode, op0, 4477 size - 1, NULL_RTX, 0); 4478 if (d < 0) 4479 quotient 4480 = force_operand (gen_rtx_MINUS (compute_mode, 4481 t4, t3), 4482 tquotient); 4483 else 4484 quotient 4485 = force_operand (gen_rtx_MINUS (compute_mode, 4486 t3, t4), 4487 tquotient); 4488 } 4489 } 4490 else /* Too wide mode to use tricky code */ 4491 break; 4492 4493 insn = get_last_insn (); 4494 if (insn != last) 4495 set_dst_reg_note (insn, REG_EQUAL, 4496 gen_rtx_DIV (compute_mode, op0, op1), 4497 quotient); 4498 } 4499 break; 4500 } 4501 fail1: 4502 delete_insns_since (last); 4503 break; 4504 4505 case FLOOR_DIV_EXPR: 4506 case FLOOR_MOD_EXPR: 4507 /* We will come here only for signed operations. */ 4508 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT) 4509 { 4510 unsigned HOST_WIDE_INT mh, ml; 4511 int pre_shift, lgup, post_shift; 4512 HOST_WIDE_INT d = INTVAL (op1); 4513 4514 if (d > 0) 4515 { 4516 /* We could just as easily deal with negative constants here, 4517 but it does not seem worth the trouble for GCC 2.6. 
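   (For positive D = 2^K, floor division is exactly an arithmetic right shift by K; e.g. -7 / 4 floors to -2, which is what -7 >> 2 yields.)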
*/ 4518 if (EXACT_POWER_OF_2_OR_ZERO_P (d)) 4519 { 4520 pre_shift = floor_log2 (d); 4521 if (rem_flag) 4522 { 4523 unsigned HOST_WIDE_INT mask 4524 = (HOST_WIDE_INT_1U << pre_shift) - 1; 4525 remainder = expand_binop 4526 (compute_mode, and_optab, op0, 4527 gen_int_mode (mask, compute_mode), 4528 remainder, 0, OPTAB_LIB_WIDEN); 4529 if (remainder) 4530 return gen_lowpart (mode, remainder); 4531 } 4532 quotient = expand_shift 4533 (RSHIFT_EXPR, compute_mode, op0, 4534 pre_shift, tquotient, 0); 4535 } 4536 else 4537 { 4538 rtx t1, t2, t3, t4; 4539 4540 mh = choose_multiplier (d, size, size - 1, 4541 &ml, &post_shift, &lgup); 4542 gcc_assert (!mh); 4543 4544 if (post_shift < BITS_PER_WORD 4545 && size - 1 < BITS_PER_WORD) 4546 { 4547 t1 = expand_shift 4548 (RSHIFT_EXPR, compute_mode, op0, 4549 size - 1, NULL_RTX, 0); 4550 t2 = expand_binop (compute_mode, xor_optab, op0, t1, 4551 NULL_RTX, 0, OPTAB_WIDEN); 4552 extra_cost = (shift_cost (speed, compute_mode, post_shift) 4553 + shift_cost (speed, compute_mode, size - 1) 4554 + 2 * add_cost (speed, compute_mode)); 4555 t3 = expmed_mult_highpart 4556 (compute_mode, t2, gen_int_mode (ml, compute_mode), 4557 NULL_RTX, 1, max_cost - extra_cost); 4558 if (t3 != 0) 4559 { 4560 t4 = expand_shift 4561 (RSHIFT_EXPR, compute_mode, t3, 4562 post_shift, NULL_RTX, 1); 4563 quotient = expand_binop (compute_mode, xor_optab, 4564 t4, t1, tquotient, 0, 4565 OPTAB_WIDEN); 4566 } 4567 } 4568 } 4569 } 4570 else 4571 { 4572 rtx nsign, t1, t2, t3, t4; 4573 t1 = force_operand (gen_rtx_PLUS (compute_mode, 4574 op0, constm1_rtx), NULL_RTX); 4575 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX, 4576 0, OPTAB_WIDEN); 4577 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2, 4578 size - 1, NULL_RTX, 0); 4579 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign), 4580 NULL_RTX); 4581 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1, 4582 NULL_RTX, 0); 4583 if (t4) 4584 { 4585 rtx t5; 4586 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign, 4587 NULL_RTX, 0); 4588 quotient = force_operand (gen_rtx_PLUS (compute_mode, 4589 t4, t5), 4590 tquotient); 4591 } 4592 } 4593 } 4594 4595 if (quotient != 0) 4596 break; 4597 delete_insns_since (last); 4598 4599 /* Try using an instruction that produces both the quotient and 4600 remainder, using truncation. We can easily compensate the quotient 4601 or remainder to get floor rounding, once we have the remainder. 4602 Notice that we compute also the final remainder value here, 4603 and return the result right away. */ 4604 if (target == 0 || GET_MODE (target) != compute_mode) 4605 target = gen_reg_rtx (compute_mode); 4606 4607 if (rem_flag) 4608 { 4609 remainder 4610 = REG_P (target) ? target : gen_reg_rtx (compute_mode); 4611 quotient = gen_reg_rtx (compute_mode); 4612 } 4613 else 4614 { 4615 quotient 4616 = REG_P (target) ? target : gen_reg_rtx (compute_mode); 4617 remainder = gen_reg_rtx (compute_mode); 4618 } 4619 4620 if (expand_twoval_binop (sdivmod_optab, op0, op1, 4621 quotient, remainder, 0)) 4622 { 4623 /* This could be computed with a branch-less sequence. 4624 Save that for later. 
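   The fixup below relies on this identity: when the truncating remainder is nonzero and OP0 and OP1 have opposite signs (tested by XORing them), floor rounding needs quotient - 1 and remainder + OP1; e.g. -7 / 2 gives trunc q = -3, r = -1 but floor q = -4, r = 1.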
*/ 4625 rtx tem; 4626 rtx_code_label *label = gen_label_rtx (); 4627 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label); 4628 tem = expand_binop (compute_mode, xor_optab, op0, op1, 4629 NULL_RTX, 0, OPTAB_WIDEN); 4630 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label); 4631 expand_dec (quotient, const1_rtx); 4632 expand_inc (remainder, op1); 4633 emit_label (label); 4634 return gen_lowpart (mode, rem_flag ? remainder : quotient); 4635 } 4636 4637 /* No luck with division elimination or divmod. Have to do it 4638 by conditionally adjusting op0 *and* the result. */ 4639 { 4640 rtx_code_label *label1, *label2, *label3, *label4, *label5; 4641 rtx adjusted_op0; 4642 rtx tem; 4643 4644 quotient = gen_reg_rtx (compute_mode); 4645 adjusted_op0 = copy_to_mode_reg (compute_mode, op0); 4646 label1 = gen_label_rtx (); 4647 label2 = gen_label_rtx (); 4648 label3 = gen_label_rtx (); 4649 label4 = gen_label_rtx (); 4650 label5 = gen_label_rtx (); 4651 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); 4652 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1); 4653 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4654 quotient, 0, OPTAB_LIB_WIDEN); 4655 if (tem != quotient) 4656 emit_move_insn (quotient, tem); 4657 emit_jump_insn (targetm.gen_jump (label5)); 4658 emit_barrier (); 4659 emit_label (label1); 4660 expand_inc (adjusted_op0, const1_rtx); 4661 emit_jump_insn (targetm.gen_jump (label4)); 4662 emit_barrier (); 4663 emit_label (label2); 4664 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3); 4665 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4666 quotient, 0, OPTAB_LIB_WIDEN); 4667 if (tem != quotient) 4668 emit_move_insn (quotient, tem); 4669 emit_jump_insn (targetm.gen_jump (label5)); 4670 emit_barrier (); 4671 emit_label (label3); 4672 expand_dec (adjusted_op0, const1_rtx); 4673 emit_label (label4); 4674 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4675 quotient, 0, OPTAB_LIB_WIDEN); 4676 if (tem != quotient) 4677 emit_move_insn (quotient, tem); 4678 expand_dec (quotient, const1_rtx); 4679 emit_label (label5); 4680 } 4681 break; 4682 4683 case CEIL_DIV_EXPR: 4684 case CEIL_MOD_EXPR: 4685 if (unsignedp) 4686 { 4687 if (op1_is_constant 4688 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) 4689 && (size <= HOST_BITS_PER_WIDE_INT 4690 || INTVAL (op1) >= 0)) 4691 { 4692 rtx t1, t2, t3; 4693 unsigned HOST_WIDE_INT d = INTVAL (op1); 4694 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, 4695 floor_log2 (d), tquotient, 1); 4696 t2 = expand_binop (compute_mode, and_optab, op0, 4697 gen_int_mode (d - 1, compute_mode), 4698 NULL_RTX, 1, OPTAB_LIB_WIDEN); 4699 t3 = gen_reg_rtx (compute_mode); 4700 t3 = emit_store_flag (t3, NE, t2, const0_rtx, 4701 compute_mode, 1, 1); 4702 if (t3 == 0) 4703 { 4704 rtx_code_label *lab; 4705 lab = gen_label_rtx (); 4706 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); 4707 expand_inc (t1, const1_rtx); 4708 emit_label (lab); 4709 quotient = t1; 4710 } 4711 else 4712 quotient = force_operand (gen_rtx_PLUS (compute_mode, 4713 t1, t3), 4714 tquotient); 4715 break; 4716 } 4717 4718 /* Try using an instruction that produces both the quotient and 4719 remainder, using truncation. We can easily compensate the 4720 quotient or remainder to get ceiling rounding, once we have the 4721 remainder. Notice that we compute also the final remainder 4722 value here, and return the result right away. 
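   For example, 7 / 2 with ceiling rounding: the truncating divmod gives q = 3, r = 1; r being nonzero, the code below produces q + 1 = 4 and r - OP1 = -1, which still satisfies OP0 == q * OP1 + r.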
*/ 4723 if (target == 0 || GET_MODE (target) != compute_mode) 4724 target = gen_reg_rtx (compute_mode); 4725 4726 if (rem_flag) 4727 { 4728 remainder = (REG_P (target) 4729 ? target : gen_reg_rtx (compute_mode)); 4730 quotient = gen_reg_rtx (compute_mode); 4731 } 4732 else 4733 { 4734 quotient = (REG_P (target) 4735 ? target : gen_reg_rtx (compute_mode)); 4736 remainder = gen_reg_rtx (compute_mode); 4737 } 4738 4739 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, 4740 remainder, 1)) 4741 { 4742 /* This could be computed with a branch-less sequence. 4743 Save that for later. */ 4744 rtx_code_label *label = gen_label_rtx (); 4745 do_cmp_and_jump (remainder, const0_rtx, EQ, 4746 compute_mode, label); 4747 expand_inc (quotient, const1_rtx); 4748 expand_dec (remainder, op1); 4749 emit_label (label); 4750 return gen_lowpart (mode, rem_flag ? remainder : quotient); 4751 } 4752 4753 /* No luck with division elimination or divmod. Have to do it 4754 by conditionally adjusting op0 *and* the result. */ 4755 { 4756 rtx_code_label *label1, *label2; 4757 rtx adjusted_op0, tem; 4758 4759 quotient = gen_reg_rtx (compute_mode); 4760 adjusted_op0 = copy_to_mode_reg (compute_mode, op0); 4761 label1 = gen_label_rtx (); 4762 label2 = gen_label_rtx (); 4763 do_cmp_and_jump (adjusted_op0, const0_rtx, NE, 4764 compute_mode, label1); 4765 emit_move_insn (quotient, const0_rtx); 4766 emit_jump_insn (targetm.gen_jump (label2)); 4767 emit_barrier (); 4768 emit_label (label1); 4769 expand_dec (adjusted_op0, const1_rtx); 4770 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1, 4771 quotient, 1, OPTAB_LIB_WIDEN); 4772 if (tem != quotient) 4773 emit_move_insn (quotient, tem); 4774 expand_inc (quotient, const1_rtx); 4775 emit_label (label2); 4776 } 4777 } 4778 else /* signed */ 4779 { 4780 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) 4781 && INTVAL (op1) >= 0) 4782 { 4783 /* This is extremely similar to the code for the unsigned case 4784 above. For 2.7 we should merge these variants, but for 4785 2.6.1 I don't want to touch the code for unsigned since that 4786 gets used in C. The signed case will only be used by other 4787 languages (Ada). */ 4788 4789 rtx t1, t2, t3; 4790 unsigned HOST_WIDE_INT d = INTVAL (op1); 4791 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, 4792 floor_log2 (d), tquotient, 0); 4793 t2 = expand_binop (compute_mode, and_optab, op0, 4794 gen_int_mode (d - 1, compute_mode), 4795 NULL_RTX, 1, OPTAB_LIB_WIDEN); 4796 t3 = gen_reg_rtx (compute_mode); 4797 t3 = emit_store_flag (t3, NE, t2, const0_rtx, 4798 compute_mode, 1, 1); 4799 if (t3 == 0) 4800 { 4801 rtx_code_label *lab; 4802 lab = gen_label_rtx (); 4803 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); 4804 expand_inc (t1, const1_rtx); 4805 emit_label (lab); 4806 quotient = t1; 4807 } 4808 else 4809 quotient = force_operand (gen_rtx_PLUS (compute_mode, 4810 t1, t3), 4811 tquotient); 4812 break; 4813 } 4814 4815 /* Try using an instruction that produces both the quotient and 4816 remainder, using truncation. We can easily compensate the 4817 quotient or remainder to get ceiling rounding, once we have the 4818 remainder. Notice that we compute also the final remainder 4819 value here, and return the result right away. */ 4820 if (target == 0 || GET_MODE (target) != compute_mode) 4821 target = gen_reg_rtx (compute_mode); 4822 if (rem_flag) 4823 { 4824 remainder = (REG_P (target) 4825 ?
target : gen_reg_rtx (compute_mode)); 4826 quotient = gen_reg_rtx (compute_mode); 4827 } 4828 else 4829 { 4830 quotient = (REG_P (target) 4831 ? target : gen_reg_rtx (compute_mode)); 4832 remainder = gen_reg_rtx (compute_mode); 4833 } 4834 4835 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, 4836 remainder, 0)) 4837 { 4838 /* This could be computed with a branch-less sequence. 4839 Save that for later. */ 4840 rtx tem; 4841 rtx_code_label *label = gen_label_rtx (); 4842 do_cmp_and_jump (remainder, const0_rtx, EQ, 4843 compute_mode, label); 4844 tem = expand_binop (compute_mode, xor_optab, op0, op1, 4845 NULL_RTX, 0, OPTAB_WIDEN); 4846 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label); 4847 expand_inc (quotient, const1_rtx); 4848 expand_dec (remainder, op1); 4849 emit_label (label); 4850 return gen_lowpart (mode, rem_flag ? remainder : quotient); 4851 } 4852 4853 /* No luck with division elimination or divmod. Have to do it 4854 by conditionally adjusting op0 *and* the result. */ 4855 { 4856 rtx_code_label *label1, *label2, *label3, *label4, *label5; 4857 rtx adjusted_op0; 4858 rtx tem; 4859 4860 quotient = gen_reg_rtx (compute_mode); 4861 adjusted_op0 = copy_to_mode_reg (compute_mode, op0); 4862 label1 = gen_label_rtx (); 4863 label2 = gen_label_rtx (); 4864 label3 = gen_label_rtx (); 4865 label4 = gen_label_rtx (); 4866 label5 = gen_label_rtx (); 4867 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); 4868 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, 4869 compute_mode, label1); 4870 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4871 quotient, 0, OPTAB_LIB_WIDEN); 4872 if (tem != quotient) 4873 emit_move_insn (quotient, tem); 4874 emit_jump_insn (targetm.gen_jump (label5)); 4875 emit_barrier (); 4876 emit_label (label1); 4877 expand_dec (adjusted_op0, const1_rtx); 4878 emit_jump_insn (targetm.gen_jump (label4)); 4879 emit_barrier (); 4880 emit_label (label2); 4881 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, 4882 compute_mode, label3); 4883 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4884 quotient, 0, OPTAB_LIB_WIDEN); 4885 if (tem != quotient) 4886 emit_move_insn (quotient, tem); 4887 emit_jump_insn (targetm.gen_jump (label5)); 4888 emit_barrier (); 4889 emit_label (label3); 4890 expand_inc (adjusted_op0, const1_rtx); 4891 emit_label (label4); 4892 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4893 quotient, 0, OPTAB_LIB_WIDEN); 4894 if (tem != quotient) 4895 emit_move_insn (quotient, tem); 4896 expand_inc (quotient, const1_rtx); 4897 emit_label (label5); 4898 } 4899 } 4900 break; 4901 4902 case EXACT_DIV_EXPR: 4903 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT) 4904 { 4905 HOST_WIDE_INT d = INTVAL (op1); 4906 unsigned HOST_WIDE_INT ml; 4907 int pre_shift; 4908 rtx t1; 4909 4910 pre_shift = ctz_or_zero (d); 4911 ml = invert_mod2n (d >> pre_shift, size); 4912 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, 4913 pre_shift, NULL_RTX, unsignedp); 4914 quotient = expand_mult (compute_mode, t1, 4915 gen_int_mode (ml, compute_mode), 4916 NULL_RTX, 1); 4917 4918 insn = get_last_insn (); 4919 set_dst_reg_note (insn, REG_EQUAL, 4920 gen_rtx_fmt_ee (unsignedp ? 
UDIV : DIV, 4921 compute_mode, op0, op1), 4922 quotient); 4923 } 4924 break; 4925 4926 case ROUND_DIV_EXPR: 4927 case ROUND_MOD_EXPR: 4928 if (unsignedp) 4929 { 4930 rtx tem; 4931 rtx_code_label *label; 4932 label = gen_label_rtx (); 4933 quotient = gen_reg_rtx (compute_mode); 4934 remainder = gen_reg_rtx (compute_mode); 4935 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0) 4936 { 4937 rtx tem; 4938 quotient = expand_binop (compute_mode, udiv_optab, op0, op1, 4939 quotient, 1, OPTAB_LIB_WIDEN); 4940 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1); 4941 remainder = expand_binop (compute_mode, sub_optab, op0, tem, 4942 remainder, 1, OPTAB_LIB_WIDEN); 4943 } 4944 tem = plus_constant (compute_mode, op1, -1); 4945 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1); 4946 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label); 4947 expand_inc (quotient, const1_rtx); 4948 expand_dec (remainder, op1); 4949 emit_label (label); 4950 } 4951 else 4952 { 4953 rtx abs_rem, abs_op1, tem, mask; 4954 rtx_code_label *label; 4955 label = gen_label_rtx (); 4956 quotient = gen_reg_rtx (compute_mode); 4957 remainder = gen_reg_rtx (compute_mode); 4958 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0) 4959 { 4960 rtx tem; 4961 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1, 4962 quotient, 0, OPTAB_LIB_WIDEN); 4963 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0); 4964 remainder = expand_binop (compute_mode, sub_optab, op0, tem, 4965 remainder, 0, OPTAB_LIB_WIDEN); 4966 } 4967 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0); 4968 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0); 4969 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem, 4970 1, NULL_RTX, 1); 4971 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label); 4972 tem = expand_binop (compute_mode, xor_optab, op0, op1, 4973 NULL_RTX, 0, OPTAB_WIDEN); 4974 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem, 4975 size - 1, NULL_RTX, 0); 4976 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx, 4977 NULL_RTX, 0, OPTAB_WIDEN); 4978 tem = expand_binop (compute_mode, sub_optab, tem, mask, 4979 NULL_RTX, 0, OPTAB_WIDEN); 4980 expand_inc (quotient, tem); 4981 tem = expand_binop (compute_mode, xor_optab, mask, op1, 4982 NULL_RTX, 0, OPTAB_WIDEN); 4983 tem = expand_binop (compute_mode, sub_optab, tem, mask, 4984 NULL_RTX, 0, OPTAB_WIDEN); 4985 expand_dec (remainder, tem); 4986 emit_label (label); 4987 } 4988 return gen_lowpart (mode, rem_flag ? remainder : quotient); 4989 4990 default: 4991 gcc_unreachable (); 4992 } 4993 4994 if (quotient == 0) 4995 { 4996 if (target && GET_MODE (target) != compute_mode) 4997 target = 0; 4998 4999 if (rem_flag) 5000 { 5001 /* Try to produce the remainder without producing the quotient. 5002 If we seem to have a divmod pattern that does not require widening, 5003 don't try widening here. We should really have a WIDEN argument 5004 to expand_twoval_binop, since what we'd really like to do here is 5005 1) try a mod insn in compute_mode 5006 2) try a divmod insn in compute_mode 5007 3) try a div insn in compute_mode and multiply-subtract to get 5008 remainder 5009 4) try the same things with widening allowed. */ 5010 remainder 5011 = sign_expand_binop (compute_mode, umod_optab, smod_optab, 5012 op0, op1, target, 5013 unsignedp, 5014 ((optab_handler (optab2, compute_mode) 5015 != CODE_FOR_nothing) 5016 ? 
OPTAB_DIRECT : OPTAB_WIDEN)); 5017 if (remainder == 0) 5018 { 5019 /* No luck there. Can we do remainder and divide at once 5020 without a library call? */ 5021 remainder = gen_reg_rtx (compute_mode); 5022 if (! expand_twoval_binop ((unsignedp 5023 ? udivmod_optab 5024 : sdivmod_optab), 5025 op0, op1, 5026 NULL_RTX, remainder, unsignedp)) 5027 remainder = 0; 5028 } 5029 5030 if (remainder) 5031 return gen_lowpart (mode, remainder); 5032 } 5033 5034 /* Produce the quotient. Try a quotient insn, but not a library call. 5035 If we have a divmod in this mode, use it in preference to widening 5036 the div (for this test we assume it will not fail). Note that optab2 5037 is set to whichever of the two optabs the call below will use. */ 5038 quotient 5039 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab, 5040 op0, op1, rem_flag ? NULL_RTX : target, 5041 unsignedp, 5042 ((optab_handler (optab2, compute_mode) 5043 != CODE_FOR_nothing) 5044 ? OPTAB_DIRECT : OPTAB_WIDEN)); 5045 5046 if (quotient == 0) 5047 { 5048 /* No luck there. Try a quotient-and-remainder insn, 5049 keeping the quotient alone. */ 5050 quotient = gen_reg_rtx (compute_mode); 5051 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab, 5052 op0, op1, 5053 quotient, NULL_RTX, unsignedp)) 5054 { 5055 quotient = 0; 5056 if (! rem_flag) 5057 /* Still no luck. If we are not computing the remainder, 5058 use a library call for the quotient. */ 5059 quotient = sign_expand_binop (compute_mode, 5060 udiv_optab, sdiv_optab, 5061 op0, op1, target, 5062 unsignedp, OPTAB_LIB_WIDEN); 5063 } 5064 } 5065 } 5066 5067 if (rem_flag) 5068 { 5069 if (target && GET_MODE (target) != compute_mode) 5070 target = 0; 5071 5072 if (quotient == 0) 5073 { 5074 /* No divide instruction either. Use library for remainder. */ 5075 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab, 5076 op0, op1, target, 5077 unsignedp, OPTAB_LIB_WIDEN); 5078 /* No remainder function. Try a quotient-and-remainder 5079 function, keeping the remainder. */ 5080 if (!remainder) 5081 { 5082 remainder = gen_reg_rtx (compute_mode); 5083 if (!expand_twoval_binop_libfunc 5084 (unsignedp ? udivmod_optab : sdivmod_optab, 5085 op0, op1, 5086 NULL_RTX, remainder, 5087 unsignedp ? UMOD : MOD)) 5088 remainder = NULL_RTX; 5089 } 5090 } 5091 else 5092 { 5093 /* We divided. Now finish doing X - Y * (X / Y). */ 5094 remainder = expand_mult (compute_mode, quotient, op1, 5095 NULL_RTX, unsignedp); 5096 remainder = expand_binop (compute_mode, sub_optab, op0, 5097 remainder, target, unsignedp, 5098 OPTAB_LIB_WIDEN); 5099 } 5100 } 5101 5102 return gen_lowpart (mode, rem_flag ? remainder : quotient); 5103} 5104 5105/* Return a tree node with data type TYPE, describing the value of X. 5106 Usually this is a VAR_DECL, if there is no obvious better choice. 5107 X may be an expression; however, we only support those expressions 5108 generated by loop.c.
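   For example, a (plus (reg) (const_int 4)) becomes a PLUS_EXPR of the corresponding operand trees, and a CONST_INT becomes an INTEGER_CST of TYPE.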
*/ 5109 5110tree 5111make_tree (tree type, rtx x) 5112{ 5113 tree t; 5114 5115 switch (GET_CODE (x)) 5116 { 5117 case CONST_INT: 5118 case CONST_WIDE_INT: 5119 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type))); 5120 return t; 5121 5122 case CONST_DOUBLE: 5123 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT); 5124 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode) 5125 t = wide_int_to_tree (type, 5126 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2, 5127 HOST_BITS_PER_WIDE_INT * 2)); 5128 else 5129 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x)); 5130 5131 return t; 5132 5133 case CONST_VECTOR: 5134 { 5135 int units = CONST_VECTOR_NUNITS (x); 5136 tree itype = TREE_TYPE (type); 5137 tree *elts; 5138 int i; 5139 5140 /* Build a tree with vector elements. */ 5141 elts = XALLOCAVEC (tree, units); 5142 for (i = units - 1; i >= 0; --i) 5143 { 5144 rtx elt = CONST_VECTOR_ELT (x, i); 5145 elts[i] = make_tree (itype, elt); 5146 } 5147 5148 return build_vector (type, elts); 5149 } 5150 5151 case PLUS: 5152 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)), 5153 make_tree (type, XEXP (x, 1))); 5154 5155 case MINUS: 5156 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)), 5157 make_tree (type, XEXP (x, 1))); 5158 5159 case NEG: 5160 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))); 5161 5162 case MULT: 5163 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)), 5164 make_tree (type, XEXP (x, 1))); 5165 5166 case ASHIFT: 5167 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)), 5168 make_tree (type, XEXP (x, 1))); 5169 5170 case LSHIFTRT: 5171 t = unsigned_type_for (type); 5172 return fold_convert (type, build2 (RSHIFT_EXPR, t, 5173 make_tree (t, XEXP (x, 0)), 5174 make_tree (type, XEXP (x, 1)))); 5175 5176 case ASHIFTRT: 5177 t = signed_type_for (type); 5178 return fold_convert (type, build2 (RSHIFT_EXPR, t, 5179 make_tree (t, XEXP (x, 0)), 5180 make_tree (type, XEXP (x, 1)))); 5181 5182 case DIV: 5183 if (TREE_CODE (type) != REAL_TYPE) 5184 t = signed_type_for (type); 5185 else 5186 t = type; 5187 5188 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, 5189 make_tree (t, XEXP (x, 0)), 5190 make_tree (t, XEXP (x, 1)))); 5191 case UDIV: 5192 t = unsigned_type_for (type); 5193 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, 5194 make_tree (t, XEXP (x, 0)), 5195 make_tree (t, XEXP (x, 1)))); 5196 5197 case SIGN_EXTEND: 5198 case ZERO_EXTEND: 5199 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)), 5200 GET_CODE (x) == ZERO_EXTEND); 5201 return fold_convert (type, make_tree (t, XEXP (x, 0))); 5202 5203 case CONST: 5204 return make_tree (type, XEXP (x, 0)); 5205 5206 case SYMBOL_REF: 5207 t = SYMBOL_REF_DECL (x); 5208 if (t) 5209 return fold_convert (type, build_fold_addr_expr (t)); 5210 /* fall through. */ 5211 5212 default: 5213 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type); 5214 5215 /* If TYPE is a POINTER_TYPE, we might need to convert X from 5216 address mode to pointer mode. */ 5217 if (POINTER_TYPE_P (type)) 5218 x = convert_memory_address_addr_space 5219 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type))); 5220 5221 /* Note that we do *not* use SET_DECL_RTL here, because we do not 5222 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */ 5223 t->decl_with_rtl.rtl = x; 5224 5225 return t; 5226 } 5227} 5228 5229/* Compute the logical-and of OP0 and OP1, storing it in TARGET 5230 and returning TARGET. 
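   (When both operands are VOIDmode constants the AND is folded to a constant at compile time; otherwise it is expanded through and_optab.)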

/* Compute the logical-and of OP0 and OP1, storing it in TARGET
   and returning TARGET.

   If TARGET is 0, a pseudo-register or constant is returned.  */

rtx
expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
{
  rtx tem = 0;

  if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
    tem = simplify_binary_operation (AND, mode, op0, op1);
  if (tem == 0)
    tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);

  if (target == 0)
    target = tem;
  else if (tem != target)
    emit_move_insn (target, tem);
  return target;
}
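
/* Usage sketch (editor's illustration, not part of the original source):

     rtx low_byte = expand_and (SImode, op0, GEN_INT (0xff), NULL_RTX);

   emits whatever AND sequence the target provides and returns an rtx
   holding the low byte of OP0; when both operands are VOIDmode
   constants, the result instead folds at compile time through
   simplify_binary_operation.  */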

/* Helper function for emit_store_flag.  */
rtx
emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
	     machine_mode mode, machine_mode compare_mode,
	     int unsignedp, rtx x, rtx y, int normalizep,
	     machine_mode target_mode)
{
  struct expand_operand ops[4];
  rtx op0, comparison, subtarget;
  rtx_insn *last;
  machine_mode result_mode = targetm.cstore_mode (icode);

  last = get_last_insn ();
  x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
  y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
  if (!x || !y)
    {
      delete_insns_since (last);
      return NULL_RTX;
    }

  if (target_mode == VOIDmode)
    target_mode = result_mode;
  if (!target)
    target = gen_reg_rtx (target_mode);

  comparison = gen_rtx_fmt_ee (code, result_mode, x, y);

  create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
  create_fixed_operand (&ops[1], comparison);
  create_fixed_operand (&ops[2], x);
  create_fixed_operand (&ops[3], y);
  if (!maybe_expand_insn (icode, 4, ops))
    {
      delete_insns_since (last);
      return NULL_RTX;
    }
  subtarget = ops[0].value;

  /* If we are converting to a wider mode, first convert to
     TARGET_MODE, then normalize.  This produces better combining
     opportunities on machines that have a SIGN_EXTRACT when we are
     testing a single bit.  This mostly benefits the 68k.

     If STORE_FLAG_VALUE does not have the sign bit set when
     interpreted in MODE, we can do this conversion as unsigned, which
     is usually more efficient.  */
  if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
    {
      convert_move (target, subtarget,
		    val_signbit_known_clear_p (result_mode,
					       STORE_FLAG_VALUE));
      op0 = target;
      result_mode = target_mode;
    }
  else
    op0 = subtarget;

  /* If we want to keep subexpressions around, don't reuse our last
     target.  */
  if (optimize)
    subtarget = 0;

  /* Now normalize to the proper value in MODE.  Sometimes we don't
     have to do anything.  */
  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
    ;
  /* STORE_FLAG_VALUE might be the most negative number, so write
     the comparison this way to avoid a compile-time warning.  */
  else if (- normalizep == STORE_FLAG_VALUE)
    op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);

  /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
     it hard to use a value of just the sign bit due to ANSI integer
     constant typing rules.  */
  else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
    op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
			normalizep == 1);
  else
    {
      gcc_assert (STORE_FLAG_VALUE & 1);

      op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
      if (normalizep == -1)
	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
    }

  /* If we were converting to a smaller mode, do the conversion now.  */
  if (target_mode != result_mode)
    {
      convert_move (target, op0, 0);
      return target;
    }
  else
    return op0;
}
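
/* Worked example (editor's sketch, not part of the original source): on
   a target whose raw comparison result is the sign bit,
   STORE_FLAG_VALUE is the most negative value of RESULT_MODE.  To
   normalize that raw value to 0/1 (NORMALIZEP == 1), the shift above is
   logical: for a hypothetical 32-bit RESULT_MODE,
   (unsigned) op0 >> 31 yields 1 when the flag is set and 0 otherwise.
   For a 0/-1 result (NORMALIZEP == -1) the same shift is arithmetic,
   (int) op0 >> 31, smearing the sign bit across the word.  */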


/* A subroutine of emit_store_flag only including "tricks" that do not
   need a recursive call.  These are kept separate to avoid infinite
   loops.  */

static rtx
emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
		   machine_mode mode, int unsignedp, int normalizep,
		   machine_mode target_mode)
{
  rtx subtarget;
  enum insn_code icode;
  machine_mode compare_mode;
  enum mode_class mclass;
  enum rtx_code scode;

  if (unsignedp)
    code = unsigned_condition (code);
  scode = swap_condition (code);

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */

  if (swap_commutative_operands_p (op0, op1))
    {
      std::swap (op0, op1);
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  /* For some comparisons with 1 and -1, we can convert this to
     comparisons with zero.  This will often produce more opportunities for
     store-flag insns.  */

  switch (code)
    {
    case LT:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = LE;
      break;
    case LE:
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = LT;
      break;
    case GE:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = GT;
      break;
    case GT:
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = GE;
      break;
    case GEU:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = NE;
      break;
    case LTU:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = EQ;
      break;
    default:
      break;
    }

  /* If we are comparing a double-word integer with zero or -1, we can
     convert the comparison into one involving a single word.  */
  if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
      && GET_MODE_CLASS (mode) == MODE_INT
      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
    {
      rtx tem;
      if ((code == EQ || code == NE)
	  && (op1 == const0_rtx || op1 == constm1_rtx))
	{
	  rtx op00, op01;

	  /* Do a logical OR or AND of the two words and compare the
	     result.  */
	  op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
	  op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
	  tem = expand_binop (word_mode,
			      op1 == const0_rtx ? ior_optab : and_optab,
			      op00, op01, NULL_RTX, unsignedp,
			      OPTAB_DIRECT);

	  if (tem != 0)
	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
				   unsignedp, normalizep);
	}
      else if ((code == LT || code == GE) && op1 == const0_rtx)
	{
	  rtx op0h;

	  /* If testing the sign bit, we can just test the high word.  */
	  op0h = simplify_gen_subreg (word_mode, op0, mode,
				      subreg_highpart_offset (word_mode,
							      mode));
	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
				 unsignedp, normalizep);
	}
      else
	tem = NULL_RTX;

      if (tem)
	{
	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
	    return tem;
	  if (!target)
	    target = gen_reg_rtx (target_mode);

	  convert_move (target, tem,
			!val_signbit_known_set_p (word_mode,
						  (normalizep ? normalizep
						   : STORE_FLAG_VALUE)));
	  return target;
	}
    }
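
  /* Worked example (editor's sketch, not part of the original source):
     for a 64-bit comparison on a 32-bit-word target, the block above
     rewrites "x == 0" as "(lo | hi) == 0" and "x == -1" as
     "(lo & hi) == -1", while "x < 0" and "x >= 0" test only the high
     word, since that word alone carries the sign bit.  */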

  /* If this is A < 0 or A >= 0, we can do this by taking the one's
     complement of A (for GE) and shifting the sign bit to the low bit.  */
  if (op1 == const0_rtx && (code == LT || code == GE)
      && GET_MODE_CLASS (mode) == MODE_INT
      && (normalizep || STORE_FLAG_VALUE == 1
	  || val_signbit_p (mode, STORE_FLAG_VALUE)))
    {
      subtarget = target;

      if (!target)
	target_mode = mode;

      /* If the result is to be wider than OP0, it is best to convert it
	 first.  If it is to be narrower, it is *incorrect* to convert it
	 first.  */
      else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
	{
	  op0 = convert_modes (target_mode, mode, op0, 0);
	  mode = target_mode;
	}

      if (target_mode != mode)
	subtarget = 0;

      if (code == GE)
	op0 = expand_unop (mode, one_cmpl_optab, op0,
			   ((STORE_FLAG_VALUE == 1 || normalizep)
			    ? 0 : subtarget), 0);

      if (STORE_FLAG_VALUE == 1 || normalizep)
	/* If we are supposed to produce a 0/1 value, we want to do
	   a logical shift from the sign bit to the low-order bit; for
	   a -1/0 value, we do an arithmetic shift.  */
	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
			    GET_MODE_BITSIZE (mode) - 1,
			    subtarget, normalizep != -1);

      if (mode != target_mode)
	op0 = convert_modes (target_mode, mode, op0, 0);

      return op0;
    }
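
  /* Worked example (editor's sketch, not part of the original source):
     with hypothetical 32-bit operands, the block above turns "A < 0"
     into (unsigned) A >> 31 for a 0/1 result (or (int) A >> 31 for
     0/-1), and "A >= 0" into the same shift applied to ~A; e.g. for
     A = -5, ~A = 4 and (unsigned) 4 >> 31 = 0, correctly false, while
     for A = 3, ~A = -4 and (unsigned) -4 >> 31 = 1, correctly true.  */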

  mclass = GET_MODE_CLASS (mode);
  for (compare_mode = mode; compare_mode != VOIDmode;
       compare_mode = GET_MODE_WIDER_MODE (compare_mode))
    {
      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
      icode = optab_handler (cstore_optab, optab_mode);
      if (icode != CODE_FOR_nothing)
	{
	  do_pending_stack_adjust ();
	  rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
				 unsignedp, op0, op1, normalizep, target_mode);
	  if (tem)
	    return tem;

	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	    {
	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
				 unsignedp, op1, op0, normalizep, target_mode);
	      if (tem)
		return tem;
	    }
	  break;
	}
    }

  return 0;
}

/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
   and storing in TARGET.  Normally return TARGET.
   Return 0 if that cannot be done.

   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
   it is VOIDmode, they cannot both be CONST_INT.

   UNSIGNEDP is for the case where we have to widen the operands
   to perform the operation.  It says to use zero-extension.

   NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  NORMALIZEP is -1 if we should convert the result to be
   either zero or -1.  If NORMALIZEP is zero, the result will be left
   "raw" out of the scc insn.  */

rtx
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
		 machine_mode mode, int unsignedp, int normalizep)
{
  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  enum rtx_code rcode;
  rtx subtarget;
  rtx tem, trueval;
  rtx_insn *last;

  /* If we compare constants, we shouldn't use a store-flag operation,
     but a constant load.  We can get there via the vanilla route that
     usually generates a compare-branch sequence, but will in this case
     fold the comparison to a constant, and thus elide the branch.  */
  if (CONSTANT_P (op0) && CONSTANT_P (op1))
    return NULL_RTX;

  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
			   target_mode);
  if (tem)
    return tem;

  /* If we reached here, we can't do this with a scc insn, however there
     are some comparisons that can be done in other ways.  Don't do any
     of these cases if branches are very cheap.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
    return 0;

  /* See what we need to return.  We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
	normalizep = STORE_FLAG_VALUE;

      else if (val_signbit_p (mode, STORE_FLAG_VALUE))
	;
      else
	return 0;
    }

  last = get_last_insn ();

  /* If optimizing, use different pseudo registers for each insn, instead
     of reusing the same pseudo.  This leads to better CSE, but slows
     down the compiler, since there are more pseudos.  */
  subtarget = (!optimize
	       && (target_mode == mode)) ? target : NULL_RTX;
  trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);

  /* For floating-point comparisons, try the reverse comparison or try
     changing the "orderedness" of the comparison.  */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      enum rtx_code first_code;
      bool and_them;

      rcode = reverse_condition_maybe_unordered (code);
      if (can_compare_p (rcode, mode, ccp_store_flag)
	  && (code == ORDERED || code == UNORDERED
	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
	{
	  int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
			  || (STORE_FLAG_VALUE == -1 && normalizep == 1));

	  /* For the reverse comparison, use either an addition or a XOR.  */
	  if (want_add
	      && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
			   optimize_insn_for_speed_p ()) == 0)
	    {
	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       STORE_FLAG_VALUE, target_mode);
	      if (tem)
		return expand_binop (target_mode, add_optab, tem,
				     gen_int_mode (normalizep, target_mode),
				     target, 0, OPTAB_WIDEN);
	    }
	  else if (!want_add
		   && rtx_cost (trueval, mode, XOR, 1,
				optimize_insn_for_speed_p ()) == 0)
	    {
	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       normalizep, target_mode);
	      if (tem)
		return expand_binop (target_mode, xor_optab, tem, trueval,
				     target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
	    }
	}

      delete_insns_since (last);

      /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
      if (code == ORDERED || code == UNORDERED)
	return 0;

      and_them = split_comparison (code, mode, &first_code, &code);

      /* If there are no NaNs, the first comparison should always fall
	 through.  Effectively change the comparison to the other one.  */
      if (!HONOR_NANS (mode))
	{
	  gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
	  return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
				    target_mode);
	}

      if (!HAVE_conditional_move)
	return 0;

      /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
	 conditional move.  */
      tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
			       normalizep, target_mode);
      if (tem == 0)
	return 0;

      if (and_them)
	tem = emit_conditional_move (target, code, op0, op1, mode,
				     tem, const0_rtx, GET_MODE (tem), 0);
      else
	tem = emit_conditional_move (target, code, op0, op1, mode,
				     trueval, tem, GET_MODE (tem), 0);

      if (tem == 0)
	delete_insns_since (last);
      return tem;
    }

  /* The remaining tricks only apply to integer comparisons.  */

  if (GET_MODE_CLASS (mode) != MODE_INT)
    return 0;

  /* If this is an equality comparison of integers, we can try to exclusive-or
     (or subtract) the two operands and use a recursive call to try the
     comparison with zero.  Don't do any of these cases if branches are
     very cheap.  */

  if ((code == EQ || code == NE) && op1 != const0_rtx)
    {
      tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
			  OPTAB_WIDEN);

      if (tem == 0)
	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
			    OPTAB_WIDEN);
      if (tem != 0)
	tem = emit_store_flag (target, code, tem, const0_rtx,
			       mode, unsignedp, normalizep);
      if (tem != 0)
	return tem;

      delete_insns_since (last);
    }
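
  /* Worked example (editor's sketch, not part of the original source):
     the rewrite above relies on "x == y" being equivalent to
     "(x ^ y) == 0" (or "(x - y) == 0" when no XOR is available); e.g.
     x = 12, y = 10 gives x ^ y = 6, which is nonzero, so x != y.  The
     recursive emit_store_flag call then only has to handle a comparison
     against zero.  */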

  /* For integer comparisons, try the reverse comparison.  However, for
     small X, and if we would have to extend anyway, implementing "X != 0"
     as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
  rcode = reverse_condition (code);
  if (can_compare_p (rcode, mode, ccp_store_flag)
      && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
	    && code == NE
	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
	    && op1 == const0_rtx))
    {
      int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));

      /* Again, for the reverse comparison, use either an addition or a XOR.  */
      if (want_add
	  && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
		       optimize_insn_for_speed_p ()) == 0)
	{
	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				   STORE_FLAG_VALUE, target_mode);
	  if (tem != 0)
	    tem = expand_binop (target_mode, add_optab, tem,
				gen_int_mode (normalizep, target_mode),
				target, 0, OPTAB_WIDEN);
	}
      else if (!want_add
	       && rtx_cost (trueval, mode, XOR, 1,
			    optimize_insn_for_speed_p ()) == 0)
	{
	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				   normalizep, target_mode);
	  if (tem != 0)
	    tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
				INTVAL (trueval) >= 0, OPTAB_WIDEN);
	}

      if (tem != 0)
	return tem;
      delete_insns_since (last);
    }

  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */

  if (op1 != const0_rtx
      || (code != EQ && code != NE
	  && (BRANCH_COST (optimize_insn_for_speed_p (),
			   false) <= 1 || (code != LE && code != GT))))
    return 0;

  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  tem = 0;

  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */

  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
	subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
			  OPTAB_WIDEN);
      if (tem)
	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
			    OPTAB_WIDEN);
    }

  /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.  */

  if (code == GT)
    {
      if (rtx_equal_p (subtarget, op0))
	subtarget = 0;

      tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
				GET_MODE_BITSIZE (mode) - 1,
				subtarget, 0);
      if (tem)
	tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
			    OPTAB_WIDEN);
    }
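
  /* Worked examples (editor's sketch, not part of the original source)
     for the two sign-bit tricks above, assuming 32-bit ints:
       "A <= 0" via A | (A - 1):
	 A =  0  ->   0 | -1 = -1  (sign set, true)
	 A =  5  ->   5 |  4 =  5  (sign clear, false)
	 A = -3  ->  -3 | -4 = -3  (sign set, true)
       "A > 0" via ((int) A >> 31) - A:
	 A =  5  ->   0 -  5 = -5  (sign set, true)
	 A =  0  ->   0 -  0 =  0  (sign clear, false)
	 A = -3  ->  -1 - -3 =  2  (sign clear, false)  */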

  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
	 that converts the operand into a positive number if it is nonzero
	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
	 for NE we negate.  This puts the result in the sign bit.  Then we
	 normalize with a shift, if needed.

	 Two operations that can do the above actions are ABS and FFS, so try
	 them.  If that doesn't work, and MODE is smaller than a full word,
	 we can use zero-extension to the wider mode (an unsigned conversion)
	 as the operation.  */

      /* Note that ABS doesn't yield a positive number for INT_MIN, but
	 that is compensated by the subsequent overflow when subtracting
	 one / negating.  */

      if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
	{
	  tem = convert_modes (word_mode, mode, op0, 1);
	  mode = word_mode;
	}

      if (tem != 0)
	{
	  if (code == EQ)
	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
				0, OPTAB_WIDEN);
	  else
	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
	}

      /* If we couldn't do it that way, for NE we can "or" the two's complement
	 of the value with itself.  For EQ, we take the one's complement of
	 that "or", which is an extra insn, so we only handle EQ if branches
	 are expensive.  */

      if (tem == 0
	  && (code == NE
	      || BRANCH_COST (optimize_insn_for_speed_p (),
			      false) > 1))
	{
	  if (rtx_equal_p (subtarget, op0))
	    subtarget = 0;

	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
			      OPTAB_WIDEN);

	  if (tem && code == EQ)
	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
	}
    }

  if (tem && normalizep)
    tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
			      GET_MODE_BITSIZE (mode) - 1,
			      subtarget, normalizep == 1);

  if (tem)
    {
      if (!target)
	;
      else if (GET_MODE (tem) != target_mode)
	{
	  convert_move (target, tem, 0);
	  tem = target;
	}
      else if (!subtarget)
	{
	  emit_move_insn (target, tem);
	  tem = target;
	}
    }
  else
    delete_insns_since (last);

  return tem;
}
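
/* Worked example (editor's sketch, not part of the original source): the
   EQ/NE tricks above all force the sign bit to encode the answer.  With
   32-bit x, -x | x has the sign bit set exactly when x != 0
   (x = 6: -6 | 6 = -2; x = 0: 0 | 0 = 0), and abs (x) - 1 is negative
   exactly when x == 0 (abs (0) - 1 = -1, while abs (6) - 1 = 5).  The
   final shift then normalizes the sign bit to 0/1 or 0/-1.  */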

/* Like emit_store_flag, but always succeeds.  */

rtx
emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
		       machine_mode mode, int unsignedp, int normalizep)
{
  rtx tem;
  rtx_code_label *label;
  rtx trueval, falseval;

  /* First see if emit_store_flag can do the job.  */
  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
  if (tem != 0)
    return tem;

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */

  if (swap_commutative_operands_p (op0, op1))
    {
      std::swap (op0, op1);
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  if (!target)
    target = gen_reg_rtx (word_mode);

  /* If this failed, we have to do this with set/compare/jump/set code.
     For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
  trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
  if (code == NE
      && GET_MODE_CLASS (mode) == MODE_INT
      && REG_P (target)
      && op0 == target
      && op1 == const0_rtx)
    {
      label = gen_label_rtx ();
      do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
			       NULL_RTX, NULL, label, -1);
      emit_move_insn (target, trueval);
      emit_label (label);
      return target;
    }

  if (!REG_P (target)
      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
    target = gen_reg_rtx (GET_MODE (target));

  /* Jump in the right direction if the target cannot implement CODE
     but can jump on its reverse condition.  */
  falseval = const0_rtx;
  if (! can_compare_p (code, mode, ccp_jump)
      && (! FLOAT_MODE_P (mode)
	  || code == ORDERED || code == UNORDERED
	  || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
	  || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
    {
      enum rtx_code rcode;
      if (FLOAT_MODE_P (mode))
	rcode = reverse_condition_maybe_unordered (code);
      else
	rcode = reverse_condition (code);

      /* Canonicalize to UNORDERED for the libcall.  */
      if (can_compare_p (rcode, mode, ccp_jump)
	  || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
	{
	  falseval = trueval;
	  trueval = const0_rtx;
	  code = rcode;
	}
    }

  emit_move_insn (target, trueval);
  label = gen_label_rtx ();
  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
			   label, -1);

  emit_move_insn (target, falseval);
  emit_label (label);

  return target;
}

/* Perform a possibly multi-word comparison and conditional jump to LABEL
   if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
   now a thin wrapper around do_compare_rtx_and_jump.  */

static void
do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
		 rtx_code_label *label)
{
  int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
  do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
			   NULL, label, -1);
}
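
/* Illustration (editor's sketch, not part of the original source): the
   fallback sequence emitted by emit_store_flag_force above is simply

       target = trueval;
       if (op0 CODE op1) goto label;
       target = falseval;
     label:

   so TARGET ends up as TRUEVAL when the comparison holds and FALSEVAL
   otherwise; the reversed-condition path swaps TRUEVAL and FALSEVAL
   instead of emitting a comparison the target cannot jump on.  */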