expmed.c revision 1.12
/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987-2018 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "predict.h"
#include "memmodel.h"
#include "tm_p.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "dojump.h"
#include "explow.h"
#include "expr.h"
#include "langhooks.h"
#include "tree-vector-builder.h"

struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif

static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
				      unsigned HOST_WIDE_INT,
				      unsigned HOST_WIDE_INT,
				      poly_uint64, poly_uint64,
				      machine_mode, rtx, bool, bool);
static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   poly_uint64, poly_uint64,
				   rtx, scalar_int_mode, bool);
static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
				     unsigned HOST_WIDE_INT,
				     unsigned HOST_WIDE_INT,
				     rtx, scalar_int_mode, bool);
static void store_split_bit_field (rtx, opt_scalar_int_mode,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   poly_uint64, poly_uint64,
				   rtx, scalar_int_mode, bool);
static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
				       unsigned HOST_WIDE_INT,
				       unsigned HOST_WIDE_INT, int, rtx,
				       machine_mode, machine_mode, bool, bool);
static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
				      unsigned HOST_WIDE_INT,
				      unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, int, bool);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode,
			     rtx_code_label *);
static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);

/* Return a constant integer mask value of mode MODE with BITSIZE ones
   followed by BITPOS zeros, or the complement of that if COMPLEMENT.
   The mask is truncated if necessary to the width of mode MODE.  The
   mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */

static inline rtx
mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
{
  return immed_wide_int_const
    (wi::shifted_mask (bitpos, bitsize, complement,
		       GET_MODE_PRECISION (mode)), mode);
}

/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
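/* Editorial example, not part of the original source: for a 32-bit
   SImode, mask_rtx (SImode, 4, 8, false) builds a CONST_INT with bits
   4..11 set, i.e. 0xff0 (eight ones above four zeros), and the
   complemented form builds 0xfffff00f.  EXACT_POWER_OF_2_OR_ZERO_P
   holds for 0 and 8 but not for 6, since x & (x - 1) clears the
   lowest set bit and is therefore zero exactly when at most one bit
   is set.  */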
struct init_expmed_rtl
{
  rtx reg;
  rtx plus;
  rtx neg;
  rtx mult;
  rtx sdiv;
  rtx udiv;
  rtx sdiv_32;
  rtx smod_32;
  rtx wide_mult;
  rtx wide_lshr;
  rtx wide_trunc;
  rtx shift;
  rtx shift_mult;
  rtx shift_add;
  rtx shift_sub0;
  rtx shift_sub1;
  rtx zext;
  rtx trunc;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
};

static void
init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
		      scalar_int_mode from_mode, bool speed)
{
  int to_size, from_size;
  rtx which;

  to_size = GET_MODE_PRECISION (to_mode);
  from_size = GET_MODE_PRECISION (from_mode);

  /* Most partial integer modes have a precision less than that of the
     "full" integer mode required to store them.  If one does not,
     reduce its bit size by one for the comparison here.  */
  if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (to_size))
    to_size --;
  if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (from_size))
    from_size --;

  /* Assume cost of zero-extend and sign-extend is the same.  */
  which = (to_size < from_size ? all->trunc : all->zext);

  PUT_MODE (all->reg, from_mode);
  set_convert_cost (to_mode, from_mode, speed,
		    set_src_cost (which, to_mode, speed));
}

static void
init_expmed_one_mode (struct init_expmed_rtl *all,
		      machine_mode mode, int speed)
{
  int m, n, mode_bitsize;
  machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  PUT_MODE (all->reg, mode);
  PUT_MODE (all->plus, mode);
  PUT_MODE (all->neg, mode);
  PUT_MODE (all->mult, mode);
  PUT_MODE (all->sdiv, mode);
  PUT_MODE (all->udiv, mode);
  PUT_MODE (all->sdiv_32, mode);
  PUT_MODE (all->smod_32, mode);
  PUT_MODE (all->wide_trunc, mode);
  PUT_MODE (all->shift, mode);
  PUT_MODE (all->shift_mult, mode);
  PUT_MODE (all->shift_add, mode);
  PUT_MODE (all->shift_sub0, mode);
  PUT_MODE (all->shift_sub1, mode);
  PUT_MODE (all->zext, mode);
  PUT_MODE (all->trunc, mode);

  set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
  set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
  set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
  set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
  set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));

  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
				     <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
				     <= 4 * add_cost (speed, mode)));

  set_shift_cost (speed, mode, 0, 0);
  {
    int cost = add_cost (speed, mode);
    set_shiftadd_cost (speed, mode, 0, cost);
    set_shiftsub0_cost (speed, mode, 0, cost);
    set_shiftsub1_cost (speed, mode, 0, cost);
  }

  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
    {
      XEXP (all->shift, 1) = all->cint[m];
      XEXP (all->shift_mult, 1) = all->pow2[m];

      set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
      set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
						       speed));
      set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
							speed));
      set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
							speed));
    }

  scalar_int_mode int_mode_to;
  if (is_a <scalar_int_mode> (mode, &int_mode_to))
    {
      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
	   mode_from = (machine_mode)(mode_from + 1))
	init_expmed_one_conv (all, int_mode_to,
			      as_a <scalar_int_mode> (mode_from), speed);

      scalar_int_mode wider_mode;
      if (GET_MODE_CLASS (int_mode_to) == MODE_INT
	  && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
	{
	  PUT_MODE (all->zext, wider_mode);
	  PUT_MODE (all->wide_mult, wider_mode);
	  PUT_MODE (all->wide_lshr, wider_mode);
	  XEXP (all->wide_lshr, 1)
	    = gen_int_shift_amount (wider_mode, mode_bitsize);

	  set_mul_widen_cost (speed, wider_mode,
			      set_src_cost (all->wide_mult, wider_mode, speed));
	  set_mul_highpart_cost (speed, int_mode_to,
				 set_src_cost (all->wide_trunc,
					       int_mode_to, speed));
	}
    }
}
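/* Editorial note, not part of the original source: the cost tables
   filled in above are consumed later in this file; e.g. the multiply
   synthesis code (synth_mult) compares shift_cost/shiftadd_cost
   entries against mul_cost when deciding whether to expand a
   multiplication by a constant into a shift-and-add sequence.  The
   placeholder RTL in ALL exists only so that set_src_cost can query
   the target's rtx_costs hook once per mode and operation.  */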
void
init_expmed (void)
{
  struct init_expmed_rtl all;
  machine_mode mode = QImode;
  int m, speed;

  memset (&all, 0, sizeof all);
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
      all.cint[m] = GEN_INT (m);
    }

  /* Avoid using hard regs in ways which may be unsupported.  */
  all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
  all.neg = gen_rtx_NEG (mode, all.reg);
  all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
  all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
  all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
  all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
  all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
  all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
  all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
  all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
  all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
  all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
  all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
  all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
  all.trunc = gen_rtx_TRUNCATE (mode, all.reg);

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));

      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
	   mode = (machine_mode)(mode + 1))
	init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_PARTIAL_INT != VOIDmode)
	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_VECTOR_INT != VOIDmode)
	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);
    }

  if (alg_hash_used_p ())
    {
      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
    }
  else
    set_alg_hash_used_p (true);
  default_rtl_profile ();

  ggc_free (all.trunc);
  ggc_free (all.shift_sub1);
  ggc_free (all.shift_sub0);
  ggc_free (all.shift_add);
  ggc_free (all.shift_mult);
  ggc_free (all.shift);
  ggc_free (all.wide_trunc);
  ggc_free (all.wide_lshr);
  ggc_free (all.wide_mult);
  ggc_free (all.zext);
  ggc_free (all.smod_32);
  ggc_free (all.sdiv_32);
  ggc_free (all.udiv);
  ggc_free (all.sdiv);
  ggc_free (all.mult);
  ggc_free (all.neg);
  ggc_free (all.plus);
  ggc_free (all.reg);
}

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}

/* Whether reverse storage order is supported on the target.  */
static int reverse_storage_order_supported = -1;

/* Check whether reverse storage order is supported on the target.  */

static void
check_reverse_storage_order_support (void)
{
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    {
      reverse_storage_order_supported = 0;
      sorry ("reverse scalar storage order");
    }
  else
    reverse_storage_order_supported = 1;
}

/* Whether reverse FP storage order is supported on the target.  */
static int reverse_float_storage_order_supported = -1;

/* Check whether reverse FP storage order is supported on the target.  */

static void
check_reverse_float_storage_order_support (void)
{
  if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    {
      reverse_float_storage_order_supported = 0;
      sorry ("reverse floating-point scalar storage order");
    }
  else
    reverse_float_storage_order_supported = 1;
}

/* Return an rtx representing the value of X with reverse storage order.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
flip_storage_order (machine_mode mode, rtx x)
{
  scalar_int_mode int_mode;
  rtx result;

  if (mode == QImode)
    return x;

  if (COMPLEX_MODE_P (mode))
    {
      rtx real = read_complex_part (x, false);
      rtx imag = read_complex_part (x, true);

      real = flip_storage_order (GET_MODE_INNER (mode), real);
      imag = flip_storage_order (GET_MODE_INNER (mode), imag);

      return gen_rtx_CONCAT (mode, real, imag);
    }

  if (__builtin_expect (reverse_storage_order_supported < 0, 0))
    check_reverse_storage_order_support ();

  if (!is_a <scalar_int_mode> (mode, &int_mode))
    {
      if (FLOAT_MODE_P (mode)
	  && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
	check_reverse_float_storage_order_support ();

      if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
	{
	  sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
	  return x;
	}
      x = gen_lowpart (int_mode, x);
    }

  result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
  if (result == 0)
    result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);

  if (int_mode != mode)
    result = gen_lowpart (mode, result);

  return result;
}
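/* Editorial example, not part of the original source: on a target
   where reverse scalar storage order is supported, flipping the
   SImode constant 0x12345678 goes through the BSWAP path above and
   yields 0x78563412; an SFmode value is first punned to SImode via
   gen_lowpart, byte-swapped, then punned back to SFmode.  */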
/* If MODE is set, adjust bitfield memory MEM so that it points to the
   first unit of mode MODE that contains a bitfield of size BITSIZE at
   bit position BITNUM.  If MODE is not set, return a BLKmode reference
   to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
   of the field within the new memory.  */

static rtx
narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
		      unsigned HOST_WIDE_INT bitsize,
		      unsigned HOST_WIDE_INT bitnum,
		      unsigned HOST_WIDE_INT *new_bitnum)
{
  scalar_int_mode imode;
  if (mode.exists (&imode))
    {
      unsigned int unit = GET_MODE_BITSIZE (imode);
      *new_bitnum = bitnum % unit;
      HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
      return adjust_bitfield_address (mem, imode, offset);
    }
  else
    {
      *new_bitnum = bitnum % BITS_PER_UNIT;
      HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
      HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
			    / BITS_PER_UNIT);
      return adjust_bitfield_address_size (mem, BLKmode, offset, size);
    }
}

/* The caller wants to perform insertion or extraction PATTERN on a
   bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
   BITREGION_START and BITREGION_END are as for store_bit_field
   and FIELDMODE is the natural mode of the field.

   Search for a mode that is compatible with the memory access
   restrictions and (where applicable) with a register insertion or
   extraction.  Return the new memory on success, storing the adjusted
   bit position in *NEW_BITNUM.  Return null otherwise.  */

static rtx
adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
			      rtx op0, HOST_WIDE_INT bitsize,
			      HOST_WIDE_INT bitnum,
			      poly_uint64 bitregion_start,
			      poly_uint64 bitregion_end,
			      machine_mode fieldmode,
			      unsigned HOST_WIDE_INT *new_bitnum)
{
  bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
				bitregion_end, MEM_ALIGN (op0),
				MEM_VOLATILE_P (op0));
  scalar_int_mode best_mode;
  if (iter.next_mode (&best_mode))
    {
      /* We can use a memory in BEST_MODE.  See whether this is true for
	 any wider modes.  All other things being equal, we prefer to
	 use the widest mode possible because it tends to expose more
	 CSE opportunities.  */
      if (!iter.prefer_smaller_modes ())
	{
	  /* Limit the search to the mode required by the corresponding
	     register insertion or extraction instruction, if any.  */
	  scalar_int_mode limit_mode = word_mode;
	  extraction_insn insn;
	  if (get_best_reg_extraction_insn (&insn, pattern,
					    GET_MODE_BITSIZE (best_mode),
					    fieldmode))
	    limit_mode = insn.field_mode;

	  scalar_int_mode wider_mode;
	  while (iter.next_mode (&wider_mode)
		 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
	    best_mode = wider_mode;
	}
      return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
				   new_bitnum);
    }
  return NULL_RTX;
}
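/* Editorial example, not part of the original source: for
   narrow_bit_field_mem above with 16-bit HImode and a field at
   BITNUM 37, *NEW_BITNUM becomes 37 % 16 = 5 and the byte offset is
   (37 - 5) / 8 = 4, i.e. the MEM is rebased onto the third halfword.
   In the BLKmode case only the sub-byte remainder is kept and SIZE
   rounds the field up to whole bytes.  */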
/* Return true if a bitfield of size BITSIZE at bit number BITNUM within
   a structure of mode STRUCT_MODE represents a lowpart subreg.  The subreg
   offset is then BITNUM / BITS_PER_UNIT.  */

static bool
lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
		     machine_mode struct_mode)
{
  poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
  if (BYTES_BIG_ENDIAN)
    return (multiple_p (bitnum, BITS_PER_UNIT)
	    && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
		|| multiple_p (bitnum + bitsize,
			       regsize * BITS_PER_UNIT)));
  else
    return multiple_p (bitnum, regsize * BITS_PER_UNIT);
}

/* Return true if -fstrict-volatile-bitfields applies to an access of OP0
   containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
   Return false if the access would touch memory outside the range
   BITREGION_START to BITREGION_END for conformance to the C++ memory
   model.  */

static bool
strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum,
			    scalar_int_mode fieldmode,
			    poly_uint64 bitregion_start,
			    poly_uint64 bitregion_end)
{
  unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);

  /* -fstrict-volatile-bitfields must be enabled and we must have a
     volatile MEM.  */
  if (!MEM_P (op0)
      || !MEM_VOLATILE_P (op0)
      || flag_strict_volatile_bitfields <= 0)
    return false;

  /* The bit size must not be larger than the field mode, and
     the field mode must not be larger than a word.  */
  if (bitsize > modesize || modesize > BITS_PER_WORD)
    return false;

  /* Check for cases of unaligned fields that must be split.  */
  if (bitnum % modesize + bitsize > modesize)
    return false;

  /* The memory must be sufficiently aligned for a MODESIZE access.
     This condition guarantees that the memory access will not
     touch anything after the end of the structure.  */
  if (MEM_ALIGN (op0) < modesize)
    return false;

  /* Check for cases where the C++ memory model applies.  */
  if (maybe_ne (bitregion_end, 0U)
      && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
	  || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
		       bitregion_end)))
    return false;

  return true;
}

/* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
   bit number BITNUM can be treated as a simple value of mode MODE.
   Store the byte offset in *BYTENUM if so.  */

static bool
simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
		       machine_mode mode, poly_uint64 *bytenum)
{
  return (MEM_P (op0)
	  && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
	  && known_eq (bitsize, GET_MODE_BITSIZE (mode))
	  && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
	      || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
		  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
}
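/* Editorial example, not part of the original source: for
   simple_mem_bitfield_p above, a 32-bit field at BITNUM 32 of a MEM
   on a target without slow unaligned access passes all three tests
   (byte-aligned, exactly SImode-sized), with *BYTENUM = 4, so the
   field can be handled as a plain SImode load or store at byte
   offset 4 instead of going through the bit-field code.  */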
/* Try to use instruction INSV to store VALUE into a field of OP0.
   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
   BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
   are as for store_bit_field.  */

static bool
store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
			    opt_scalar_int_mode op0_mode,
			    unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum,
			    rtx value, scalar_int_mode value_mode)
{
  struct expand_operand ops[4];
  rtx value1;
  rtx xop0 = op0;
  rtx_insn *last = get_last_insn ();
  bool copy_back = false;

  scalar_int_mode op_mode = insv->field_mode;
  unsigned int unit = GET_MODE_BITSIZE (op_mode);
  if (bitsize == 0 || bitsize > unit)
    return false;

  if (MEM_P (xop0))
    /* Get a reference to the first byte of the field.  */
    xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
				 &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in OP_MODE.  */
      if (BYTES_BIG_ENDIAN)
	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());

      /* If xop0 is a register, we need it in OP_MODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	/* We can't just change the mode, because this might clobber op0,
	   and we will need the original value of op0 if insv fails.  */
	xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
	xop0 = gen_lowpart_SUBREG (op_mode, xop0);
    }

  /* If the destination is a paradoxical subreg such that we need a
     truncate to the inner mode, perform the insertion on a temporary and
     truncate the result to the original destination.  Note that we can't
     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
     X) 0)) is (reg:N X).  */
  if (GET_CODE (xop0) == SUBREG
      && REG_P (SUBREG_REG (xop0))
      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
					 op_mode))
    {
      rtx tem = gen_reg_rtx (op_mode);
      emit_move_insn (tem, xop0);
      xop0 = tem;
      copy_back = true;
    }

  /* There is a similar overflow check at the start of store_bit_field_1,
     but that only handles the case where the field lies completely
     outside the register.  Here the field may lie only partly in the
     register, in which case we need to adjust BITSIZE for the partial
     overflow.  Without this fix, pr48335-2.c would be broken on
     big-endian targets that have a bit-insert instruction, such as arm
     and aarch64.  */
  if (bitsize + bitnum > unit && bitnum < unit)
    {
      warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
	       "destination object, data truncated into %wu-bit",
	       bitsize, unit - bitnum);
      bitsize = unit - bitnum;
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are inserting into.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  value1 = value;
  if (value_mode != op_mode)
    {
      if (GET_MODE_BITSIZE (value_mode) >= bitsize)
	{
	  rtx tmp;
	  /* Optimization: Don't bother really extending VALUE
	     if it has all the bits we will actually use.  However,
	     if we must narrow it, be sure we do it correctly.  */

	  if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
	    {
	      tmp = simplify_subreg (op_mode, value1, value_mode, 0);
	      if (! tmp)
		tmp = simplify_gen_subreg (op_mode,
					   force_reg (value_mode, value1),
					   value_mode, 0);
	    }
	  else
	    {
	      tmp = gen_lowpart_if_possible (op_mode, value1);
	      if (! tmp)
		tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
	    }
	  value1 = tmp;
	}
      else if (CONST_INT_P (value))
	value1 = gen_int_mode (INTVAL (value), op_mode);
      else
	/* Parse phase is supposed to make VALUE's data type
	   match that of the component reference, which is a type
	   at least as wide as the field; so VALUE should have
	   a mode that corresponds to that type.  */
	gcc_assert (CONSTANT_P (value));
    }

  create_fixed_operand (&ops[0], xop0);
  create_integer_operand (&ops[1], bitsize);
  create_integer_operand (&ops[2], bitnum);
  create_input_operand (&ops[3], value1, op_mode);
  if (maybe_expand_insn (insv->icode, 4, ops))
    {
      if (copy_back)
	convert_move (op0, xop0, true);
      return true;
    }
  delete_insns_since (last);
  return false;
}
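/* Editorial example, not part of the original source: with a 32-bit
   insertion unit on a machine where BITS_BIG_ENDIAN differs from
   BYTES_BIG_ENDIAN, an 8-bit field at BITNUM 4 is renumbered above
   as 32 - 8 - 4 = 20 before being handed to the insv pattern, since
   the pattern counts bit positions from the opposite end of the
   unit.  */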
/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.  */

static bool
store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
		   poly_uint64 bitregion_start, poly_uint64 bitregion_end,
		   machine_mode fieldmode,
		   rtx value, bool reverse, bool fallback_p)
{
  rtx op0 = str_rtx;

  while (GET_CODE (op0) == SUBREG)
    {
      bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
    return true;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  machine_mode outermode = GET_MODE (op0);
  scalar_mode innermode = GET_MODE_INNER (outermode);
  poly_uint64 pos;
  if (VECTOR_MODE_P (outermode)
      && !MEM_P (op0)
      && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
      && fieldmode == innermode
      && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
      && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
    {
      struct expand_operand ops[3];
      enum insn_code icode = optab_handler (vec_set_optab, outermode);

      create_fixed_operand (&ops[0], op0);
      create_input_operand (&ops[1], value, innermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
	return true;
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.  */
  if (!MEM_P (op0)
      && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
    {
      /* Use the subreg machinery either to narrow OP0 to the required
	 words or to cope with mode punning between equal-sized modes.
	 In the latter case, use subreg on the rhs side, not lhs.  */
      rtx sub;
      HOST_WIDE_INT regnum;
      poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
      if (known_eq (bitnum, 0U)
	  && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
	{
	  sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
	  if (sub)
	    {
	      if (reverse)
		sub = flip_storage_order (GET_MODE (op0), sub);
	      emit_move_insn (op0, sub);
	      return true;
	    }
	}
      else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
	       && multiple_p (bitsize, regsize * BITS_PER_UNIT))
	{
	  sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
				     regnum * regsize);
	  if (sub)
	    {
	      if (reverse)
		value = flip_storage_order (fieldmode, value);
	      emit_move_insn (sub, value);
	      return true;
	    }
	}
    }

  /* If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */
  poly_uint64 bytenum;
  if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
    {
      op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
      if (reverse)
	value = flip_storage_order (fieldmode, value);
      emit_move_insn (op0, value);
      return true;
    }

  /* It's possible we'll need to handle other cases here for
     polynomial bitnum and bitsize.  */

  /* From here on we need to be looking at a fixed-size insertion.  */
  unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
  unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
  scalar_int_mode imode;
  if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
    {
      if (MEM_P (op0))
	op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
					    0, MEM_SIZE (op0));
      else if (!op0_mode.exists ())
	{
	  if (ibitnum == 0
	      && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
	      && MEM_P (value)
	      && !reverse)
	    {
	      value = adjust_address (value, GET_MODE (op0), 0);
	      emit_move_insn (op0, value);
	      return true;
	    }
	  if (!fallback_p)
	    return false;
	  rtx temp = assign_stack_temp (GET_MODE (op0),
					GET_MODE_SIZE (GET_MODE (op0)));
	  emit_move_insn (temp, op0);
	  store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
			     reverse, fallback_p);
	  emit_move_insn (op0, temp);
	  return true;
	}
      else
	op0 = gen_lowpart (op0_mode.require (), op0);
    }

  return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
				   bitregion_start, bitregion_end,
				   fieldmode, value, reverse, fallback_p);
}
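/* Editorial example, not part of the original source: for the vec_set
   path in store_bit_field_1 above, storing a 32-bit element into a
   V4SImode register at BITNUM 64 gives POS = 64 / 32 = 2, so the
   target's vec_set pattern replaces element 2 directly instead of
   falling through to the integral bit-field code.  */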
/* Subroutine of store_bit_field_1, with the same arguments, except
   that BITSIZE and BITNUM are constant.  Handle cases specific to
   integral modes.  If OP0_MODE is defined, it is the mode of OP0,
   otherwise OP0 is a BLKmode MEM.  */

static bool
store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
			  unsigned HOST_WIDE_INT bitsize,
			  unsigned HOST_WIDE_INT bitnum,
			  poly_uint64 bitregion_start,
			  poly_uint64 bitregion_end,
			  machine_mode fieldmode,
			  rtx value, bool reverse, bool fallback_p)
{
  /* Storing an lsb-aligned field in a register
     can be done with a movstrict instruction.  */

  if (!MEM_P (op0)
      && !reverse
      && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
      && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
    {
      struct expand_operand ops[2];
      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
      rtx arg0 = op0;
      unsigned HOST_WIDE_INT subreg_off;

      if (GET_CODE (arg0) == SUBREG)
	{
	  /* Else we've got some float mode source being extracted into
	     a different float mode destination -- this combination of
	     subregs results in Severe Tire Damage.  */
	  gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
		      || GET_MODE_CLASS (fieldmode) == MODE_INT
		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
	  arg0 = SUBREG_REG (arg0);
	}

      subreg_off = bitnum / BITS_PER_UNIT;
      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
	{
	  arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);

	  create_fixed_operand (&ops[0], arg0);
	  /* Shrink the source operand to FIELDMODE.  */
	  create_convert_operand_to (&ops[1], value, fieldmode, false);
	  if (maybe_expand_insn (icode, 2, ops))
	    return true;
	}
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.
	 However, only do that if the value is not BLKmode.  */

      const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx_insn *last;

      /* This is the mode we must force value to, so that there will be enough
	 subwords to extract.  Note that fieldmode will often (always?) be
	 VOIDmode, because that is what store_field uses to indicate that this
	 is a bit field, but passing VOIDmode to operand_subword_force
	 is not allowed.

	 The mode must be fixed-size, since insertions into variable-sized
	 objects are meant to be handled before calling this function.  */
      fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
      if (value_mode == VOIDmode)
	value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  unsigned int wordnum = (backwards
				  ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD
				  - i - 1
				  : i);
	  unsigned int bit_offset = (backwards ^ reverse
				     ? MAX ((int) bitsize - ((int) i + 1)
					    * BITS_PER_WORD,
					    0)
				     : (int) i * BITS_PER_WORD);
	  rtx value_word = operand_subword_force (value, wordnum, value_mode);
	  unsigned HOST_WIDE_INT new_bitsize =
	    MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);

	  /* If the remaining chunk doesn't have full wordsize we have
	     to make sure that for big-endian machines the higher order
	     bits are used.  */
	  if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
	    {
	      int shift = BITS_PER_WORD - new_bitsize;
	      rtx shift_rtx = gen_int_shift_amount (word_mode, shift);
	      value_word = simplify_expand_binop (word_mode, lshr_optab,
						  value_word, shift_rtx,
						  NULL_RTX, true,
						  OPTAB_LIB_WIDEN);
	    }

	  if (!store_bit_field_1 (op0, new_bitsize,
				  bitnum + bit_offset,
				  bitregion_start, bitregion_end,
				  word_mode,
				  value_word, reverse, fallback_p))
	    {
	      delete_insns_since (last);
	      return false;
	    }
	}
      return true;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */
  rtx orig_value = value;
  scalar_int_mode value_mode;
  if (GET_MODE (value) == VOIDmode)
    /* By this point we've dealt with values that are bigger than a word,
       so word_mode is a conservatively correct choice.  */
    value_mode = word_mode;
  else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
    {
      value_mode = int_mode_for_mode (GET_MODE (value)).require ();
      value = gen_reg_rtx (value_mode);
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* If OP0 is a multi-word register, narrow it to the affected word.
     If the region spans two words, defer to store_split_bit_field.
     Don't do this if op0 is a single hard register wider than word
     such as a float or vector register.  */
  if (!MEM_P (op0)
      && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
      && (!REG_P (op0)
	  || !HARD_REGISTER_P (op0)
	  || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
    {
      if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
	{
	  if (!fallback_p)
	    return false;

	  store_split_bit_field (op0, op0_mode, bitsize, bitnum,
				 bitregion_start, bitregion_end,
				 value, value_mode, reverse);
	  return true;
	}
      op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
      gcc_assert (op0);
      op0_mode = word_mode;
      bitnum %= BITS_PER_WORD;
    }

  /* From here on we can assume that the field to be stored in fits
     within a word.  If the destination is a register, it too fits
     in a word.  */

  extraction_insn insv;
  if (!MEM_P (op0)
      && !reverse
      && get_best_reg_extraction_insn (&insv, EP_insv,
				       GET_MODE_BITSIZE (op0_mode.require ()),
				       fieldmode)
      && store_bit_field_using_insv (&insv, op0, op0_mode,
				     bitsize, bitnum, value, value_mode))
    return true;
  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (MEM_P (op0) && !reverse)
    {
      if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
					fieldmode)
	  && store_bit_field_using_insv (&insv, op0, op0_mode,
					 bitsize, bitnum, value, value_mode))
	return true;

      rtx_insn *last = get_last_insn ();

      /* Try loading part of OP0 into a register, inserting the bitfield
	 into that, and then copying the result back to OP0.  */
      unsigned HOST_WIDE_INT bitpos;
      rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
					       bitregion_start, bitregion_end,
					       fieldmode, &bitpos);
      if (xop0)
	{
	  rtx tempreg = copy_to_reg (xop0);
	  if (store_bit_field_1 (tempreg, bitsize, bitpos,
				 bitregion_start, bitregion_end,
				 fieldmode, orig_value, reverse, false))
	    {
	      emit_move_insn (xop0, tempreg);
	      return true;
	    }
	  delete_insns_since (last);
	}
    }

  if (!fallback_p)
    return false;

  store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
			 bitregion_end, value, value_mode, reverse);
  return true;
}

/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.

   BITREGION_START is bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0 if the C++ memory model does not apply,
   or we are not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

   If REVERSE is true, the store is to be done in reverse order.  */

void
store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
		 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
		 machine_mode fieldmode,
		 rtx value, bool reverse)
{
  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
  unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
  scalar_int_mode int_mode;
  if (bitsize.is_constant (&ibitsize)
      && bitnum.is_constant (&ibitnum)
      && is_a <scalar_int_mode> (fieldmode, &int_mode)
      && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
				     bitregion_start, bitregion_end))
    {
      /* Storing a full word can be done with a simple store.
	 We know here that the field can be accessed with one single
	 instruction.  For targets that support unaligned memory,
	 an unaligned access may be necessary.  */
      if (ibitsize == GET_MODE_BITSIZE (int_mode))
	{
	  str_rtx = adjust_bitfield_address (str_rtx, int_mode,
					     ibitnum / BITS_PER_UNIT);
	  if (reverse)
	    value = flip_storage_order (int_mode, value);
	  gcc_assert (ibitnum % BITS_PER_UNIT == 0);
	  emit_move_insn (str_rtx, value);
	}
      else
	{
	  rtx temp;

	  str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
					  ibitnum, &ibitnum);
	  gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
	  temp = copy_to_reg (str_rtx);
	  if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
				  int_mode, value, reverse, true))
	    gcc_unreachable ();

	  emit_move_insn (str_rtx, temp);
	}

      return;
    }
  /* Under the C++0x memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
    {
      scalar_int_mode best_mode;
      machine_mode addr_mode = VOIDmode;

      poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
      bitnum -= bitregion_start;
      poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      if (bitsize.is_constant (&ibitsize)
	  && bitnum.is_constant (&ibitnum)
	  && get_best_mode (ibitsize, ibitnum,
			    bitregion_start, bitregion_end,
			    MEM_ALIGN (str_rtx), INT_MAX,
			    MEM_VOLATILE_P (str_rtx), &best_mode))
	addr_mode = best_mode;
      str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
					      offset, size);
    }

  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
			  bitregion_start, bitregion_end,
			  fieldmode, value, reverse, true))
    gcc_unreachable ();
}

/* Use shifts and boolean operations to store VALUE into a bit field of
   width BITSIZE in OP0, starting at bit BITNUM.  If OP0_MODE is defined,
   it is the mode of OP0, otherwise OP0 is a BLKmode MEM.  VALUE_MODE is
   the mode of VALUE.

   If REVERSE is true, the store is to be done in reverse order.  */

static void
store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
		       unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitnum,
		       poly_uint64 bitregion_start, poly_uint64 bitregion_end,
		       rtx value, scalar_int_mode value_mode, bool reverse)
{
  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  scalar_int_mode best_mode;
  if (MEM_P (op0))
    {
      unsigned int max_bitsize = BITS_PER_WORD;
      scalar_int_mode imode;
      if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
	max_bitsize = GET_MODE_BITSIZE (imode);

      if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
			  MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
			  &best_mode))
	{
	  /* The only way this should occur is if the field spans word
	     boundaries.  */
	  store_split_bit_field (op0, op0_mode, bitsize, bitnum,
				 bitregion_start, bitregion_end,
				 value, value_mode, reverse);
	  return;
	}

      op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
    }
  else
    best_mode = op0_mode.require ();

  store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
			   value, value_mode, reverse);
}
/* Helper function for store_fixed_bit_field, stores
   the bit field always using MODE, which is the mode of OP0.  The other
   arguments are as for store_fixed_bit_field.  */

static void
store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
			 unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitnum,
			 rtx value, scalar_int_mode value_mode, bool reverse)
{
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */

  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT v = UINTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= (HOST_WIDE_INT_1U << bitsize) - 1;

      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == (HOST_WIDE_INT_1U << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT
		   && v == HOST_WIDE_INT_M1U))
	all_one = 1;

      value = lshift_value (mode, v, bitnum);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
		      && bitnum + bitsize != GET_MODE_BITSIZE (mode));

      if (value_mode != mode)
	value = convert_to_mode (mode, value, 1);

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitnum > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      bitnum, NULL_RTX, 1);
    }

  if (reverse)
    value = flip_storage_order (mode, value);

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
      if (reverse)
	mask = flip_storage_order (mode, mask);
      temp = expand_binop (mode, and_optab, temp, mask,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}
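/* Editorial example, not part of the original source: for SImode,
   BITSIZE 8 and BITNUM 8 on a little-endian target, the sequence in
   store_fixed_bit_field_1 above amounts to

     temp = op0 & 0xffff00ff;            (mask_rtx (SImode, 8, 8, 1))
     temp = temp | ((value & 0xff) << 8);
     op0 = temp;

   i.e. the classic read-modify-write expansion of a bit-field
   store.  */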
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store, which has mode VALUE_MODE.
   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
   a BLKmode MEM.

   If REVERSE is true, the store is to be done in reverse order.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
		       unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos,
		       poly_uint64 bitregion_start, poly_uint64 bitregion_end,
		       rtx value, scalar_int_mode value_mode, bool reverse)
{
  unsigned int unit, total_bits, bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle
     that much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If OP0 is a memory with a mode, then UNIT must not be larger than
     OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
     again, and we will mutually recurse forever.  */
  if (MEM_P (op0) && op0_mode.exists ())
    unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
      value_mode = word_mode;
    }

  total_bits = GET_MODE_BITSIZE (value_mode);

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;
      rtx part;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* When the region of bytes we can touch is restricted, decrease
	 UNIT close to the end of the region as needed.  If op0 is a REG
	 or SUBREG of REG, don't do this, as there can't be data races
	 on a register and we can expand shorter code in some cases.  */
      if (maybe_ne (bitregion_end, 0U)
	  && unit > BITS_PER_UNIT
	  && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
	  && !REG_P (op0)
	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
	{
	  unit = unit / 2;
	  continue;
	}

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
	{
	  /* Fetch successively less significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & ((HOST_WIDE_INT_1 << thissize) - 1));
	  /* Likewise, but the source is little-endian.  */
	  else if (reverse)
	    part = extract_fixed_bit_field (word_mode, value, value_mode,
					    thissize,
					    bitsize - bitsdone - thissize,
					    NULL_RTX, 1, false);
	  else
	    /* The args are chosen so that the last part includes the
	       lsb.  Give extract_bit_field the value it needs (with
	       endianness compensation) to fetch the piece we want.  */
	    part = extract_fixed_bit_field (word_mode, value, value_mode,
					    thissize,
					    total_bits - bitsize + bitsdone,
					    NULL_RTX, 1, false);
	}
      else
	{
	  /* Fetch successively more significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> bitsdone)
			    & ((HOST_WIDE_INT_1 << thissize) - 1));
	  /* Likewise, but the source is big-endian.  */
	  else if (reverse)
	    part = extract_fixed_bit_field (word_mode, value, value_mode,
					    thissize,
					    total_bits - bitsdone - thissize,
					    NULL_RTX, 1, false);
	  else
	    part = extract_fixed_bit_field (word_mode, value, value_mode,
					    thissize, bitsdone, NULL_RTX,
					    1, false);
	}
      /* If OP0 is a register, then handle OFFSET here.  */
      rtx op0_piece = op0;
      opt_scalar_int_mode op0_piece_mode = op0_mode;
      if (SUBREG_P (op0) || REG_P (op0))
	{
	  scalar_int_mode imode;
	  if (op0_mode.exists (&imode)
	      && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
	    {
	      if (offset)
		op0_piece = const0_rtx;
	    }
	  else
	    {
	      op0_piece = operand_subword_force (op0,
						 offset * unit / BITS_PER_WORD,
						 GET_MODE (op0));
	      op0_piece_mode = word_mode;
	    }
	  offset &= BITS_PER_WORD / unit - 1;
	}

      /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
	 it is just an out-of-bounds access.  Ignore it.  */
      if (op0_piece != const0_rtx)
	store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
			       offset * unit + thispos, bitregion_start,
			       bitregion_end, part, word_mode, reverse);
      bitsdone += thissize;
    }
}

/* A subroutine of extract_bit_field_1 that converts return value X
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
   to extract_bit_field.  */

static rtx
convert_extracted_bit_field (rtx x, machine_mode mode,
			     machine_mode tmode, bool unsignedp)
{
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
    return x;

  /* If the x mode is not a scalar integral, first convert to the
     integer mode of that size and then access it as a floating-point
     value via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
    {
      scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
      x = convert_to_mode (int_mode, x, unsignedp);
      x = force_reg (int_mode, x);
      return gen_lowpart (tmode, x);
    }

  return convert_to_mode (tmode, x, unsignedp);
}
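/* Editorial example, not part of the original source: splitting in
   store_split_bit_field above with BITPOS 28, BITSIZE 8 and a 32-bit
   UNIT stores two pieces on a little-endian target: first THISSIZE =
   MIN (8, 32 - 28) = 4 bits at position 28 of word 0, then the
   remaining 4 bits at position 0 of word 1.  */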
/* Try to use an ext(z)v pattern to extract a field from OP0.
   Return the extracted value on success, otherwise return null.
   EXTV describes the extraction instruction to use.  If OP0_MODE
   is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
   The other arguments are as for extract_bit_field.  */

static rtx
extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
			      opt_scalar_int_mode op0_mode,
			      unsigned HOST_WIDE_INT bitsize,
			      unsigned HOST_WIDE_INT bitnum,
			      int unsignedp, rtx target,
			      machine_mode mode, machine_mode tmode)
{
  struct expand_operand ops[4];
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  scalar_int_mode ext_mode = extv->field_mode;
  unsigned unit = GET_MODE_BITSIZE (ext_mode);

  if (bitsize == 0 || unit < bitsize)
    return NULL_RTX;

  if (MEM_P (op0))
    /* Get a reference to the first byte of the field.  */
    op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
				&bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in EXT_MODE.  */
      if (BYTES_BIG_ENDIAN)
	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());

      /* If op0 is a register, we need it in EXT_MODE to make it
	 acceptable to the format of ext(z)v.  */
      if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
	return NULL_RTX;
      if (REG_P (op0) && op0_mode.require () != ext_mode)
	op0 = gen_lowpart_SUBREG (ext_mode, op0);
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are extracting from.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  if (target == 0)
    target = spec_target = gen_reg_rtx (tmode);

  if (GET_MODE (target) != ext_mode)
    {
      /* Don't use LHS paradoxical subreg if explicit truncation is needed
	 between the mode of the extraction (word_mode) and the target
	 mode.  Instead, create a temporary and use convert_move to set
	 the target.  */
      if (REG_P (target)
	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
	{
	  target = gen_lowpart (ext_mode, target);
	  if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
	    spec_target_subreg = target;
	}
      else
	target = gen_reg_rtx (ext_mode);
    }

  create_output_operand (&ops[0], target, ext_mode);
  create_fixed_operand (&ops[1], op0);
  create_integer_operand (&ops[2], bitsize);
  create_integer_operand (&ops[3], bitnum);
  if (maybe_expand_insn (extv->icode, 4, ops))
    {
      target = ops[0].value;
      if (target == spec_target)
	return target;
      if (target == spec_target_subreg)
	return spec_target;
      return convert_extracted_bit_field (target, mode, tmode, unsignedp);
    }
  return NULL_RTX;
}

/* See whether it would be valid to extract the part of OP0 described
   by BITNUM and BITSIZE into a value of mode MODE using a subreg
   operation.  Return the subreg if so, otherwise return null.  */

static rtx
extract_bit_field_as_subreg (machine_mode mode, rtx op0,
			     poly_uint64 bitsize, poly_uint64 bitnum)
{
  poly_uint64 bytenum;
  if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
      && known_eq (bitsize, GET_MODE_BITSIZE (mode))
      && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
      && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
    return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
  return NULL_RTX;
}
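/* Editorial example, not part of the original source: on a typical
   little-endian target, extracting the low 32 bits of a DImode
   pseudo with extract_bit_field_as_subreg above yields
   (subreg:SI (reg:DI N) 0): BITNUM 0 is byte-aligned, the 32-bit
   size matches SImode exactly, the lowpart test passes, and
   truncation from DImode to SImode is a no-op, so no shifting is
   needed.  */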
*/ 1652 if (VECTOR_MODE_P (GET_MODE (op0)) 1653 && !MEM_P (op0) 1654 && VECTOR_MODE_P (tmode) 1655 && known_eq (bitsize, GET_MODE_BITSIZE (tmode)) 1656 && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode))) 1657 { 1658 machine_mode new_mode = GET_MODE (op0); 1659 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)) 1660 { 1661 scalar_mode inner_mode = GET_MODE_INNER (tmode); 1662 poly_uint64 nunits; 1663 if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)), 1664 GET_MODE_UNIT_BITSIZE (tmode), &nunits) 1665 || !mode_for_vector (inner_mode, nunits).exists (&new_mode) 1666 || !VECTOR_MODE_P (new_mode) 1667 || maybe_ne (GET_MODE_SIZE (new_mode), 1668 GET_MODE_SIZE (GET_MODE (op0))) 1669 || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode) 1670 || !targetm.vector_mode_supported_p (new_mode)) 1671 new_mode = VOIDmode; 1672 } 1673 poly_uint64 pos; 1674 if (new_mode != VOIDmode 1675 && (convert_optab_handler (vec_extract_optab, new_mode, tmode) 1676 != CODE_FOR_nothing) 1677 && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos)) 1678 { 1679 struct expand_operand ops[3]; 1680 machine_mode outermode = new_mode; 1681 machine_mode innermode = tmode; 1682 enum insn_code icode 1683 = convert_optab_handler (vec_extract_optab, outermode, innermode); 1684 1685 if (new_mode != GET_MODE (op0)) 1686 op0 = gen_lowpart (new_mode, op0); 1687 create_output_operand (&ops[0], target, innermode); 1688 ops[0].target = 1; 1689 create_input_operand (&ops[1], op0, outermode); 1690 create_integer_operand (&ops[2], pos); 1691 if (maybe_expand_insn (icode, 3, ops)) 1692 { 1693 if (alt_rtl && ops[0].target) 1694 *alt_rtl = target; 1695 target = ops[0].value; 1696 if (GET_MODE (target) != mode) 1697 return gen_lowpart (tmode, target); 1698 return target; 1699 } 1700 } 1701 } 1702 1703 /* See if we can get a better vector mode before extracting. */ 1704 if (VECTOR_MODE_P (GET_MODE (op0)) 1705 && !MEM_P (op0) 1706 && GET_MODE_INNER (GET_MODE (op0)) != tmode) 1707 { 1708 machine_mode new_mode; 1709 1710 if (GET_MODE_CLASS (tmode) == MODE_FLOAT) 1711 new_mode = MIN_MODE_VECTOR_FLOAT; 1712 else if (GET_MODE_CLASS (tmode) == MODE_FRACT) 1713 new_mode = MIN_MODE_VECTOR_FRACT; 1714 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT) 1715 new_mode = MIN_MODE_VECTOR_UFRACT; 1716 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM) 1717 new_mode = MIN_MODE_VECTOR_ACCUM; 1718 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM) 1719 new_mode = MIN_MODE_VECTOR_UACCUM; 1720 else 1721 new_mode = MIN_MODE_VECTOR_INT; 1722 1723 FOR_EACH_MODE_FROM (new_mode, new_mode) 1724 if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0))) 1725 && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode)) 1726 && targetm.vector_mode_supported_p (new_mode)) 1727 break; 1728 if (new_mode != VOIDmode) 1729 op0 = gen_lowpart (new_mode, op0); 1730 } 1731 1732 /* Use vec_extract patterns for extracting parts of vectors whenever 1733 available. If that fails, see whether the current modes and bitregion 1734 give a natural subreg. 
*/ 1735 machine_mode outermode = GET_MODE (op0); 1736 if (VECTOR_MODE_P (outermode) && !MEM_P (op0)) 1737 { 1738 scalar_mode innermode = GET_MODE_INNER (outermode); 1739 enum insn_code icode 1740 = convert_optab_handler (vec_extract_optab, outermode, innermode); 1741 poly_uint64 pos; 1742 if (icode != CODE_FOR_nothing 1743 && known_eq (bitsize, GET_MODE_BITSIZE (innermode)) 1744 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos)) 1745 { 1746 struct expand_operand ops[3]; 1747 1748 create_output_operand (&ops[0], target, innermode); 1749 ops[0].target = 1; 1750 create_input_operand (&ops[1], op0, outermode); 1751 create_integer_operand (&ops[2], pos); 1752 if (maybe_expand_insn (icode, 3, ops)) 1753 { 1754 if (alt_rtl && ops[0].target) 1755 *alt_rtl = target; 1756 target = ops[0].value; 1757 if (GET_MODE (target) != mode) 1758 return gen_lowpart (tmode, target); 1759 return target; 1760 } 1761 } 1762 /* Using subregs is useful if we're extracting one register vector 1763 from a multi-register vector. extract_bit_field_as_subreg checks 1764 for valid bitsize and bitnum, so we don't need to do that here. */ 1765 if (VECTOR_MODE_P (mode)) 1766 { 1767 rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum); 1768 if (sub) 1769 return sub; 1770 } 1771 } 1772 1773 /* Make sure we are playing with integral modes. Pun with subregs 1774 if we aren't. */ 1775 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0)); 1776 scalar_int_mode imode; 1777 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0)) 1778 { 1779 if (MEM_P (op0)) 1780 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (), 1781 0, MEM_SIZE (op0)); 1782 else if (op0_mode.exists (&imode)) 1783 { 1784 op0 = gen_lowpart (imode, op0); 1785 1786 /* If we got a SUBREG, force it into a register since we 1787 aren't going to be able to do another SUBREG on it. */ 1788 if (GET_CODE (op0) == SUBREG) 1789 op0 = force_reg (imode, op0); 1790 } 1791 else 1792 { 1793 poly_int64 size = GET_MODE_SIZE (GET_MODE (op0)); 1794 rtx mem = assign_stack_temp (GET_MODE (op0), size); 1795 emit_move_insn (mem, op0); 1796 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size); 1797 } 1798 } 1799 1800 /* ??? We currently assume TARGET is at least as big as BITSIZE. 1801 If that's wrong, the solution is to test for it and set TARGET to 0 1802 if needed. */ 1803 1804 /* Get the mode of the field to use for atomic access or subreg 1805 conversion. */ 1806 if (!SCALAR_INT_MODE_P (tmode) 1807 || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1)) 1808 mode1 = mode; 1809 gcc_assert (mode1 != BLKmode); 1810 1811 /* Extraction of a full MODE1 value can be done with a subreg as long 1812 as the least significant bit of the value is the least significant 1813 bit of either OP0 or a word of OP0. */ 1814 if (!MEM_P (op0) && !reverse) 1815 { 1816 rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum); 1817 if (sub) 1818 return convert_extracted_bit_field (sub, mode, tmode, unsignedp); 1819 } 1820 1821 /* Extraction of a full MODE1 value can be done with a load as long as 1822 the field is on a byte boundary and is sufficiently aligned. 
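   For instance (assuming byte-addressable memory and a 32-bit MODE1),
   a 32-bit field starting at bitnum == 32 of a sufficiently aligned
   MEM is just a full MODE1 load from byte offset 4;
   simple_mem_bitfield_p below tests for exactly this shape.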
*/ 1823 poly_uint64 bytenum; 1824 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum)) 1825 { 1826 op0 = adjust_bitfield_address (op0, mode1, bytenum); 1827 if (reverse) 1828 op0 = flip_storage_order (mode1, op0); 1829 return convert_extracted_bit_field (op0, mode, tmode, unsignedp); 1830 } 1831 1832 /* If we have a memory source and a non-constant bit offset, restrict 1833 the memory to the referenced bytes. This is a worst-case fallback 1834 but is useful for things like vector booleans. */ 1835 if (MEM_P (op0) && !bitnum.is_constant ()) 1836 { 1837 bytenum = bits_to_bytes_round_down (bitnum); 1838 bitnum = num_trailing_bits (bitnum); 1839 poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize); 1840 op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize); 1841 op0_mode = opt_scalar_int_mode (); 1842 } 1843 1844 /* It's possible we'll need to handle other cases here for 1845 polynomial bitnum and bitsize. */ 1846 1847 /* From here on we need to be looking at a fixed-size extraction. */ 1848 return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (), 1849 bitnum.to_constant (), unsignedp, 1850 target, mode, tmode, reverse, fallback_p); 1851} 1852 1853/* Subroutine of extract_bit_field_1, with the same arguments, except 1854 that BITSIZE and BITNUM are constant. Handle cases specific to 1855 integral modes. If OP0_MODE is defined, it is the mode of OP0, 1856 otherwise OP0 is a BLKmode MEM. */ 1857 1858static rtx 1859extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode, 1860 unsigned HOST_WIDE_INT bitsize, 1861 unsigned HOST_WIDE_INT bitnum, int unsignedp, 1862 rtx target, machine_mode mode, machine_mode tmode, 1863 bool reverse, bool fallback_p) 1864{ 1865 /* Handle fields bigger than a word. */ 1866 1867 if (bitsize > BITS_PER_WORD) 1868 { 1869 /* Here we transfer the words of the field 1870 in the order least significant first. 1871 This is because the most significant word is the one which may 1872 be less than full. */ 1873 1874 const bool backwards = WORDS_BIG_ENDIAN; 1875 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; 1876 unsigned int i; 1877 rtx_insn *last; 1878 1879 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target)) 1880 target = gen_reg_rtx (mode); 1881 1882 /* In case we're about to clobber a base register or something 1883 (see gcc.c-torture/execute/20040625-1.c). */ 1884 if (reg_mentioned_p (target, op0)) 1885 target = gen_reg_rtx (mode); 1886 1887 /* Indicate for flow that the entire target reg is being set. */ 1888 emit_clobber (target); 1889 1890 /* The mode must be fixed-size, since extract_bit_field_1 handles 1891 extractions from variable-sized objects before calling this 1892 function. */ 1893 unsigned int target_size 1894 = GET_MODE_SIZE (GET_MODE (target)).to_constant (); 1895 last = get_last_insn (); 1896 for (i = 0; i < nwords; i++) 1897 { 1898 /* If I is 0, use the low-order word in both field and target; 1899 if I is 1, use the next to lowest word; and so on. */ 1900 /* Word number in TARGET to use. */ 1901 unsigned int wordnum 1902 = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i); 1903 /* Offset from start of field in OP0. */ 1904 unsigned int bit_offset = (backwards ^ reverse 1905 ?
MAX ((int) bitsize - ((int) i + 1) 1906 * BITS_PER_WORD, 1907 0) 1908 : (int) i * BITS_PER_WORD); 1909 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode); 1910 rtx result_part 1911 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD, 1912 bitsize - i * BITS_PER_WORD), 1913 bitnum + bit_offset, 1, target_part, 1914 mode, word_mode, reverse, fallback_p, NULL); 1915 1916 gcc_assert (target_part); 1917 if (!result_part) 1918 { 1919 delete_insns_since (last); 1920 return NULL; 1921 } 1922 1923 if (result_part != target_part) 1924 emit_move_insn (target_part, result_part); 1925 } 1926 1927 if (unsignedp) 1928 { 1929 /* Unless we've filled TARGET, the upper regs in a multi-reg value 1930 need to be zero'd out. */ 1931 if (target_size > nwords * UNITS_PER_WORD) 1932 { 1933 unsigned int i, total_words; 1934 1935 total_words = target_size / UNITS_PER_WORD; 1936 for (i = nwords; i < total_words; i++) 1937 emit_move_insn 1938 (operand_subword (target, 1939 backwards ? total_words - i - 1 : i, 1940 1, VOIDmode), 1941 const0_rtx); 1942 } 1943 return target; 1944 } 1945 1946 /* Signed bit field: sign-extend with two arithmetic shifts. */ 1947 target = expand_shift (LSHIFT_EXPR, mode, target, 1948 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); 1949 return expand_shift (RSHIFT_EXPR, mode, target, 1950 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); 1951 } 1952 1953 /* If OP0 is a multi-word register, narrow it to the affected word. 1954 If the region spans two words, defer to extract_split_bit_field. */ 1955 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD) 1956 { 1957 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD) 1958 { 1959 if (!fallback_p) 1960 return NULL_RTX; 1961 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum, 1962 unsignedp, reverse); 1963 return convert_extracted_bit_field (target, mode, tmode, unsignedp); 1964 } 1965 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (), 1966 bitnum / BITS_PER_WORD * UNITS_PER_WORD); 1967 op0_mode = word_mode; 1968 bitnum %= BITS_PER_WORD; 1969 } 1970 1971 /* From here on we know the desired field is smaller than a word. 1972 If OP0 is a register, it too fits within a word. */ 1973 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv; 1974 extraction_insn extv; 1975 if (!MEM_P (op0) 1976 && !reverse 1977 /* ??? We could limit the structure size to the part of OP0 that 1978 contains the field, with appropriate checks for endianness 1979 and TARGET_TRULY_NOOP_TRUNCATION. */ 1980 && get_best_reg_extraction_insn (&extv, pattern, 1981 GET_MODE_BITSIZE (op0_mode.require ()), 1982 tmode)) 1983 { 1984 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode, 1985 bitsize, bitnum, 1986 unsignedp, target, mode, 1987 tmode); 1988 if (result) 1989 return result; 1990 } 1991 1992 /* If OP0 is a memory, try copying it to a register and seeing if a 1993 cheap register alternative is available. */ 1994 if (MEM_P (op0) && !reverse) 1995 { 1996 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum, 1997 tmode)) 1998 { 1999 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode, 2000 bitsize, bitnum, 2001 unsignedp, target, mode, 2002 tmode); 2003 if (result) 2004 return result; 2005 } 2006 2007 rtx_insn *last = get_last_insn (); 2008 2009 /* Try loading part of OP0 into a register and extracting the 2010 bitfield from that.
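   adjust_bit_field_mem_for_reg picks an integer-mode chunk of the MEM
   that covers the field and suits a register extraction pattern;
   copy_to_reg then loads that chunk and the recursive call retries
   the register paths above. If even that fails, every insn emitted
   for the attempt is deleted again via delete_insns_since.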
*/ 2011 unsigned HOST_WIDE_INT bitpos; 2012 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum, 2013 0, 0, tmode, &bitpos); 2014 if (xop0) 2015 { 2016 xop0 = copy_to_reg (xop0); 2017 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos, 2018 unsignedp, target, 2019 mode, tmode, reverse, false, NULL); 2020 if (result) 2021 return result; 2022 delete_insns_since (last); 2023 } 2024 } 2025 2026 if (!fallback_p) 2027 return NULL; 2028 2029 /* Find a correspondingly-sized integer field, so we can apply 2030 shifts and masks to it. */ 2031 scalar_int_mode int_mode; 2032 if (!int_mode_for_mode (tmode).exists (&int_mode)) 2033 /* If this fails, we should probably push op0 out to memory and then 2034 do a load. */ 2035 int_mode = int_mode_for_mode (mode).require (); 2036 2037 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize, 2038 bitnum, target, unsignedp, reverse); 2039 2040 /* Complex values must be reversed piecewise, so we need to undo the global 2041 reversal, convert to the complex mode and reverse again. */ 2042 if (reverse && COMPLEX_MODE_P (tmode)) 2043 { 2044 target = flip_storage_order (int_mode, target); 2045 target = convert_extracted_bit_field (target, mode, tmode, unsignedp); 2046 target = flip_storage_order (tmode, target); 2047 } 2048 else 2049 target = convert_extracted_bit_field (target, mode, tmode, unsignedp); 2050 2051 return target; 2052} 2053 2054/* Generate code to extract a bit-field from STR_RTX 2055 containing BITSIZE bits, starting at BITNUM, 2056 and put it in TARGET if possible (if TARGET is nonzero). 2057 Regardless of TARGET, we return the rtx for where the value is placed. 2058 2059 STR_RTX is the structure containing the byte (a REG or MEM). 2060 UNSIGNEDP is nonzero if this is an unsigned bit field. 2061 MODE is the natural mode of the field value once extracted. 2062 TMODE is the mode the caller would like the value to have; 2063 but the value may be returned with type MODE instead. 2064 2065 If REVERSE is true, the extraction is to be done in reverse order. 2066 2067 If a TARGET is specified and we can store in it at no extra cost, 2068 we do so, and return TARGET. 2069 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred 2070 if they are equally easy. */ 2071 2072rtx 2073extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum, 2074 int unsignedp, rtx target, machine_mode mode, 2075 machine_mode tmode, bool reverse, rtx *alt_rtl) 2076{ 2077 machine_mode mode1; 2078 2079 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */ 2080 if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0)) 2081 mode1 = GET_MODE (str_rtx); 2082 else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0)) 2083 mode1 = GET_MODE (target); 2084 else 2085 mode1 = tmode; 2086 2087 unsigned HOST_WIDE_INT ibitsize, ibitnum; 2088 scalar_int_mode int_mode; 2089 if (bitsize.is_constant (&ibitsize) 2090 && bitnum.is_constant (&ibitnum) 2091 && is_a <scalar_int_mode> (mode1, &int_mode) 2092 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, 2093 int_mode, 0, 0)) 2094 { 2095 /* Extraction of a full INT_MODE value can be done with a simple load. 2096 We know here that the field can be accessed with one single 2097 instruction. For targets that support unaligned memory, 2098 an unaligned access may be necessary.
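   As an illustration (a made-up case): with -fstrict-volatile-bitfields,
   an 8-bit field declared in a volatile int container is fetched with
   one load of the whole container in its declared mode, followed by
   shifting and masking in a register, rather than with a narrower
   memory access.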
*/ 2099 if (ibitsize == GET_MODE_BITSIZE (int_mode)) 2100 { 2101 rtx result = adjust_bitfield_address (str_rtx, int_mode, 2102 ibitnum / BITS_PER_UNIT); 2103 if (reverse) 2104 result = flip_storage_order (int_mode, result); 2105 gcc_assert (ibitnum % BITS_PER_UNIT == 0); 2106 return convert_extracted_bit_field (result, mode, tmode, unsignedp); 2107 } 2108 2109 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum, 2110 &ibitnum); 2111 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode)); 2112 str_rtx = copy_to_reg (str_rtx); 2113 return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp, 2114 target, mode, tmode, reverse, true, alt_rtl); 2115 } 2116 2117 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, 2118 target, mode, tmode, reverse, true, alt_rtl); 2119} 2120 2121/* Use shifts and boolean operations to extract a field of BITSIZE bits 2122 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0, 2123 otherwise OP0 is a BLKmode MEM. 2124 2125 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value). 2126 If REVERSE is true, the extraction is to be done in reverse order. 2127 2128 If TARGET is nonzero, attempts to store the value there 2129 and return TARGET, but this is not guaranteed. 2130 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */ 2131 2132static rtx 2133extract_fixed_bit_field (machine_mode tmode, rtx op0, 2134 opt_scalar_int_mode op0_mode, 2135 unsigned HOST_WIDE_INT bitsize, 2136 unsigned HOST_WIDE_INT bitnum, rtx target, 2137 int unsignedp, bool reverse) 2138{ 2139 scalar_int_mode mode; 2140 if (MEM_P (op0)) 2141 { 2142 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), 2143 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode)) 2144 /* The only way this should occur is if the field spans word 2145 boundaries. */ 2146 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum, 2147 unsignedp, reverse); 2148 2149 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum); 2150 } 2151 else 2152 mode = op0_mode.require (); 2153 2154 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum, 2155 target, unsignedp, reverse); 2156} 2157 2158/* Helper function for extract_fixed_bit_field, extracts 2159 the bit field always using MODE, which is the mode of OP0. 2160 The other arguments are as for extract_fixed_bit_field. */ 2161 2162static rtx 2163extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode, 2164 unsigned HOST_WIDE_INT bitsize, 2165 unsigned HOST_WIDE_INT bitnum, rtx target, 2166 int unsignedp, bool reverse) 2167{ 2168 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode) 2169 for invalid input, such as extract equivalent of f5 from 2170 gcc.dg/pr48335-2.c. */ 2171 2172 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) 2173 /* BITNUM is the distance between our msb and that of OP0. 2174 Convert it to the distance from the lsb. */ 2175 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum; 2176 2177 /* Now BITNUM is always the distance between the field's lsb and that of OP0. 2178 We have reduced the big-endian case to the little-endian case. */ 2179 if (reverse) 2180 op0 = flip_storage_order (mode, op0); 2181 2182 if (unsignedp) 2183 { 2184 if (bitnum) 2185 { 2186 /* If the field does not already start at the lsb, 2187 shift it so it does. */ 2188 /* Maybe propagate the target for the shift. */ 2189 rtx subtarget = (target != 0 && REG_P (target) ? 
target : 0); 2190 if (tmode != mode) 2191 subtarget = 0; 2192 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1); 2193 } 2194 /* Convert the value to the desired mode. TMODE must also be a 2195 scalar integer for this conversion to make sense, since we 2196 shouldn't reinterpret the bits. */ 2197 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode); 2198 if (mode != new_mode) 2199 op0 = convert_to_mode (new_mode, op0, 1); 2200 2201 /* Unless the msb of the field used to be the msb when we shifted, 2202 mask out the upper bits. */ 2203 2204 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize) 2205 return expand_binop (new_mode, and_optab, op0, 2206 mask_rtx (new_mode, 0, bitsize, 0), 2207 target, 1, OPTAB_LIB_WIDEN); 2208 return op0; 2209 } 2210 2211 /* To extract a signed bit-field, first shift its msb to the msb of the word, 2212 then arithmetic-shift its lsb to the lsb of the word. */ 2213 op0 = force_reg (mode, op0); 2214 2215 /* Find the narrowest integer mode that contains the field. */ 2216 2217 opt_scalar_int_mode mode_iter; 2218 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT) 2219 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum) 2220 break; 2221 2222 mode = mode_iter.require (); 2223 op0 = convert_to_mode (mode, op0, 0); 2224 2225 if (mode != tmode) 2226 target = 0; 2227 2228 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum)) 2229 { 2230 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum); 2231 /* Maybe propagate the target for the shift. */ 2232 rtx subtarget = (target != 0 && REG_P (target) ? target : 0); 2233 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1); 2234 } 2235 2236 return expand_shift (RSHIFT_EXPR, mode, op0, 2237 GET_MODE_BITSIZE (mode) - bitsize, target, 0); 2238} 2239 2240/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value 2241 VALUE << BITPOS. */ 2242 2243static rtx 2244lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value, 2245 int bitpos) 2246{ 2247 return immed_wide_int_const (wi::lshift (value, bitpos), mode); 2248} 2249 2250/* Extract a bit field that is split across two words 2251 and return an RTX for the result. 2252 2253 OP0 is the REG, SUBREG or MEM rtx for the first of the two words. 2254 BITSIZE is the field width; BITPOS, position of its first bit, in the word. 2255 UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend. 2256 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is 2257 a BLKmode MEM. 2258 2259 If REVERSE is true, the extraction is to be done in reverse order. */ 2260 2261static rtx 2262extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode, 2263 unsigned HOST_WIDE_INT bitsize, 2264 unsigned HOST_WIDE_INT bitpos, int unsignedp, 2265 bool reverse) 2266{ 2267 unsigned int unit; 2268 unsigned int bitsdone = 0; 2269 rtx result = NULL_RTX; 2270 int first = 1; 2271 2272 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that 2273 much at a time. */ 2274 if (REG_P (op0) || GET_CODE (op0) == SUBREG) 2275 unit = BITS_PER_WORD; 2276 else 2277 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); 2278 2279 while (bitsdone < bitsize) 2280 { 2281 unsigned HOST_WIDE_INT thissize; 2282 rtx part; 2283 unsigned HOST_WIDE_INT thispos; 2284 unsigned HOST_WIDE_INT offset; 2285 2286 offset = (bitpos + bitsdone) / unit; 2287 thispos = (bitpos + bitsdone) % unit; 2288 2289 /* THISSIZE must not overrun a word boundary. Otherwise, 2290 extract_fixed_bit_field will call us again, and we will mutually 2291 recurse forever.
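   E.g. with 32-bit words, a 20-bit field at bitpos 26 is extracted as
   a 6-bit piece from the first word and a 14-bit piece from the
   second; the code below then shifts each piece into position and
   IORs the pieces together.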
*/ 2292 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); 2293 thissize = MIN (thissize, unit - thispos); 2294 2295 /* If OP0 is a register, then handle OFFSET here. */ 2296 rtx op0_piece = op0; 2297 opt_scalar_int_mode op0_piece_mode = op0_mode; 2298 if (SUBREG_P (op0) || REG_P (op0)) 2299 { 2300 op0_piece = operand_subword_force (op0, offset, op0_mode.require ()); 2301 op0_piece_mode = word_mode; 2302 offset = 0; 2303 } 2304 2305 /* Extract the parts in bit-counting order, 2306 whose meaning is determined by BYTES_PER_UNIT. 2307 OFFSET is in UNITs, and UNIT is in bits. */ 2308 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode, 2309 thissize, offset * unit + thispos, 2310 0, 1, reverse); 2311 bitsdone += thissize; 2312 2313 /* Shift this part into place for the result. */ 2314 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) 2315 { 2316 if (bitsize != bitsdone) 2317 part = expand_shift (LSHIFT_EXPR, word_mode, part, 2318 bitsize - bitsdone, 0, 1); 2319 } 2320 else 2321 { 2322 if (bitsdone != thissize) 2323 part = expand_shift (LSHIFT_EXPR, word_mode, part, 2324 bitsdone - thissize, 0, 1); 2325 } 2326 2327 if (first) 2328 result = part; 2329 else 2330 /* Combine the parts with bitwise or. This works 2331 because we extracted each part as an unsigned bit field. */ 2332 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1, 2333 OPTAB_LIB_WIDEN); 2334 2335 first = 0; 2336 } 2337 2338 /* Unsigned bit field: we are done. */ 2339 if (unsignedp) 2340 return result; 2341 /* Signed bit field: sign-extend with two arithmetic shifts. */ 2342 result = expand_shift (LSHIFT_EXPR, word_mode, result, 2343 BITS_PER_WORD - bitsize, NULL_RTX, 0); 2344 return expand_shift (RSHIFT_EXPR, word_mode, result, 2345 BITS_PER_WORD - bitsize, NULL_RTX, 0); 2346} 2347 2348/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving 2349 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than 2350 MODE, fill the upper bits with zeros. Fail if the layout of either 2351 mode is unknown (as for CC modes) or if the extraction would involve 2352 unprofitable mode punning. Return the value on success, otherwise 2353 return null. 2354 2355 This is different from gen_lowpart* in these respects: 2356 2357 - the returned value must always be considered an rvalue 2358 2359 - when MODE is wider than SRC_MODE, the extraction involves 2360 a zero extension 2361 2362 - when MODE is smaller than SRC_MODE, the extraction involves 2363 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION). 2364 2365 In other words, this routine performs a computation, whereas the 2366 gen_lowpart* routines are conceptually lvalue or rvalue subreg 2367 operations. */ 2368 2369rtx 2370extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src) 2371{ 2372 scalar_int_mode int_mode, src_int_mode; 2373 2374 if (mode == src_mode) 2375 return src; 2376 2377 if (CONSTANT_P (src)) 2378 { 2379 /* simplify_gen_subreg can't be used here, as if simplify_subreg 2380 fails, it will happily create (subreg (symbol_ref)) or similar 2381 invalid SUBREGs. 
*/ 2382 poly_uint64 byte = subreg_lowpart_offset (mode, src_mode); 2383 rtx ret = simplify_subreg (mode, src, src_mode, byte); 2384 if (ret) 2385 return ret; 2386 2387 if (GET_MODE (src) == VOIDmode 2388 || !validate_subreg (mode, src_mode, src, byte)) 2389 return NULL_RTX; 2390 2391 src = force_reg (GET_MODE (src), src); 2392 return gen_rtx_SUBREG (mode, src, byte); 2393 } 2394 2395 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC) 2396 return NULL_RTX; 2397 2398 if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode)) 2399 && targetm.modes_tieable_p (mode, src_mode)) 2400 { 2401 rtx x = gen_lowpart_common (mode, src); 2402 if (x) 2403 return x; 2404 } 2405 2406 if (!int_mode_for_mode (src_mode).exists (&src_int_mode) 2407 || !int_mode_for_mode (mode).exists (&int_mode)) 2408 return NULL_RTX; 2409 2410 if (!targetm.modes_tieable_p (src_int_mode, src_mode)) 2411 return NULL_RTX; 2412 if (!targetm.modes_tieable_p (int_mode, mode)) 2413 return NULL_RTX; 2414 2415 src = gen_lowpart (src_int_mode, src); 2416 src = convert_modes (int_mode, src_int_mode, src, true); 2417 src = gen_lowpart (mode, src); 2418 return src; 2419} 2420 2421/* Add INC into TARGET. */ 2422 2423void 2424expand_inc (rtx target, rtx inc) 2425{ 2426 rtx value = expand_binop (GET_MODE (target), add_optab, 2427 target, inc, 2428 target, 0, OPTAB_LIB_WIDEN); 2429 if (value != target) 2430 emit_move_insn (target, value); 2431} 2432 2433/* Subtract DEC from TARGET. */ 2434 2435void 2436expand_dec (rtx target, rtx dec) 2437{ 2438 rtx value = expand_binop (GET_MODE (target), sub_optab, 2439 target, dec, 2440 target, 0, OPTAB_LIB_WIDEN); 2441 if (value != target) 2442 emit_move_insn (target, value); 2443} 2444 2445/* Output a shift instruction for expression code CODE, 2446 with SHIFTED being the rtx for the value to shift, 2447 and AMOUNT the rtx for the amount to shift by. 2448 Store the result in the rtx TARGET, if that is convenient. 2449 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. 2450 Return the rtx for where the value is. 2451 If that cannot be done, abort the compilation unless MAY_FAIL is true, 2452 in which case 0 is returned. */ 2453 2454static rtx 2455expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted, 2456 rtx amount, rtx target, int unsignedp, bool may_fail = false) 2457{ 2458 rtx op1, temp = 0; 2459 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR); 2460 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR); 2461 optab lshift_optab = ashl_optab; 2462 optab rshift_arith_optab = ashr_optab; 2463 optab rshift_uns_optab = lshr_optab; 2464 optab lrotate_optab = rotl_optab; 2465 optab rrotate_optab = rotr_optab; 2466 machine_mode op1_mode; 2467 scalar_mode scalar_mode = GET_MODE_INNER (mode); 2468 int attempt; 2469 bool speed = optimize_insn_for_speed_p (); 2470 2471 op1 = amount; 2472 op1_mode = GET_MODE (op1); 2473 2474 /* Determine whether the shift/rotate amount is a vector, or scalar. If the 2475 shift amount is a vector, use the vector/vector shift patterns. */ 2476 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode)) 2477 { 2478 lshift_optab = vashl_optab; 2479 rshift_arith_optab = vashr_optab; 2480 rshift_uns_optab = vlshr_optab; 2481 lrotate_optab = vrotl_optab; 2482 rrotate_optab = vrotr_optab; 2483 } 2484 2485 /* Previously detected shift-counts computed by NEGATE_EXPR 2486 and shifted in the other direction; but that does not work 2487 on all machines. 
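   On targets that define SHIFT_COUNT_TRUNCATED, the count is known to
   be reduced modulo the width, so an over-wide constant count can be
   folded below; e.g. a count of 33 in a 32-bit mode is equivalent to
   a count of 1. Targets without that guarantee must leave the count
   alone.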
*/ 2488 2489 if (SHIFT_COUNT_TRUNCATED) 2490 { 2491 if (CONST_INT_P (op1) 2492 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >= 2493 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode))) 2494 op1 = gen_int_shift_amount (mode, 2495 (unsigned HOST_WIDE_INT) INTVAL (op1) 2496 % GET_MODE_BITSIZE (scalar_mode)); 2497 else if (GET_CODE (op1) == SUBREG 2498 && subreg_lowpart_p (op1) 2499 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1))) 2500 && SCALAR_INT_MODE_P (GET_MODE (op1))) 2501 op1 = SUBREG_REG (op1); 2502 } 2503 2504 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2, 2505 prefer left rotation, if op1 is from bitsize / 2 + 1 to 2506 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1 2507 amount instead. */ 2508 if (rotate 2509 && CONST_INT_P (op1) 2510 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left, 2511 GET_MODE_BITSIZE (scalar_mode) - 1)) 2512 { 2513 op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode) 2514 - INTVAL (op1))); 2515 left = !left; 2516 code = left ? LROTATE_EXPR : RROTATE_EXPR; 2517 } 2518 2519 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi. 2520 Note that this is not the case for bigger values. For instance a rotation 2521 of 0x01020304 by 16 bits gives 0x03040102 which is different from 2522 0x04030201 (bswapsi). */ 2523 if (rotate 2524 && CONST_INT_P (op1) 2525 && INTVAL (op1) == BITS_PER_UNIT 2526 && GET_MODE_SIZE (scalar_mode) == 2 2527 && optab_handler (bswap_optab, mode) != CODE_FOR_nothing) 2528 return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp); 2529 2530 if (op1 == const0_rtx) 2531 return shifted; 2532 2533 /* Check whether it's cheaper to implement a left shift by a constant 2534 bit count by a sequence of additions. */ 2535 if (code == LSHIFT_EXPR 2536 && CONST_INT_P (op1) 2537 && INTVAL (op1) > 0 2538 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode) 2539 && INTVAL (op1) < MAX_BITS_PER_WORD 2540 && (shift_cost (speed, mode, INTVAL (op1)) 2541 > INTVAL (op1) * add_cost (speed, mode)) 2542 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST) 2543 { 2544 int i; 2545 for (i = 0; i < INTVAL (op1); i++) 2546 { 2547 temp = force_reg (mode, shifted); 2548 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX, 2549 unsignedp, OPTAB_LIB_WIDEN); 2550 } 2551 return shifted; 2552 } 2553 2554 for (attempt = 0; temp == 0 && attempt < 3; attempt++) 2555 { 2556 enum optab_methods methods; 2557 2558 if (attempt == 0) 2559 methods = OPTAB_DIRECT; 2560 else if (attempt == 1) 2561 methods = OPTAB_WIDEN; 2562 else 2563 methods = OPTAB_LIB_WIDEN; 2564 2565 if (rotate) 2566 { 2567 /* Widening does not work for rotation. */ 2568 if (methods == OPTAB_WIDEN) 2569 continue; 2570 else if (methods == OPTAB_LIB_WIDEN) 2571 { 2572 /* If we have been unable to open-code this by a rotation, 2573 do it as the IOR of two shifts. I.e., to rotate A 2574 by N bits, compute 2575 (A << N) | ((unsigned) A >> ((-N) & (C - 1))) 2576 where C is the bitsize of A. 2577 2578 It is theoretically possible that the target machine might 2579 not be able to perform either shift and hence we would 2580 be making two libcalls rather than just the one for the 2581 shift (similarly if IOR could not be done). We will allow 2582 this extremely unlikely lossage to avoid complicating the 2583 code below. */ 2584 2585 rtx subtarget = target == shifted ?
0 : target; 2586 rtx new_amount, other_amount; 2587 rtx temp1; 2588 2589 new_amount = op1; 2590 if (op1 == const0_rtx) 2591 return shifted; 2592 else if (CONST_INT_P (op1)) 2593 other_amount = gen_int_shift_amount 2594 (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1)); 2595 else 2596 { 2597 other_amount 2598 = simplify_gen_unary (NEG, GET_MODE (op1), 2599 op1, GET_MODE (op1)); 2600 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1; 2601 other_amount 2602 = simplify_gen_binary (AND, GET_MODE (op1), other_amount, 2603 gen_int_mode (mask, GET_MODE (op1))); 2604 } 2605 2606 shifted = force_reg (mode, shifted); 2607 2608 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR, 2609 mode, shifted, new_amount, 0, 1); 2610 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR, 2611 mode, shifted, other_amount, 2612 subtarget, 1); 2613 return expand_binop (mode, ior_optab, temp, temp1, target, 2614 unsignedp, methods); 2615 } 2616 2617 temp = expand_binop (mode, 2618 left ? lrotate_optab : rrotate_optab, 2619 shifted, op1, target, unsignedp, methods); 2620 } 2621 else if (unsignedp) 2622 temp = expand_binop (mode, 2623 left ? lshift_optab : rshift_uns_optab, 2624 shifted, op1, target, unsignedp, methods); 2625 2626 /* Do arithmetic shifts. 2627 Also, if we are going to widen the operand, we can just as well 2628 use an arithmetic right-shift instead of a logical one. */ 2629 if (temp == 0 && ! rotate 2630 && (! unsignedp || (! left && methods == OPTAB_WIDEN))) 2631 { 2632 enum optab_methods methods1 = methods; 2633 2634 /* If trying to widen a log shift to an arithmetic shift, 2635 don't accept an arithmetic shift of the same size. */ 2636 if (unsignedp) 2637 methods1 = OPTAB_MUST_WIDEN; 2638 2639 /* Arithmetic shift */ 2640 2641 temp = expand_binop (mode, 2642 left ? lshift_optab : rshift_arith_optab, 2643 shifted, op1, target, unsignedp, methods1); 2644 } 2645 2646 /* We used to try extzv here for logical right shifts, but that was 2647 only useful for one machine, the VAX, and caused poor code 2648 generation there for lshrdi3, so the code was deleted and a 2649 define_expand for lshrsi3 was added to vax.md. */ 2650 } 2651 2652 gcc_assert (temp != NULL_RTX || may_fail); 2653 return temp; 2654} 2655 2656/* Output a shift instruction for expression code CODE, 2657 with SHIFTED being the rtx for the value to shift, 2658 and AMOUNT the amount to shift by. 2659 Store the result in the rtx TARGET, if that is convenient. 2660 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. 2661 Return the rtx for where the value is. */ 2662 2663rtx 2664expand_shift (enum tree_code code, machine_mode mode, rtx shifted, 2665 poly_int64 amount, rtx target, int unsignedp) 2666{ 2667 return expand_shift_1 (code, mode, shifted, 2668 gen_int_shift_amount (mode, amount), 2669 target, unsignedp); 2670} 2671 2672/* Likewise, but return 0 if that cannot be done. */ 2673 2674static rtx 2675maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted, 2676 int amount, rtx target, int unsignedp) 2677{ 2678 return expand_shift_1 (code, mode, 2679 shifted, GEN_INT (amount), target, unsignedp, true); 2680} 2681 2682/* Output a shift instruction for expression code CODE, 2683 with SHIFTED being the rtx for the value to shift, 2684 and AMOUNT the tree for the amount to shift by. 2685 Store the result in the rtx TARGET, if that is convenient. 2686 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. 2687 Return the rtx for where the value is. 
*/ 2689 rtx 2690expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted, 2691 tree amount, rtx target, int unsignedp) 2692{ 2693 return expand_shift_1 (code, mode, 2694 shifted, expand_normal (amount), target, unsignedp); 2695} 2696 2697 2698static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, 2699 const struct mult_cost *, machine_mode mode); 2700static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx, 2701 const struct algorithm *, enum mult_variant); 2702static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); 2703static rtx extract_high_half (scalar_int_mode, rtx); 2704static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int); 2705static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx, 2706 int, int); 2707/* Compute and return the best algorithm for multiplying by T. 2708 The algorithm must cost less than cost_limit. 2709 If retval.cost >= COST_LIMIT, no algorithm was found and all 2710 other fields of the returned struct are undefined. 2711 MODE is the machine mode of the multiplication. */ 2712 2713static void 2714synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, 2715 const struct mult_cost *cost_limit, machine_mode mode) 2716{ 2717 int m; 2718 struct algorithm *alg_in, *best_alg; 2719 struct mult_cost best_cost; 2720 struct mult_cost new_limit; 2721 int op_cost, op_latency; 2722 unsigned HOST_WIDE_INT orig_t = t; 2723 unsigned HOST_WIDE_INT q; 2724 int maxm, hash_index; 2725 bool cache_hit = false; 2726 enum alg_code cache_alg = alg_zero; 2727 bool speed = optimize_insn_for_speed_p (); 2728 scalar_int_mode imode; 2729 struct alg_hash_entry *entry_ptr; 2730 2731 /* Indicate that no algorithm is yet found. If no algorithm 2732 is found, this value will be returned and indicate failure. */ 2733 alg_out->cost.cost = cost_limit->cost + 1; 2734 alg_out->cost.latency = cost_limit->latency + 1; 2735 2736 if (cost_limit->cost < 0 2737 || (cost_limit->cost == 0 && cost_limit->latency <= 0)) 2738 return; 2739 2740 /* Be prepared for vector modes. */ 2741 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode)); 2742 2743 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode)); 2744 2745 /* Restrict the bits of "t" to the multiplication's mode. */ 2746 t &= GET_MODE_MASK (imode); 2747 2748 /* t == 1 can be done in zero cost. */ 2749 if (t == 1) 2750 { 2751 alg_out->ops = 1; 2752 alg_out->cost.cost = 0; 2753 alg_out->cost.latency = 0; 2754 alg_out->op[0] = alg_m; 2755 return; 2756 } 2757 2758 /* t == 0 sometimes has a cost. If it does and it exceeds our limit, 2759 fail now. */ 2760 if (t == 0) 2761 { 2762 if (MULT_COST_LESS (cost_limit, zero_cost (speed))) 2763 return; 2764 else 2765 { 2766 alg_out->ops = 1; 2767 alg_out->cost.cost = zero_cost (speed); 2768 alg_out->cost.latency = zero_cost (speed); 2769 alg_out->op[0] = alg_zero; 2770 return; 2771 } 2772 } 2773 2774 /* We'll be needing a couple extra algorithm structures now. */ 2775 2776 alg_in = XALLOCA (struct algorithm); 2777 best_alg = XALLOCA (struct algorithm); 2778 best_cost = *cost_limit; 2779 2780 /* Compute the hash index. */ 2781 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES; 2782 2783 /* See if we already know what to do for T.
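   The hash index computed above is only a key; the <T, MODE, SPEED>
   comparison below guards against collisions. E.g. two requests to
   synthesize T == 10 in the same mode at the same speed setting land
   on, and can reuse, the same entry.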
*/ 2784 entry_ptr = alg_hash_entry_ptr (hash_index); 2785 if (entry_ptr->t == t 2786 && entry_ptr->mode == mode 2787 && entry_ptr->speed == speed 2788 && entry_ptr->alg != alg_unknown) 2789 { 2790 cache_alg = entry_ptr->alg; 2791 2792 if (cache_alg == alg_impossible) 2793 { 2794 /* The cache tells us that it's impossible to synthesize 2795 multiplication by T within entry_ptr->cost. */ 2796 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit)) 2797 /* COST_LIMIT is at least as restrictive as the one 2798 recorded in the hash table, in which case we have no 2799 hope of synthesizing a multiplication. Just 2800 return. */ 2801 return; 2802 2803 /* If we get here, COST_LIMIT is less restrictive than the 2804 one recorded in the hash table, so we may be able to 2805 synthesize a multiplication. Proceed as if we didn't 2806 have the cache entry. */ 2807 } 2808 else 2809 { 2810 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost)) 2811 /* The cached algorithm shows that this multiplication 2812 requires more cost than COST_LIMIT. Just return. This 2813 way, we don't clobber this cache entry with 2814 alg_impossible but retain useful information. */ 2815 return; 2816 2817 cache_hit = true; 2818 2819 switch (cache_alg) 2820 { 2821 case alg_shift: 2822 goto do_alg_shift; 2823 2824 case alg_add_t_m2: 2825 case alg_sub_t_m2: 2826 goto do_alg_addsub_t_m2; 2827 2828 case alg_add_factor: 2829 case alg_sub_factor: 2830 goto do_alg_addsub_factor; 2831 2832 case alg_add_t2_m: 2833 goto do_alg_add_t2_m; 2834 2835 case alg_sub_t2_m: 2836 goto do_alg_sub_t2_m; 2837 2838 default: 2839 gcc_unreachable (); 2840 } 2841 } 2842 } 2843 2844 /* If we have a group of zero bits at the low-order part of T, try 2845 multiplying by the remaining bits and then doing a shift. */ 2846 2847 if ((t & 1) == 0) 2848 { 2849 do_alg_shift: 2850 m = ctz_or_zero (t); /* m = number of low zero bits */ 2851 if (m < maxm) 2852 { 2853 q = t >> m; 2854 /* The function expand_shift will choose between a shift and 2855 a sequence of additions, so the observed cost is given as 2856 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */ 2857 op_cost = m * add_cost (speed, mode); 2858 if (shift_cost (speed, mode, m) < op_cost) 2859 op_cost = shift_cost (speed, mode, m); 2860 new_limit.cost = best_cost.cost - op_cost; 2861 new_limit.latency = best_cost.latency - op_cost; 2862 synth_mult (alg_in, q, &new_limit, mode); 2863 2864 alg_in->cost.cost += op_cost; 2865 alg_in->cost.latency += op_cost; 2866 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2867 { 2868 best_cost = alg_in->cost; 2869 std::swap (alg_in, best_alg); 2870 best_alg->log[best_alg->ops] = m; 2871 best_alg->op[best_alg->ops] = alg_shift; 2872 } 2873 2874 /* See if treating ORIG_T as a signed number yields a better 2875 sequence. Try this sequence only for a negative ORIG_T 2876 as it would be useless for a non-negative ORIG_T. */ 2877 if ((HOST_WIDE_INT) orig_t < 0) 2878 { 2879 /* Shift ORIG_T as follows because a right shift of a 2880 negative-valued signed type is implementation 2881 defined. */ 2882 q = ~(~orig_t >> m); 2883 /* The function expand_shift will choose between a shift 2884 and a sequence of additions, so the observed cost is 2885 given as MIN (m * add_cost(speed, mode), 2886 shift_cost(speed, mode, m)). 
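   As an assumed 32-bit example: orig_t == 0xfffffff0 (-16) gives
   m == 4 and q = ~(~orig_t >> 4) == 0xffffffff (-1), so the recursion
   gets to synthesize -1 rather than the unsigned q = t >> 4 ==
   0x0fffffff.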
*/ 2887 op_cost = m * add_cost (speed, mode); 2888 if (shift_cost (speed, mode, m) < op_cost) 2889 op_cost = shift_cost (speed, mode, m); 2890 new_limit.cost = best_cost.cost - op_cost; 2891 new_limit.latency = best_cost.latency - op_cost; 2892 synth_mult (alg_in, q, &new_limit, mode); 2893 2894 alg_in->cost.cost += op_cost; 2895 alg_in->cost.latency += op_cost; 2896 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2897 { 2898 best_cost = alg_in->cost; 2899 std::swap (alg_in, best_alg); 2900 best_alg->log[best_alg->ops] = m; 2901 best_alg->op[best_alg->ops] = alg_shift; 2902 } 2903 } 2904 } 2905 if (cache_hit) 2906 goto done; 2907 } 2908 2909 /* If we have an odd number, add or subtract one. */ 2910 if ((t & 1) != 0) 2911 { 2912 unsigned HOST_WIDE_INT w; 2913 2914 do_alg_addsub_t_m2: 2915 for (w = 1; (w & t) != 0; w <<= 1) 2916 ; 2917 /* If T was -1, then W will be zero after the loop. This is another 2918 case where T ends with ...111. Handling this with (T + 1) and 2919 subtracting 1 produces slightly better code and results in algorithm 2920 selection much faster than treating it like the ...0111 case 2921 below. */ 2922 if (w == 0 2923 || (w > 2 2924 /* Reject the case where t is 3. 2925 Thus we prefer addition in that case. */ 2926 && t != 3)) 2927 { 2928 /* T ends with ...111. Multiply by (T + 1) and subtract T. */ 2929 2930 op_cost = add_cost (speed, mode); 2931 new_limit.cost = best_cost.cost - op_cost; 2932 new_limit.latency = best_cost.latency - op_cost; 2933 synth_mult (alg_in, t + 1, &new_limit, mode); 2934 2935 alg_in->cost.cost += op_cost; 2936 alg_in->cost.latency += op_cost; 2937 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2938 { 2939 best_cost = alg_in->cost; 2940 std::swap (alg_in, best_alg); 2941 best_alg->log[best_alg->ops] = 0; 2942 best_alg->op[best_alg->ops] = alg_sub_t_m2; 2943 } 2944 } 2945 else 2946 { 2947 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */ 2948 2949 op_cost = add_cost (speed, mode); 2950 new_limit.cost = best_cost.cost - op_cost; 2951 new_limit.latency = best_cost.latency - op_cost; 2952 synth_mult (alg_in, t - 1, &new_limit, mode); 2953 2954 alg_in->cost.cost += op_cost; 2955 alg_in->cost.latency += op_cost; 2956 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2957 { 2958 best_cost = alg_in->cost; 2959 std::swap (alg_in, best_alg); 2960 best_alg->log[best_alg->ops] = 0; 2961 best_alg->op[best_alg->ops] = alg_add_t_m2; 2962 } 2963 } 2964 2965 /* We may be able to calculate a * -7, a * -15, a * -31, etc. 2966 quickly with a - a * n for some appropriate constant n. */ 2967 m = exact_log2 (-orig_t + 1); 2968 if (m >= 0 && m < maxm) 2969 { 2970 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); 2971 /* If the target has a cheap shift-and-subtract insn use 2972 that in preference to a shift insn followed by a sub insn. 2973 Assume that the shift-and-sub is "atomic" with a latency 2974 equal to its cost, otherwise assume that on superscalar 2975 hardware the shift may be executed concurrently with the 2976 earlier steps in the algorithm.
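   For instance (actual costs are target-specific): t == -7 has
   m == exact_log2 (8) == 3, the recursion synthesizes 1, and the
   product is formed as x - (x << 3), i.e. x - 8*x == -7*x, ideally
   as a single shift-and-subtract instruction.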
*/ 2977 if (shiftsub1_cost (speed, mode, m) <= op_cost) 2978 { 2979 op_cost = shiftsub1_cost (speed, mode, m); 2980 op_latency = op_cost; 2981 } 2982 else 2983 op_latency = add_cost (speed, mode); 2984 2985 new_limit.cost = best_cost.cost - op_cost; 2986 new_limit.latency = best_cost.latency - op_latency; 2987 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, 2988 &new_limit, mode); 2989 2990 alg_in->cost.cost += op_cost; 2991 alg_in->cost.latency += op_latency; 2992 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2993 { 2994 best_cost = alg_in->cost; 2995 std::swap (alg_in, best_alg); 2996 best_alg->log[best_alg->ops] = m; 2997 best_alg->op[best_alg->ops] = alg_sub_t_m2; 2998 } 2999 } 3000 3001 if (cache_hit) 3002 goto done; 3003 } 3004 3005 /* Look for factors of t of the form 3006 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)). 3007 If we find such a factor, we can multiply by t using an algorithm that 3008 multiplies by q, shifts the result by m and adds/subtracts it to itself. 3009 3010 We search for large factors first and loop down, even if large factors 3011 are less probable than small; if we find a large factor we will find a 3012 good sequence quickly, and therefore be able to prune (by decreasing 3013 COST_LIMIT) the search. */ 3014 3015 do_alg_addsub_factor: 3016 for (m = floor_log2 (t - 1); m >= 2; m--) 3017 { 3018 unsigned HOST_WIDE_INT d; 3019 3020 d = (HOST_WIDE_INT_1U << m) + 1; 3021 if (t % d == 0 && t > d && m < maxm 3022 && (!cache_hit || cache_alg == alg_add_factor)) 3023 { 3024 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); 3025 if (shiftadd_cost (speed, mode, m) <= op_cost) 3026 op_cost = shiftadd_cost (speed, mode, m); 3027 3028 op_latency = op_cost; 3029 3030 3031 new_limit.cost = best_cost.cost - op_cost; 3032 new_limit.latency = best_cost.latency - op_latency; 3033 synth_mult (alg_in, t / d, &new_limit, mode); 3034 3035 alg_in->cost.cost += op_cost; 3036 alg_in->cost.latency += op_latency; 3037 if (alg_in->cost.latency < op_cost) 3038 alg_in->cost.latency = op_cost; 3039 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 3040 { 3041 best_cost = alg_in->cost; 3042 std::swap (alg_in, best_alg); 3043 best_alg->log[best_alg->ops] = m; 3044 best_alg->op[best_alg->ops] = alg_add_factor; 3045 } 3046 /* Other factors will have been taken care of in the recursion. */ 3047 break; 3048 } 3049 3050 d = (HOST_WIDE_INT_1U << m) - 1; 3051 if (t % d == 0 && t > d && m < maxm 3052 && (!cache_hit || cache_alg == alg_sub_factor)) 3053 { 3054 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); 3055 if (shiftsub0_cost (speed, mode, m) <= op_cost) 3056 op_cost = shiftsub0_cost (speed, mode, m); 3057 3058 op_latency = op_cost; 3059 3060 new_limit.cost = best_cost.cost - op_cost; 3061 new_limit.latency = best_cost.latency - op_latency; 3062 synth_mult (alg_in, t / d, &new_limit, mode); 3063 3064 alg_in->cost.cost += op_cost; 3065 alg_in->cost.latency += op_latency; 3066 if (alg_in->cost.latency < op_cost) 3067 alg_in->cost.latency = op_cost; 3068 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 3069 { 3070 best_cost = alg_in->cost; 3071 std::swap (alg_in, best_alg); 3072 best_alg->log[best_alg->ops] = m; 3073 best_alg->op[best_alg->ops] = alg_sub_factor; 3074 } 3075 break; 3076 } 3077 } 3078 if (cache_hit) 3079 goto done; 3080 3081 /* Try shift-and-add (load effective address) instructions, 3082 e.g. a*3, a*5, a*9.
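   E.g. t == 9 takes the do_alg_add_t2_m path below with q == 8 and
   m == 3: the recursion synthesizes (9 - 1) >> 3 == 1, so the whole
   product becomes (x << 3) + x, one shift-and-add (a single lea on
   targets that have such an instruction).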
*/ 3083 if ((t & 1) != 0) 3084 { 3085 do_alg_add_t2_m: 3086 q = t - 1; 3087 m = ctz_hwi (q); 3088 if (q && m < maxm) 3089 { 3090 op_cost = shiftadd_cost (speed, mode, m); 3091 new_limit.cost = best_cost.cost - op_cost; 3092 new_limit.latency = best_cost.latency - op_cost; 3093 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode); 3094 3095 alg_in->cost.cost += op_cost; 3096 alg_in->cost.latency += op_cost; 3097 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 3098 { 3099 best_cost = alg_in->cost; 3100 std::swap (alg_in, best_alg); 3101 best_alg->log[best_alg->ops] = m; 3102 best_alg->op[best_alg->ops] = alg_add_t2_m; 3103 } 3104 } 3105 if (cache_hit) 3106 goto done; 3107 3108 do_alg_sub_t2_m: 3109 q = t + 1; 3110 m = ctz_hwi (q); 3111 if (q && m < maxm) 3112 { 3113 op_cost = shiftsub0_cost (speed, mode, m); 3114 new_limit.cost = best_cost.cost - op_cost; 3115 new_limit.latency = best_cost.latency - op_cost; 3116 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode); 3117 3118 alg_in->cost.cost += op_cost; 3119 alg_in->cost.latency += op_cost; 3120 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 3121 { 3122 best_cost = alg_in->cost; 3123 std::swap (alg_in, best_alg); 3124 best_alg->log[best_alg->ops] = m; 3125 best_alg->op[best_alg->ops] = alg_sub_t2_m; 3126 } 3127 } 3128 if (cache_hit) 3129 goto done; 3130 } 3131 3132 done: 3133 /* If best_cost has not decreased, we have not found any algorithm. */ 3134 if (!CHEAPER_MULT_COST (&best_cost, cost_limit)) 3135 { 3136 /* We failed to find an algorithm. Record alg_impossible for 3137 this case (that is, <T, MODE, COST_LIMIT>) so that next time 3138 we are asked to find an algorithm for T within the same or 3139 lower COST_LIMIT, we can immediately return to the 3140 caller. */ 3141 entry_ptr->t = t; 3142 entry_ptr->mode = mode; 3143 entry_ptr->speed = speed; 3144 entry_ptr->alg = alg_impossible; 3145 entry_ptr->cost = *cost_limit; 3146 return; 3147 } 3148 3149 /* Cache the result. */ 3150 if (!cache_hit) 3151 { 3152 entry_ptr->t = t; 3153 entry_ptr->mode = mode; 3154 entry_ptr->speed = speed; 3155 entry_ptr->alg = best_alg->op[best_alg->ops]; 3156 entry_ptr->cost.cost = best_cost.cost; 3157 entry_ptr->cost.latency = best_cost.latency; 3158 } 3159 3160 /* If we are getting too long a sequence for `struct algorithm' 3161 to record, make this search fail. */ 3162 if (best_alg->ops == MAX_BITS_PER_WORD) 3163 return; 3164 3165 /* Copy the algorithm from temporary space to the space at alg_out. 3166 We avoid using structure assignment because the majority of 3167 best_alg is normally undefined, and this is a critical function. */ 3168 alg_out->ops = best_alg->ops + 1; 3169 alg_out->cost = best_cost; 3170 memcpy (alg_out->op, best_alg->op, 3171 alg_out->ops * sizeof *alg_out->op); 3172 memcpy (alg_out->log, best_alg->log, 3173 alg_out->ops * sizeof *alg_out->log); 3174} 3175 3176/* Find the cheapest way of multiplying a value of mode MODE by VAL. 3177 Try three variations: 3178 3179 - a shift/add sequence based on VAL itself 3180 - a shift/add sequence based on -VAL, followed by a negation 3181 - a shift/add sequence based on VAL - 1, followed by an addition. 3182 3183 Return true if the cheapest of these costs less than MULT_COST, 3184 describing the algorithm in *ALG and final fixup in *VARIANT.
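   For example, val == -5 would typically come out cheapest through
   the second variant: synthesize 5 as x + (x << 2) and negate the
   result, instead of synthesizing the unsigned bit pattern of -5
   directly.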
*/ 3185 3186bool 3187choose_mult_variant (machine_mode mode, HOST_WIDE_INT val, 3188 struct algorithm *alg, enum mult_variant *variant, 3189 int mult_cost) 3190{ 3191 struct algorithm alg2; 3192 struct mult_cost limit; 3193 int op_cost; 3194 bool speed = optimize_insn_for_speed_p (); 3195 3196 /* Fail quickly for impossible bounds. */ 3197 if (mult_cost < 0) 3198 return false; 3199 3200 /* Ensure that mult_cost provides a reasonable upper bound. 3201 Any constant multiplication can be performed with less 3202 than 2 * bits additions. */ 3203 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode); 3204 if (mult_cost > op_cost) 3205 mult_cost = op_cost; 3206 3207 *variant = basic_variant; 3208 limit.cost = mult_cost; 3209 limit.latency = mult_cost; 3210 synth_mult (alg, val, &limit, mode); 3211 3212 /* This works only if the inverted value actually fits in an 3213 `unsigned int' */ 3214 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode)) 3215 { 3216 op_cost = neg_cost (speed, mode); 3217 if (MULT_COST_LESS (&alg->cost, mult_cost)) 3218 { 3219 limit.cost = alg->cost.cost - op_cost; 3220 limit.latency = alg->cost.latency - op_cost; 3221 } 3222 else 3223 { 3224 limit.cost = mult_cost - op_cost; 3225 limit.latency = mult_cost - op_cost; 3226 } 3227 3228 synth_mult (&alg2, -val, &limit, mode); 3229 alg2.cost.cost += op_cost; 3230 alg2.cost.latency += op_cost; 3231 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) 3232 *alg = alg2, *variant = negate_variant; 3233 } 3234 3235 /* This proves very useful for division-by-constant. */ 3236 op_cost = add_cost (speed, mode); 3237 if (MULT_COST_LESS (&alg->cost, mult_cost)) 3238 { 3239 limit.cost = alg->cost.cost - op_cost; 3240 limit.latency = alg->cost.latency - op_cost; 3241 } 3242 else 3243 { 3244 limit.cost = mult_cost - op_cost; 3245 limit.latency = mult_cost - op_cost; 3246 } 3247 3248 synth_mult (&alg2, val - 1, &limit, mode); 3249 alg2.cost.cost += op_cost; 3250 alg2.cost.latency += op_cost; 3251 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) 3252 *alg = alg2, *variant = add_variant; 3253 3254 return MULT_COST_LESS (&alg->cost, mult_cost); 3255} 3256 3257/* A subroutine of expand_mult, used for constant multiplications. 3258 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if 3259 convenient. Use the shift/add sequence described by ALG and apply 3260 the final fixup specified by VARIANT. */ 3261 3262static rtx 3263expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val, 3264 rtx target, const struct algorithm *alg, 3265 enum mult_variant variant) 3266{ 3267 unsigned HOST_WIDE_INT val_so_far; 3268 rtx_insn *insn; 3269 rtx accum, tem; 3270 int opno; 3271 machine_mode nmode; 3272 3273 /* Avoid referencing memory over and over and invalid sharing 3274 on SUBREGs. */ 3275 op0 = force_reg (mode, op0); 3276 3277 /* ACCUM starts out either as OP0 or as a zero, depending on 3278 the first operation. */ 3279 3280 if (alg->op[0] == alg_zero) 3281 { 3282 accum = copy_to_mode_reg (mode, CONST0_RTX (mode)); 3283 val_so_far = 0; 3284 } 3285 else if (alg->op[0] == alg_m) 3286 { 3287 accum = copy_to_mode_reg (mode, op0); 3288 val_so_far = 1; 3289 } 3290 else 3291 gcc_unreachable (); 3292 3293 for (opno = 1; opno < alg->ops; opno++) 3294 { 3295 int log = alg->log[opno]; 3296 rtx shift_subtarget = optimize ? 0 : accum; 3297 rtx add_target 3298 = (opno == alg->ops - 1 && target != 0 && variant != add_variant 3299 && !optimize) 3300 ? target : 0; 3301 rtx accum_target = optimize ? 
0 : accum; 3302 rtx accum_inner; 3303 3304 switch (alg->op[opno]) 3305 { 3306 case alg_shift: 3307 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0); 3308 /* REG_EQUAL note will be attached to the following insn. */ 3309 emit_move_insn (accum, tem); 3310 val_so_far <<= log; 3311 break; 3312 3313 case alg_add_t_m2: 3314 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0); 3315 accum = force_operand (gen_rtx_PLUS (mode, accum, tem), 3316 add_target ? add_target : accum_target); 3317 val_so_far += HOST_WIDE_INT_1U << log; 3318 break; 3319 3320 case alg_sub_t_m2: 3321 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0); 3322 accum = force_operand (gen_rtx_MINUS (mode, accum, tem), 3323 add_target ? add_target : accum_target); 3324 val_so_far -= HOST_WIDE_INT_1U << log; 3325 break; 3326 3327 case alg_add_t2_m: 3328 accum = expand_shift (LSHIFT_EXPR, mode, accum, 3329 log, shift_subtarget, 0); 3330 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), 3331 add_target ? add_target : accum_target); 3332 val_so_far = (val_so_far << log) + 1; 3333 break; 3334 3335 case alg_sub_t2_m: 3336 accum = expand_shift (LSHIFT_EXPR, mode, accum, 3337 log, shift_subtarget, 0); 3338 accum = force_operand (gen_rtx_MINUS (mode, accum, op0), 3339 add_target ? add_target : accum_target); 3340 val_so_far = (val_so_far << log) - 1; 3341 break; 3342 3343 case alg_add_factor: 3344 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0); 3345 accum = force_operand (gen_rtx_PLUS (mode, accum, tem), 3346 add_target ? add_target : accum_target); 3347 val_so_far += val_so_far << log; 3348 break; 3349 3350 case alg_sub_factor: 3351 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0); 3352 accum = force_operand (gen_rtx_MINUS (mode, tem, accum), 3353 (add_target 3354 ? add_target : (optimize ? 0 : tem))); 3355 val_so_far = (val_so_far << log) - val_so_far; 3356 break; 3357 3358 default: 3359 gcc_unreachable (); 3360 } 3361 3362 if (SCALAR_INT_MODE_P (mode)) 3363 { 3364 /* Write a REG_EQUAL note on the last insn so that we can cse 3365 multiplication sequences. Note that if ACCUM is a SUBREG, 3366 we've set the inner register and must properly indicate that. */ 3367 tem = op0, nmode = mode; 3368 accum_inner = accum; 3369 if (GET_CODE (accum) == SUBREG) 3370 { 3371 accum_inner = SUBREG_REG (accum); 3372 nmode = GET_MODE (accum_inner); 3373 tem = gen_lowpart (nmode, op0); 3374 } 3375 3376 /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG. 3377 In that case, only the low bits of accum would be guaranteed to 3378 be equal to the content of the REG_EQUAL note, the upper bits 3379 can be anything. */ 3380 if (!paradoxical_subreg_p (tem)) 3381 { 3382 insn = get_last_insn (); 3383 wide_int wval_so_far 3384 = wi::uhwi (val_so_far, 3385 GET_MODE_PRECISION (as_a <scalar_mode> (nmode))); 3386 rtx c = immed_wide_int_const (wval_so_far, nmode); 3387 set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c), 3388 accum_inner); 3389 } 3390 } 3391 } 3392 3393 if (variant == negate_variant) 3394 { 3395 val_so_far = -val_so_far; 3396 accum = expand_unop (mode, neg_optab, accum, target, 0); 3397 } 3398 else if (variant == add_variant) 3399 { 3400 val_so_far = val_so_far + 1; 3401 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target); 3402 } 3403 3404 /* Compare only the bits of val and val_so_far that are significant 3405 in the result mode, to avoid sign-/zero-extension confusion. 
 */ 3406 nmode = GET_MODE_INNER (mode); 3407 val &= GET_MODE_MASK (nmode); 3408 val_so_far &= GET_MODE_MASK (nmode); 3409 gcc_assert (val == (HOST_WIDE_INT) val_so_far); 3410 3411 return accum; 3412} 3413 3414/* Perform a multiplication and return an rtx for the result. 3415 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); 3416 TARGET is a suggestion for where to store the result (an rtx). 3417 3418 We check specially for a constant integer as OP1. 3419 If you want this check for OP0 as well, then before calling 3420 you should swap the two operands if OP0 would be constant. */ 3421 3422rtx 3423expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target, 3424 int unsignedp, bool no_libcall) 3425{ 3426 enum mult_variant variant; 3427 struct algorithm algorithm; 3428 rtx scalar_op1; 3429 int max_cost; 3430 bool speed = optimize_insn_for_speed_p (); 3431 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp; 3432 3433 if (CONSTANT_P (op0)) 3434 std::swap (op0, op1); 3435 3436 /* For vectors, there are several simplifications that can be made if 3437 all elements of the vector constant are identical. */ 3438 scalar_op1 = unwrap_const_vec_duplicate (op1); 3439 3440 if (INTEGRAL_MODE_P (mode)) 3441 { 3442 rtx fake_reg; 3443 HOST_WIDE_INT coeff; 3444 bool is_neg; 3445 int mode_bitsize; 3446 3447 if (op1 == CONST0_RTX (mode)) 3448 return op1; 3449 if (op1 == CONST1_RTX (mode)) 3450 return op0; 3451 if (op1 == CONSTM1_RTX (mode)) 3452 return expand_unop (mode, do_trapv ? negv_optab : neg_optab, 3453 op0, target, 0); 3454 3455 if (do_trapv) 3456 goto skip_synth; 3457 3458 /* If mode is an integer vector mode, check if the backend supports 3459 vector lshift (by scalar or vector) at all. If not, we can't use 3460 a synthesized multiply. */ 3461 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT 3462 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing 3463 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing) 3464 goto skip_synth; 3465 3466 /* These are the operations that are potentially turned into 3467 a sequence of shifts and additions. */ 3468 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode); 3469 3470 /* synth_mult does an `unsigned int' multiply. As long as the mode is 3471 less than or equal in size to `unsigned int' this doesn't matter. 3472 If the mode is larger than `unsigned int', then synth_mult works 3473 only if the constant value exactly fits in an `unsigned int' without 3474 any truncation. This means that multiplying by negative values does 3475 not work; results are off by 2^32 on a 32-bit machine. */ 3476 if (CONST_INT_P (scalar_op1)) 3477 { 3478 coeff = INTVAL (scalar_op1); 3479 is_neg = coeff < 0; 3480 } 3481#if TARGET_SUPPORTS_WIDE_INT 3482 else if (CONST_WIDE_INT_P (scalar_op1)) 3483#else 3484 else if (CONST_DOUBLE_AS_INT_P (scalar_op1)) 3485#endif 3486 { 3487 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode)); 3488 /* Perfect power of 2 (other than 1, which is handled above). */ 3489 if (shift > 0) 3490 return expand_shift (LSHIFT_EXPR, mode, op0, 3491 shift, target, unsignedp); 3492 else 3493 goto skip_synth; 3494 } 3495 else 3496 goto skip_synth; 3497 3498 /* We used to test optimize here, on the grounds that it's better to 3499 produce a smaller program when -O is not used. But this causes 3500 such a terrible slowdown sometimes that it seems better to always 3501 use synth_mult. */ 3502 3503 /* Special case powers of two. 
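   (E.g. a multiplication by 16 becomes a single left shift by 4.)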
 */ 3504 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff) 3505 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)) 3506 return expand_shift (LSHIFT_EXPR, mode, op0, 3507 floor_log2 (coeff), target, unsignedp); 3508 3509 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); 3510 3511 /* Attempt to handle multiplication of DImode values by negative 3512 coefficients, by performing the multiplication by a positive 3513 multiplier and then inverting the result. */ 3514 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT) 3515 { 3516 /* It's safe to use -coeff even for INT_MIN, as the 3517 result is interpreted as an unsigned coefficient. 3518 Exclude cost of op0 from max_cost to match the cost 3519 calculation of the synth_mult. */ 3520 coeff = -(unsigned HOST_WIDE_INT) coeff; 3521 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), 3522 mode, speed) 3523 - neg_cost (speed, mode)); 3524 if (max_cost <= 0) 3525 goto skip_synth; 3526 3527 /* Special case powers of two. */ 3528 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)) 3529 { 3530 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0, 3531 floor_log2 (coeff), target, unsignedp); 3532 return expand_unop (mode, neg_optab, temp, target, 0); 3533 } 3534 3535 if (choose_mult_variant (mode, coeff, &algorithm, &variant, 3536 max_cost)) 3537 { 3538 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX, 3539 &algorithm, variant); 3540 return expand_unop (mode, neg_optab, temp, target, 0); 3541 } 3542 goto skip_synth; 3543 } 3544 3545 /* Exclude cost of op0 from max_cost to match the cost 3546 calculation of the synth_mult. */ 3547 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed); 3548 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) 3549 return expand_mult_const (mode, op0, coeff, target, 3550 &algorithm, variant); 3551 } 3552 skip_synth: 3553 3554 /* Expand x*2.0 as x+x. */ 3555 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1) 3556 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2)) 3557 { 3558 op0 = force_reg (GET_MODE (op0), op0); 3559 return expand_binop (mode, add_optab, op0, op0, 3560 target, unsignedp, 3561 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN); 3562 } 3563 3564 /* This used to use umul_optab if unsigned, but for non-widening multiply 3565 there is no difference between signed and unsigned. */ 3566 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab, 3567 op0, op1, target, unsignedp, 3568 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN); 3569 gcc_assert (op0 || no_libcall); 3570 return op0; 3571} 3572 3573/* Return a cost estimate for multiplying a register by the given 3574 COEFFicient in the given MODE and SPEED. */ 3575 3576int 3577mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed) 3578{ 3579 int max_cost; 3580 struct algorithm algorithm; 3581 enum mult_variant variant; 3582 3583 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); 3584 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), 3585 mode, speed); 3586 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) 3587 return algorithm.cost.cost; 3588 else 3589 return max_cost; 3590} 3591 3592/* Perform a widening multiplication and return an rtx for the result. 3593 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); 3594 TARGET is a suggestion for where to store the result (an rtx). 3595 THIS_OPTAB is the optab we should use; it must be either umul_widen_optab 3596 or smul_widen_optab. 
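   (As an illustration, not from the original sources: a widening multiply
   of a HImode value by 10 into SImode can extend OP0 first and then use an
   SImode shift/add sequence such as ((x << 2) + x) << 1.)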
 3597 3598 We check specially for a constant integer as OP1, comparing the 3599 cost of a widening multiply against the cost of a sequence of shifts 3600 and adds. */ 3601 3602rtx 3603expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target, 3604 int unsignedp, optab this_optab) 3605{ 3606 bool speed = optimize_insn_for_speed_p (); 3607 rtx cop1; 3608 3609 if (CONST_INT_P (op1) 3610 && GET_MODE (op0) != VOIDmode 3611 && (cop1 = convert_modes (mode, GET_MODE (op0), op1, 3612 this_optab == umul_widen_optab)) 3613 && CONST_INT_P (cop1) 3614 && (INTVAL (cop1) >= 0 3615 || HWI_COMPUTABLE_MODE_P (mode))) 3616 { 3617 HOST_WIDE_INT coeff = INTVAL (cop1); 3618 int max_cost; 3619 enum mult_variant variant; 3620 struct algorithm algorithm; 3621 3622 if (coeff == 0) 3623 return CONST0_RTX (mode); 3624 3625 /* Special case powers of two. */ 3626 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)) 3627 { 3628 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab); 3629 return expand_shift (LSHIFT_EXPR, mode, op0, 3630 floor_log2 (coeff), target, unsignedp); 3631 } 3632 3633 /* Exclude cost of op0 from max_cost to match the cost 3634 calculation of the synth_mult. */ 3635 max_cost = mul_widen_cost (speed, mode); 3636 if (choose_mult_variant (mode, coeff, &algorithm, &variant, 3637 max_cost)) 3638 { 3639 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab); 3640 return expand_mult_const (mode, op0, coeff, target, 3641 &algorithm, variant); 3642 } 3643 } 3644 return expand_binop (mode, this_optab, op0, op1, target, 3645 unsignedp, OPTAB_LIB_WIDEN); 3646} 3647 3648/* Choose a minimal N + 1 bit approximation to 1/D that can be used to 3649 replace division by D, and put the least significant N bits of the result 3650 in *MULTIPLIER_PTR and return the most significant bit. 3651 3652 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT); the 3653 needed precision is in PRECISION (should be <= N). 3654 3655 PRECISION should be as small as possible so this function can choose the 3656 multiplier more freely. 3657 3658 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that 3659 is to be used for a final right shift is placed in *POST_SHIFT_PTR. 3660 3661 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR), 3662 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */ 3663 3664unsigned HOST_WIDE_INT 3665choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision, 3666 unsigned HOST_WIDE_INT *multiplier_ptr, 3667 int *post_shift_ptr, int *lgup_ptr) 3668{ 3669 int lgup, post_shift; 3670 int pow, pow2; 3671 3672 /* lgup = ceil(log2(divisor)); */ 3673 lgup = ceil_log2 (d); 3674 3675 gcc_assert (lgup <= n); 3676 3677 pow = n + lgup; 3678 pow2 = n + lgup - precision; 3679 3680 /* mlow = 2^(N + lgup)/d */ 3681 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT); 3682 wide_int mlow = wi::udiv_trunc (val, d); 3683 3684 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */ 3685 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT); 3686 wide_int mhigh = wi::udiv_trunc (val, d); 3687 3688 /* If precision == N, then mlow, mhigh exceed 2^N 3689 (but they do not exceed 2^(N+1)). */ 3690 3691 /* Reduce to lowest terms. 
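     That is, halve mlow and mhigh as long as the two halves remain
     distinct, decreasing post_shift to match, since the discarded low bits
     carry no information. Worked illustration (not part of the original
     sources), for d == 10 and N == 32: lgup == 4, mlow == 0x199999999 and
     mhigh == 0x19999999B; one halving gives 0xCCCCCCCC and 0xCCCCCCCD, a
     second would make them equal, so the loop stops with post_shift == 3
     and multiplier 0xCCCCCCCD, i.e. x / 10 computes as the high half of
     x * 0xCCCCCCCD shifted right by 3.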
*/ 3692 for (post_shift = lgup; post_shift > 0; post_shift--) 3693 { 3694 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1, 3695 HOST_BITS_PER_WIDE_INT); 3696 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1, 3697 HOST_BITS_PER_WIDE_INT); 3698 if (ml_lo >= mh_lo) 3699 break; 3700 3701 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT); 3702 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT); 3703 } 3704 3705 *post_shift_ptr = post_shift; 3706 *lgup_ptr = lgup; 3707 if (n < HOST_BITS_PER_WIDE_INT) 3708 { 3709 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1; 3710 *multiplier_ptr = mhigh.to_uhwi () & mask; 3711 return mhigh.to_uhwi () >= mask; 3712 } 3713 else 3714 { 3715 *multiplier_ptr = mhigh.to_uhwi (); 3716 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1); 3717 } 3718} 3719 3720/* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is 3721 congruent to 1 (mod 2**N). */ 3722 3723static unsigned HOST_WIDE_INT 3724invert_mod2n (unsigned HOST_WIDE_INT x, int n) 3725{ 3726 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */ 3727 3728 /* The algorithm notes that the choice y = x satisfies 3729 x*y == 1 mod 2^3, since x is assumed odd. 3730 Each iteration doubles the number of bits of significance in y. */ 3731 3732 unsigned HOST_WIDE_INT mask; 3733 unsigned HOST_WIDE_INT y = x; 3734 int nbit = 3; 3735 3736 mask = (n == HOST_BITS_PER_WIDE_INT 3737 ? HOST_WIDE_INT_M1U 3738 : (HOST_WIDE_INT_1U << n) - 1); 3739 3740 while (nbit < n) 3741 { 3742 y = y * (2 - x*y) & mask; /* Modulo 2^N */ 3743 nbit *= 2; 3744 } 3745 return y; 3746} 3747 3748/* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness 3749 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the 3750 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product 3751 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to 3752 become signed. 3753 3754 The result is put in TARGET if that is convenient. 3755 3756 MODE is the mode of operation. */ 3757 3758rtx 3759expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0, 3760 rtx op1, rtx target, int unsignedp) 3761{ 3762 rtx tem; 3763 enum rtx_code adj_code = unsignedp ? PLUS : MINUS; 3764 3765 tem = expand_shift (RSHIFT_EXPR, mode, op0, 3766 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0); 3767 tem = expand_and (mode, tem, op1, NULL_RTX); 3768 adj_operand 3769 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), 3770 adj_operand); 3771 3772 tem = expand_shift (RSHIFT_EXPR, mode, op1, 3773 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0); 3774 tem = expand_and (mode, tem, op0, NULL_RTX); 3775 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), 3776 target); 3777 3778 return target; 3779} 3780 3781/* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */ 3782 3783static rtx 3784extract_high_half (scalar_int_mode mode, rtx op) 3785{ 3786 if (mode == word_mode) 3787 return gen_highpart (mode, op); 3788 3789 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require (); 3790 3791 op = expand_shift (RSHIFT_EXPR, wider_mode, op, 3792 GET_MODE_BITSIZE (mode), 0, 1); 3793 return convert_modes (mode, wider_mode, op, 0); 3794} 3795 3796/* Like expmed_mult_highpart, but only consider using a multiplication 3797 optab. OP1 is an rtx for the constant operand. 
 */ 3798 3799 static rtx 3800expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1, 3801 rtx target, int unsignedp, int max_cost) 3802{ 3803 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode); 3804 optab moptab; 3805 rtx tem; 3806 int size; 3807 bool speed = optimize_insn_for_speed_p (); 3808 3809 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require (); 3810 3811 size = GET_MODE_BITSIZE (mode); 3812 3813 /* Firstly, try using a multiplication insn that only generates the needed 3814 high part of the product, and in the sign flavor of unsignedp. */ 3815 if (mul_highpart_cost (speed, mode) < max_cost) 3816 { 3817 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab; 3818 tem = expand_binop (mode, moptab, op0, narrow_op1, target, 3819 unsignedp, OPTAB_DIRECT); 3820 if (tem) 3821 return tem; 3822 } 3823 3824 /* Secondly, same as above, but use sign flavor opposite of unsignedp. 3825 Need to adjust the result after the multiplication. */ 3826 if (size - 1 < BITS_PER_WORD 3827 && (mul_highpart_cost (speed, mode) 3828 + 2 * shift_cost (speed, mode, size-1) 3829 + 4 * add_cost (speed, mode) < max_cost)) 3830 { 3831 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab; 3832 tem = expand_binop (mode, moptab, op0, narrow_op1, target, 3833 unsignedp, OPTAB_DIRECT); 3834 if (tem) 3835 /* We used the wrong signedness. Adjust the result. */ 3836 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1, 3837 tem, unsignedp); 3838 } 3839 3840 /* Try widening multiplication. */ 3841 moptab = unsignedp ? umul_widen_optab : smul_widen_optab; 3842 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing 3843 && mul_widen_cost (speed, wider_mode) < max_cost) 3844 { 3845 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0, 3846 unsignedp, OPTAB_WIDEN); 3847 if (tem) 3848 return extract_high_half (mode, tem); 3849 } 3850 3851 /* Try widening the mode and performing a non-widening multiplication. */ 3852 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing 3853 && size - 1 < BITS_PER_WORD 3854 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1) 3855 < max_cost)) 3856 { 3857 rtx_insn *insns; 3858 rtx wop0, wop1; 3859 3860 /* We need to widen the operands, for example to ensure the 3861 constant multiplier is correctly sign or zero extended. 3862 Use a sequence to clean up any instructions emitted by 3863 the conversions if things don't work out. */ 3864 start_sequence (); 3865 wop0 = convert_modes (wider_mode, mode, op0, unsignedp); 3866 wop1 = convert_modes (wider_mode, mode, op1, unsignedp); 3867 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0, 3868 unsignedp, OPTAB_WIDEN); 3869 insns = get_insns (); 3870 end_sequence (); 3871 3872 if (tem) 3873 { 3874 emit_insn (insns); 3875 return extract_high_half (mode, tem); 3876 } 3877 } 3878 3879 /* Try widening multiplication of opposite signedness, and adjust. */ 3880 moptab = unsignedp ? smul_widen_optab : umul_widen_optab; 3881 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing 3882 && size - 1 < BITS_PER_WORD 3883 && (mul_widen_cost (speed, wider_mode) 3884 + 2 * shift_cost (speed, mode, size-1) 3885 + 4 * add_cost (speed, mode) < max_cost)) 3886 { 3887 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 3888 NULL_RTX, ! unsignedp, OPTAB_WIDEN); 3889 if (tem != 0) 3890 { 3891 tem = extract_high_half (mode, tem); 3892 /* We used the wrong signedness. Adjust the result. 
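	 The adjustment relies on the identity (a sketch, not from the
	 original sources): for N-bit operands,
	   smul_hi (a, b) == umul_hi (a, b) - (a < 0 ? b : 0) - (b < 0 ? a : 0)
	 and adding the same two terms instead maps a signed high part back
	 to the unsigned one; expand_mult_highpart_adjust emits this using
	 arithmetic right shifts by N-1 as the sign masks.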
 */ 3893 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1, 3894 target, unsignedp); 3895 } 3896 } 3897 3898 return 0; 3899} 3900 3901/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant), 3902 putting the high half of the result in TARGET if that is convenient, 3903 and return where the result is. If the operation cannot be performed, 3904 0 is returned. 3905 3906 MODE is the mode of operation and result. 3907 3908 UNSIGNEDP nonzero means unsigned multiply. 3909 3910 MAX_COST is the total allowed cost for the expanded RTL. */ 3911 3912static rtx 3913expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1, 3914 rtx target, int unsignedp, int max_cost) 3915{ 3916 unsigned HOST_WIDE_INT cnst1; 3917 int extra_cost; 3918 bool sign_adjust = false; 3919 enum mult_variant variant; 3920 struct algorithm alg; 3921 rtx tem; 3922 bool speed = optimize_insn_for_speed_p (); 3923 3924 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */ 3925 gcc_assert (HWI_COMPUTABLE_MODE_P (mode)); 3926 3927 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode); 3928 3929 /* We can't optimize modes wider than BITS_PER_WORD. 3930 ??? We might be able to perform double-word arithmetic if 3931 mode == word_mode; however, all the cost calculations in 3932 synth_mult etc. assume single-word operations. */ 3933 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require (); 3934 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD) 3935 return expmed_mult_highpart_optab (mode, op0, op1, target, 3936 unsignedp, max_cost); 3937 3938 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1); 3939 3940 /* Check whether we try to multiply by a negative constant. */ 3941 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1)) 3942 { 3943 sign_adjust = true; 3944 extra_cost += add_cost (speed, mode); 3945 } 3946 3947 /* See whether shift/add multiplication is cheap enough. */ 3948 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant, 3949 max_cost - extra_cost)) 3950 { 3951 /* See whether the specialized multiplication optabs are 3952 cheaper than the shift/add version. */ 3953 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp, 3954 alg.cost.cost + extra_cost); 3955 if (tem) 3956 return tem; 3957 3958 tem = convert_to_mode (wider_mode, op0, unsignedp); 3959 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant); 3960 tem = extract_high_half (mode, tem); 3961 3962 /* Adjust result for signedness. */ 3963 if (sign_adjust) 3964 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem); 3965 3966 return tem; 3967 } 3968 return expmed_mult_highpart_optab (mode, op0, op1, target, 3969 unsignedp, max_cost); 3970} 3971 3972 3973/* Expand signed modulus of OP0 by a power of two D in mode MODE. */ 3974 3975static rtx 3976expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d) 3977{ 3978 rtx result, temp, shift; 3979 rtx_code_label *label; 3980 int logd; 3981 int prec = GET_MODE_PRECISION (mode); 3982 3983 logd = floor_log2 (d); 3984 result = gen_reg_rtx (mode); 3985 3986 /* Avoid conditional branches when they're expensive. 
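     For illustration (not part of the original file), with d == 4 and
     s == x >> 31 computed arithmetically (all ones when x is negative),
     the two sequences built below compute
       r = ((((x ^ s) - s) & 3) ^ s) - s
     and, when a logical shift is cheap, with s2 == (unsigned) s >> 30,
       r = ((x + s2) & 3) - s2.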
 */ 3987 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2 3988 && optimize_insn_for_speed_p ()) 3989 { 3990 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx, 3991 mode, 0, -1); 3992 if (signmask) 3993 { 3994 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1; 3995 signmask = force_reg (mode, signmask); 3996 shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd); 3997 3998 /* Use the rtx_cost of a LSHIFTRT instruction to determine 3999 which instruction sequence to use. If logical right shifts 4000 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise 4001 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */ 4002 4003 temp = gen_rtx_LSHIFTRT (mode, result, shift); 4004 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing 4005 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ()) 4006 > COSTS_N_INSNS (2))) 4007 { 4008 temp = expand_binop (mode, xor_optab, op0, signmask, 4009 NULL_RTX, 1, OPTAB_LIB_WIDEN); 4010 temp = expand_binop (mode, sub_optab, temp, signmask, 4011 NULL_RTX, 1, OPTAB_LIB_WIDEN); 4012 temp = expand_binop (mode, and_optab, temp, 4013 gen_int_mode (masklow, mode), 4014 NULL_RTX, 1, OPTAB_LIB_WIDEN); 4015 temp = expand_binop (mode, xor_optab, temp, signmask, 4016 NULL_RTX, 1, OPTAB_LIB_WIDEN); 4017 temp = expand_binop (mode, sub_optab, temp, signmask, 4018 NULL_RTX, 1, OPTAB_LIB_WIDEN); 4019 } 4020 else 4021 { 4022 signmask = expand_binop (mode, lshr_optab, signmask, shift, 4023 NULL_RTX, 1, OPTAB_LIB_WIDEN); 4024 signmask = force_reg (mode, signmask); 4025 4026 temp = expand_binop (mode, add_optab, op0, signmask, 4027 NULL_RTX, 1, OPTAB_LIB_WIDEN); 4028 temp = expand_binop (mode, and_optab, temp, 4029 gen_int_mode (masklow, mode), 4030 NULL_RTX, 1, OPTAB_LIB_WIDEN); 4031 temp = expand_binop (mode, sub_optab, temp, signmask, 4032 NULL_RTX, 1, OPTAB_LIB_WIDEN); 4033 } 4034 return temp; 4035 } 4036 } 4037 4038 /* Mask contains the mode's signbit and the significant bits of the 4039 modulus. By including the signbit in the operation, many targets 4040 can avoid an explicit compare operation in the following comparison 4041 against zero. */ 4042 wide_int mask = wi::mask (logd, false, prec); 4043 mask = wi::set_bit (mask, prec - 1); 4044 4045 temp = expand_binop (mode, and_optab, op0, 4046 immed_wide_int_const (mask, mode), 4047 result, 1, OPTAB_LIB_WIDEN); 4048 if (temp != result) 4049 emit_move_insn (result, temp); 4050 4051 label = gen_label_rtx (); 4052 do_cmp_and_jump (result, const0_rtx, GE, mode, label); 4053 4054 temp = expand_binop (mode, sub_optab, result, const1_rtx, result, 4055 0, OPTAB_LIB_WIDEN); 4056 4057 mask = wi::mask (logd, true, prec); 4058 temp = expand_binop (mode, ior_optab, temp, 4059 immed_wide_int_const (mask, mode), 4060 result, 1, OPTAB_LIB_WIDEN); 4061 temp = expand_binop (mode, add_optab, temp, const1_rtx, result, 4062 0, OPTAB_LIB_WIDEN); 4063 if (temp != result) 4064 emit_move_insn (result, temp); 4065 emit_label (label); 4066 return result; 4067} 4068 4069/* Expand signed division of OP0 by a power of two D in mode MODE. 4070 This routine is only called for positive values of D. 
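   For example (an illustration, not in the original sources), with d == 4
   on a 32-bit target the cheapest variant below is typically
   q = (x + ((x >> 31) & 3)) >> 2, using arithmetic shifts.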
*/ 4071 4072static rtx 4073expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d) 4074{ 4075 rtx temp; 4076 rtx_code_label *label; 4077 int logd; 4078 4079 logd = floor_log2 (d); 4080 4081 if (d == 2 4082 && BRANCH_COST (optimize_insn_for_speed_p (), 4083 false) >= 1) 4084 { 4085 temp = gen_reg_rtx (mode); 4086 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1); 4087 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX, 4088 0, OPTAB_LIB_WIDEN); 4089 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0); 4090 } 4091 4092 if (HAVE_conditional_move 4093 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2) 4094 { 4095 rtx temp2; 4096 4097 start_sequence (); 4098 temp2 = copy_to_mode_reg (mode, op0); 4099 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode), 4100 NULL_RTX, 0, OPTAB_LIB_WIDEN); 4101 temp = force_reg (mode, temp); 4102 4103 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */ 4104 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx, 4105 mode, temp, temp2, mode, 0); 4106 if (temp2) 4107 { 4108 rtx_insn *seq = get_insns (); 4109 end_sequence (); 4110 emit_insn (seq); 4111 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0); 4112 } 4113 end_sequence (); 4114 } 4115 4116 if (BRANCH_COST (optimize_insn_for_speed_p (), 4117 false) >= 2) 4118 { 4119 int ushift = GET_MODE_BITSIZE (mode) - logd; 4120 4121 temp = gen_reg_rtx (mode); 4122 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1); 4123 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD 4124 || shift_cost (optimize_insn_for_speed_p (), mode, ushift) 4125 > COSTS_N_INSNS (1)) 4126 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode), 4127 NULL_RTX, 0, OPTAB_LIB_WIDEN); 4128 else 4129 temp = expand_shift (RSHIFT_EXPR, mode, temp, 4130 ushift, NULL_RTX, 1); 4131 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX, 4132 0, OPTAB_LIB_WIDEN); 4133 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0); 4134 } 4135 4136 label = gen_label_rtx (); 4137 temp = copy_to_mode_reg (mode, op0); 4138 do_cmp_and_jump (temp, const0_rtx, GE, mode, label); 4139 expand_inc (temp, gen_int_mode (d - 1, mode)); 4140 emit_label (label); 4141 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0); 4142} 4143 4144/* Emit the code to divide OP0 by OP1, putting the result in TARGET 4145 if that is convenient, and returning where the result is. 4146 You may request either the quotient or the remainder as the result; 4147 specify REM_FLAG nonzero to get the remainder. 4148 4149 CODE is the expression code for which kind of division this is; 4150 it controls how rounding is done. MODE is the machine mode to use. 4151 UNSIGNEDP nonzero means do unsigned division. */ 4152 4153/* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI 4154 and then correct it by or'ing in missing high bits 4155 if result of ANDI is nonzero. 4156 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result. 4157 This could optimize to a bfexts instruction. 4158 But C doesn't use these operations, so their optimizations are 4159 left for later. */ 4160/* ??? For modulo, we don't actually need the highpart of the first product, 4161 the low part will do nicely. And for small divisors, the second multiply 4162 can also be a low-part only multiply or even be completely left out. 4163 E.g. 
to calculate the remainder of a division by 3 with a 32 bit 4164 multiply, multiply with 0x55555556 and extract the upper two bits; 4165 the result is exact for inputs up to 0x1fffffff. 4166 The input range can be reduced by using cross-sum rules. 4167 For odd divisors >= 3, the following table gives right shift counts 4168 so that if a number is shifted by an integer multiple of the given 4169 amount, the remainder stays the same: 4170 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20, 4171 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0, 4172 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0, 4173 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33, 4174 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12 4175 4176 Cross-sum rules for even numbers can be derived by leaving as many bits 4177 to the right alone as the divisor has zeros to the right. 4178 E.g. if x is an unsigned 32 bit number: 4179 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28 4180 */ 4181 4182rtx 4183expand_divmod (int rem_flag, enum tree_code code, machine_mode mode, 4184 rtx op0, rtx op1, rtx target, int unsignedp) 4185{ 4186 machine_mode compute_mode; 4187 rtx tquotient; 4188 rtx quotient = 0, remainder = 0; 4189 rtx_insn *last; 4190 rtx_insn *insn; 4191 optab optab1, optab2; 4192 int op1_is_constant, op1_is_pow2 = 0; 4193 int max_cost, extra_cost; 4194 static HOST_WIDE_INT last_div_const = 0; 4195 bool speed = optimize_insn_for_speed_p (); 4196 4197 op1_is_constant = CONST_INT_P (op1); 4198 if (op1_is_constant) 4199 { 4200 wide_int ext_op1 = rtx_mode_t (op1, mode); 4201 op1_is_pow2 = (wi::popcount (ext_op1) == 1 4202 || (! unsignedp 4203 && wi::popcount (wi::neg (ext_op1)) == 1)); 4204 } 4205 4206 /* 4207 This is the structure of expand_divmod: 4208 4209 First comes code to fix up the operands so we can perform the operations 4210 correctly and efficiently. 4211 4212 Second comes a switch statement with code specific for each rounding mode. 4213 For some special operands this code emits all RTL for the desired 4214 operation, for other cases, it generates only a quotient and stores it in 4215 QUOTIENT. The case for trunc division/remainder might leave quotient = 0, 4216 to indicate that it has not done anything. 4217 4218 Last comes code that finishes the operation. If QUOTIENT is set and 4219 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If 4220 QUOTIENT is not set, it is computed using trunc rounding. 4221 4222 We try to generate special code for division and remainder when OP1 is a 4223 constant. If |OP1| = 2**n we can use shifts and some other fast 4224 operations. For other values of OP1, we compute a carefully selected 4225 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0 4226 by m. 4227 4228 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper 4229 half of the product. Different strategies for generating the product are 4230 implemented in expmed_mult_highpart. 4231 4232 If what we actually want is the remainder, we generate that by another 4233 by-constant multiplication and a subtraction. */ 4234 4235 /* We shouldn't be called with OP1 == const1_rtx, but some of the 4236 code below will malfunction if we are, so check here and handle 4237 the special case if so. */ 4238 if (op1 == const1_rtx) 4239 return rem_flag ? const0_rtx : op0; 4240 4241 /* When dividing by -1, we could get an overflow. 4242 negv_optab can handle overflows. 
*/ 4243 if (! unsignedp && op1 == constm1_rtx) 4244 { 4245 if (rem_flag) 4246 return const0_rtx; 4247 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT 4248 ? negv_optab : neg_optab, op0, target, 0); 4249 } 4250 4251 if (target 4252 /* Don't use the function value register as a target 4253 since we have to read it as well as write it, 4254 and function-inlining gets confused by this. */ 4255 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target)) 4256 /* Don't clobber an operand while doing a multi-step calculation. */ 4257 || ((rem_flag || op1_is_constant) 4258 && (reg_mentioned_p (target, op0) 4259 || (MEM_P (op0) && MEM_P (target)))) 4260 || reg_mentioned_p (target, op1) 4261 || (MEM_P (op1) && MEM_P (target)))) 4262 target = 0; 4263 4264 /* Get the mode in which to perform this computation. Normally it will 4265 be MODE, but sometimes we can't do the desired operation in MODE. 4266 If so, pick a wider mode in which we can do the operation. Convert 4267 to that mode at the start to avoid repeated conversions. 4268 4269 First see what operations we need. These depend on the expression 4270 we are evaluating. (We assume that divxx3 insns exist under the 4271 same conditions that modxx3 insns and that these insns don't normally 4272 fail. If these assumptions are not correct, we may generate less 4273 efficient code in some cases.) 4274 4275 Then see if we find a mode in which we can open-code that operation 4276 (either a division, modulus, or shift). Finally, check for the smallest 4277 mode for which we can do the operation with a library call. */ 4278 4279 /* We might want to refine this now that we have division-by-constant 4280 optimization. Since expmed_mult_highpart tries so many variants, it is 4281 not straightforward to generalize this. Maybe we should make an array 4282 of possible modes in init_expmed? Save this for GCC 2.7. */ 4283 4284 optab1 = (op1_is_pow2 4285 ? (unsignedp ? lshr_optab : ashr_optab) 4286 : (unsignedp ? udiv_optab : sdiv_optab)); 4287 optab2 = (op1_is_pow2 ? optab1 4288 : (unsignedp ? udivmod_optab : sdivmod_optab)); 4289 4290 FOR_EACH_MODE_FROM (compute_mode, mode) 4291 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing 4292 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing) 4293 break; 4294 4295 if (compute_mode == VOIDmode) 4296 FOR_EACH_MODE_FROM (compute_mode, mode) 4297 if (optab_libfunc (optab1, compute_mode) 4298 || optab_libfunc (optab2, compute_mode)) 4299 break; 4300 4301 /* If we still couldn't find a mode, use MODE, but expand_binop will 4302 probably die. */ 4303 if (compute_mode == VOIDmode) 4304 compute_mode = mode; 4305 4306 if (target && GET_MODE (target) == compute_mode) 4307 tquotient = target; 4308 else 4309 tquotient = gen_reg_rtx (compute_mode); 4310 4311#if 0 4312 /* It should be possible to restrict the precision to GET_MODE_BITSIZE 4313 (mode), and thereby get better code when OP1 is a constant. Do that 4314 later. It will require going over all usages of SIZE below. */ 4315 size = GET_MODE_BITSIZE (mode); 4316#endif 4317 4318 /* Only deduct something for a REM if the last divide done was 4319 for a different constant. Then set the constant of the last 4320 divide. */ 4321 max_cost = (unsignedp 4322 ? udiv_cost (speed, compute_mode) 4323 : sdiv_cost (speed, compute_mode)); 4324 if (rem_flag && ! (last_div_const != 0 && op1_is_constant 4325 && INTVAL (op1) == last_div_const)) 4326 max_cost -= (mul_cost (speed, compute_mode) 4327 + add_cost (speed, compute_mode)); 4328 4329 last_div_const = ! 
rem_flag && op1_is_constant ? INTVAL (op1) : 0; 4330 4331 /* Now convert to the best mode to use. */ 4332 if (compute_mode != mode) 4333 { 4334 op0 = convert_modes (compute_mode, mode, op0, unsignedp); 4335 op1 = convert_modes (compute_mode, mode, op1, unsignedp); 4336 4337 /* convert_modes may have placed op1 into a register, so we 4338 must recompute the following. */ 4339 op1_is_constant = CONST_INT_P (op1); 4340 if (op1_is_constant) 4341 { 4342 wide_int ext_op1 = rtx_mode_t (op1, compute_mode); 4343 op1_is_pow2 = (wi::popcount (ext_op1) == 1 4344 || (! unsignedp 4345 && wi::popcount (wi::neg (ext_op1)) == 1)); 4346 } 4347 else 4348 op1_is_pow2 = 0; 4349 } 4350 4351 /* If one of the operands is a volatile MEM, copy it into a register. */ 4352 4353 if (MEM_P (op0) && MEM_VOLATILE_P (op0)) 4354 op0 = force_reg (compute_mode, op0); 4355 if (MEM_P (op1) && MEM_VOLATILE_P (op1)) 4356 op1 = force_reg (compute_mode, op1); 4357 4358 /* If we need the remainder or if OP1 is constant, we need to 4359 put OP0 in a register in case it has any queued subexpressions. */ 4360 if (rem_flag || op1_is_constant) 4361 op0 = force_reg (compute_mode, op0); 4362 4363 last = get_last_insn (); 4364 4365 /* Promote floor rounding to trunc rounding for unsigned operations. */ 4366 if (unsignedp) 4367 { 4368 if (code == FLOOR_DIV_EXPR) 4369 code = TRUNC_DIV_EXPR; 4370 if (code == FLOOR_MOD_EXPR) 4371 code = TRUNC_MOD_EXPR; 4372 if (code == EXACT_DIV_EXPR && op1_is_pow2) 4373 code = TRUNC_DIV_EXPR; 4374 } 4375 4376 if (op1 != const0_rtx) 4377 switch (code) 4378 { 4379 case TRUNC_MOD_EXPR: 4380 case TRUNC_DIV_EXPR: 4381 if (op1_is_constant) 4382 { 4383 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode); 4384 int size = GET_MODE_BITSIZE (int_mode); 4385 if (unsignedp) 4386 { 4387 unsigned HOST_WIDE_INT mh, ml; 4388 int pre_shift, post_shift; 4389 int dummy; 4390 wide_int wd = rtx_mode_t (op1, int_mode); 4391 unsigned HOST_WIDE_INT d = wd.to_uhwi (); 4392 4393 if (wi::popcount (wd) == 1) 4394 { 4395 pre_shift = floor_log2 (d); 4396 if (rem_flag) 4397 { 4398 unsigned HOST_WIDE_INT mask 4399 = (HOST_WIDE_INT_1U << pre_shift) - 1; 4400 remainder 4401 = expand_binop (int_mode, and_optab, op0, 4402 gen_int_mode (mask, int_mode), 4403 remainder, 1, 4404 OPTAB_LIB_WIDEN); 4405 if (remainder) 4406 return gen_lowpart (mode, remainder); 4407 } 4408 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0, 4409 pre_shift, tquotient, 1); 4410 } 4411 else if (size <= HOST_BITS_PER_WIDE_INT) 4412 { 4413 if (d >= (HOST_WIDE_INT_1U << (size - 1))) 4414 { 4415 /* Most significant bit of divisor is set; emit an scc 4416 insn. */ 4417 quotient = emit_store_flag_force (tquotient, GEU, op0, op1, 4418 int_mode, 1, 1); 4419 } 4420 else 4421 { 4422 /* Find a suitable multiplier and right shift count 4423 instead of multiplying with D. */ 4424 4425 mh = choose_multiplier (d, size, size, 4426 &ml, &post_shift, &dummy); 4427 4428 /* If the suggested multiplier is more than SIZE bits, 4429 we can do better for even divisors, using an 4430 initial right shift. 
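		 Worked illustration (not from the original sources): for
		 d == 7 and SIZE == 32, choose_multiplier yields
		 ml == 0x24924925 with mh != 0 and post_shift == 3, so the
		 code below computes t1 = high half of x * ml and then
		 q = (t1 + ((x - t1) >> 1)) >> 2.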
*/ 4431 if (mh != 0 && (d & 1) == 0) 4432 { 4433 pre_shift = ctz_or_zero (d); 4434 mh = choose_multiplier (d >> pre_shift, size, 4435 size - pre_shift, 4436 &ml, &post_shift, &dummy); 4437 gcc_assert (!mh); 4438 } 4439 else 4440 pre_shift = 0; 4441 4442 if (mh != 0) 4443 { 4444 rtx t1, t2, t3, t4; 4445 4446 if (post_shift - 1 >= BITS_PER_WORD) 4447 goto fail1; 4448 4449 extra_cost 4450 = (shift_cost (speed, int_mode, post_shift - 1) 4451 + shift_cost (speed, int_mode, 1) 4452 + 2 * add_cost (speed, int_mode)); 4453 t1 = expmed_mult_highpart 4454 (int_mode, op0, gen_int_mode (ml, int_mode), 4455 NULL_RTX, 1, max_cost - extra_cost); 4456 if (t1 == 0) 4457 goto fail1; 4458 t2 = force_operand (gen_rtx_MINUS (int_mode, 4459 op0, t1), 4460 NULL_RTX); 4461 t3 = expand_shift (RSHIFT_EXPR, int_mode, 4462 t2, 1, NULL_RTX, 1); 4463 t4 = force_operand (gen_rtx_PLUS (int_mode, 4464 t1, t3), 4465 NULL_RTX); 4466 quotient = expand_shift 4467 (RSHIFT_EXPR, int_mode, t4, 4468 post_shift - 1, tquotient, 1); 4469 } 4470 else 4471 { 4472 rtx t1, t2; 4473 4474 if (pre_shift >= BITS_PER_WORD 4475 || post_shift >= BITS_PER_WORD) 4476 goto fail1; 4477 4478 t1 = expand_shift 4479 (RSHIFT_EXPR, int_mode, op0, 4480 pre_shift, NULL_RTX, 1); 4481 extra_cost 4482 = (shift_cost (speed, int_mode, pre_shift) 4483 + shift_cost (speed, int_mode, post_shift)); 4484 t2 = expmed_mult_highpart 4485 (int_mode, t1, 4486 gen_int_mode (ml, int_mode), 4487 NULL_RTX, 1, max_cost - extra_cost); 4488 if (t2 == 0) 4489 goto fail1; 4490 quotient = expand_shift 4491 (RSHIFT_EXPR, int_mode, t2, 4492 post_shift, tquotient, 1); 4493 } 4494 } 4495 } 4496 else /* Too wide mode to use tricky code */ 4497 break; 4498 4499 insn = get_last_insn (); 4500 if (insn != last) 4501 set_dst_reg_note (insn, REG_EQUAL, 4502 gen_rtx_UDIV (int_mode, op0, op1), 4503 quotient); 4504 } 4505 else /* TRUNC_DIV, signed */ 4506 { 4507 unsigned HOST_WIDE_INT ml; 4508 int lgup, post_shift; 4509 rtx mlr; 4510 HOST_WIDE_INT d = INTVAL (op1); 4511 unsigned HOST_WIDE_INT abs_d; 4512 4513 /* Not prepared to handle division/remainder by 4514 0xffffffffffffffff8000000000000000 etc. */ 4515 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT) 4516 break; 4517 4518 /* Since d might be INT_MIN, we have to cast to 4519 unsigned HOST_WIDE_INT before negating to avoid 4520 undefined signed overflow. */ 4521 abs_d = (d >= 0 4522 ? (unsigned HOST_WIDE_INT) d 4523 : - (unsigned HOST_WIDE_INT) d); 4524 4525 /* n rem d = n rem -d */ 4526 if (rem_flag && d < 0) 4527 { 4528 d = abs_d; 4529 op1 = gen_int_mode (abs_d, int_mode); 4530 } 4531 4532 if (d == 1) 4533 quotient = op0; 4534 else if (d == -1) 4535 quotient = expand_unop (int_mode, neg_optab, op0, 4536 tquotient, 0); 4537 else if (size <= HOST_BITS_PER_WIDE_INT 4538 && abs_d == HOST_WIDE_INT_1U << (size - 1)) 4539 { 4540 /* This case is not handled correctly below. */ 4541 quotient = emit_store_flag (tquotient, EQ, op0, op1, 4542 int_mode, 1, 1); 4543 if (quotient == 0) 4544 goto fail1; 4545 } 4546 else if (EXACT_POWER_OF_2_OR_ZERO_P (d) 4547 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0) 4548 && (rem_flag 4549 ? smod_pow2_cheap (speed, int_mode) 4550 : sdiv_pow2_cheap (speed, int_mode)) 4551 /* We assume that cheap metric is true if the 4552 optab has an expander for this mode. */ 4553 && ((optab_handler ((rem_flag ? 
smod_optab 4554 : sdiv_optab), 4555 int_mode) 4556 != CODE_FOR_nothing) 4557 || (optab_handler (sdivmod_optab, int_mode) 4558 != CODE_FOR_nothing))) 4559 ; 4560 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)) 4561 { 4562 if (rem_flag) 4563 { 4564 remainder = expand_smod_pow2 (int_mode, op0, d); 4565 if (remainder) 4566 return gen_lowpart (mode, remainder); 4567 } 4568 4569 if (sdiv_pow2_cheap (speed, int_mode) 4570 && ((optab_handler (sdiv_optab, int_mode) 4571 != CODE_FOR_nothing) 4572 || (optab_handler (sdivmod_optab, int_mode) 4573 != CODE_FOR_nothing))) 4574 quotient = expand_divmod (0, TRUNC_DIV_EXPR, 4575 int_mode, op0, 4576 gen_int_mode (abs_d, 4577 int_mode), 4578 NULL_RTX, 0); 4579 else 4580 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d); 4581 4582 /* We have computed OP0 / abs(OP1). If OP1 is negative, 4583 negate the quotient. */ 4584 if (d < 0) 4585 { 4586 insn = get_last_insn (); 4587 if (insn != last 4588 && abs_d < (HOST_WIDE_INT_1U 4589 << (HOST_BITS_PER_WIDE_INT - 1))) 4590 set_dst_reg_note (insn, REG_EQUAL, 4591 gen_rtx_DIV (int_mode, op0, 4592 gen_int_mode 4593 (abs_d, 4594 int_mode)), 4595 quotient); 4596 4597 quotient = expand_unop (int_mode, neg_optab, 4598 quotient, quotient, 0); 4599 } 4600 } 4601 else if (size <= HOST_BITS_PER_WIDE_INT) 4602 { 4603 choose_multiplier (abs_d, size, size - 1, 4604 &ml, &post_shift, &lgup); 4605 if (ml < HOST_WIDE_INT_1U << (size - 1)) 4606 { 4607 rtx t1, t2, t3; 4608 4609 if (post_shift >= BITS_PER_WORD 4610 || size - 1 >= BITS_PER_WORD) 4611 goto fail1; 4612 4613 extra_cost = (shift_cost (speed, int_mode, post_shift) 4614 + shift_cost (speed, int_mode, size - 1) 4615 + add_cost (speed, int_mode)); 4616 t1 = expmed_mult_highpart 4617 (int_mode, op0, gen_int_mode (ml, int_mode), 4618 NULL_RTX, 0, max_cost - extra_cost); 4619 if (t1 == 0) 4620 goto fail1; 4621 t2 = expand_shift 4622 (RSHIFT_EXPR, int_mode, t1, 4623 post_shift, NULL_RTX, 0); 4624 t3 = expand_shift 4625 (RSHIFT_EXPR, int_mode, op0, 4626 size - 1, NULL_RTX, 0); 4627 if (d < 0) 4628 quotient 4629 = force_operand (gen_rtx_MINUS (int_mode, t3, t2), 4630 tquotient); 4631 else 4632 quotient 4633 = force_operand (gen_rtx_MINUS (int_mode, t2, t3), 4634 tquotient); 4635 } 4636 else 4637 { 4638 rtx t1, t2, t3, t4; 4639 4640 if (post_shift >= BITS_PER_WORD 4641 || size - 1 >= BITS_PER_WORD) 4642 goto fail1; 4643 4644 ml |= HOST_WIDE_INT_M1U << (size - 1); 4645 mlr = gen_int_mode (ml, int_mode); 4646 extra_cost = (shift_cost (speed, int_mode, post_shift) 4647 + shift_cost (speed, int_mode, size - 1) 4648 + 2 * add_cost (speed, int_mode)); 4649 t1 = expmed_mult_highpart (int_mode, op0, mlr, 4650 NULL_RTX, 0, 4651 max_cost - extra_cost); 4652 if (t1 == 0) 4653 goto fail1; 4654 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0), 4655 NULL_RTX); 4656 t3 = expand_shift 4657 (RSHIFT_EXPR, int_mode, t2, 4658 post_shift, NULL_RTX, 0); 4659 t4 = expand_shift 4660 (RSHIFT_EXPR, int_mode, op0, 4661 size - 1, NULL_RTX, 0); 4662 if (d < 0) 4663 quotient 4664 = force_operand (gen_rtx_MINUS (int_mode, t4, t3), 4665 tquotient); 4666 else 4667 quotient 4668 = force_operand (gen_rtx_MINUS (int_mode, t3, t4), 4669 tquotient); 4670 } 4671 } 4672 else /* Too wide mode to use tricky code */ 4673 break; 4674 4675 insn = get_last_insn (); 4676 if (insn != last) 4677 set_dst_reg_note (insn, REG_EQUAL, 4678 gen_rtx_DIV (int_mode, op0, op1), 4679 quotient); 4680 } 4681 break; 4682 } 4683 fail1: 4684 delete_insns_since (last); 4685 break; 4686 4687 case FLOOR_DIV_EXPR: 4688 case FLOOR_MOD_EXPR: 4689 /* We 
will come here only for signed operations. */ 4690 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode)) 4691 { 4692 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode); 4693 int size = GET_MODE_BITSIZE (int_mode); 4694 unsigned HOST_WIDE_INT mh, ml; 4695 int pre_shift, lgup, post_shift; 4696 HOST_WIDE_INT d = INTVAL (op1); 4697 4698 if (d > 0) 4699 { 4700 /* We could just as easily deal with negative constants here, 4701 but it does not seem worth the trouble for GCC 2.6. */ 4702 if (EXACT_POWER_OF_2_OR_ZERO_P (d)) 4703 { 4704 pre_shift = floor_log2 (d); 4705 if (rem_flag) 4706 { 4707 unsigned HOST_WIDE_INT mask 4708 = (HOST_WIDE_INT_1U << pre_shift) - 1; 4709 remainder = expand_binop 4710 (int_mode, and_optab, op0, 4711 gen_int_mode (mask, int_mode), 4712 remainder, 0, OPTAB_LIB_WIDEN); 4713 if (remainder) 4714 return gen_lowpart (mode, remainder); 4715 } 4716 quotient = expand_shift 4717 (RSHIFT_EXPR, int_mode, op0, 4718 pre_shift, tquotient, 0); 4719 } 4720 else 4721 { 4722 rtx t1, t2, t3, t4; 4723 4724 mh = choose_multiplier (d, size, size - 1, 4725 &ml, &post_shift, &lgup); 4726 gcc_assert (!mh); 4727 4728 if (post_shift < BITS_PER_WORD 4729 && size - 1 < BITS_PER_WORD) 4730 { 4731 t1 = expand_shift 4732 (RSHIFT_EXPR, int_mode, op0, 4733 size - 1, NULL_RTX, 0); 4734 t2 = expand_binop (int_mode, xor_optab, op0, t1, 4735 NULL_RTX, 0, OPTAB_WIDEN); 4736 extra_cost = (shift_cost (speed, int_mode, post_shift) 4737 + shift_cost (speed, int_mode, size - 1) 4738 + 2 * add_cost (speed, int_mode)); 4739 t3 = expmed_mult_highpart 4740 (int_mode, t2, gen_int_mode (ml, int_mode), 4741 NULL_RTX, 1, max_cost - extra_cost); 4742 if (t3 != 0) 4743 { 4744 t4 = expand_shift 4745 (RSHIFT_EXPR, int_mode, t3, 4746 post_shift, NULL_RTX, 1); 4747 quotient = expand_binop (int_mode, xor_optab, 4748 t4, t1, tquotient, 0, 4749 OPTAB_WIDEN); 4750 } 4751 } 4752 } 4753 } 4754 else 4755 { 4756 rtx nsign, t1, t2, t3, t4; 4757 t1 = force_operand (gen_rtx_PLUS (int_mode, 4758 op0, constm1_rtx), NULL_RTX); 4759 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX, 4760 0, OPTAB_WIDEN); 4761 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2, 4762 size - 1, NULL_RTX, 0); 4763 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign), 4764 NULL_RTX); 4765 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1, 4766 NULL_RTX, 0); 4767 if (t4) 4768 { 4769 rtx t5; 4770 t5 = expand_unop (int_mode, one_cmpl_optab, nsign, 4771 NULL_RTX, 0); 4772 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5), 4773 tquotient); 4774 } 4775 } 4776 } 4777 4778 if (quotient != 0) 4779 break; 4780 delete_insns_since (last); 4781 4782 /* Try using an instruction that produces both the quotient and 4783 remainder, using truncation. We can easily compensate the quotient 4784 or remainder to get floor rounding, once we have the remainder. 4785 Notice that we compute also the final remainder value here, 4786 and return the result right away. */ 4787 if (target == 0 || GET_MODE (target) != compute_mode) 4788 target = gen_reg_rtx (compute_mode); 4789 4790 if (rem_flag) 4791 { 4792 remainder 4793 = REG_P (target) ? target : gen_reg_rtx (compute_mode); 4794 quotient = gen_reg_rtx (compute_mode); 4795 } 4796 else 4797 { 4798 quotient 4799 = REG_P (target) ? target : gen_reg_rtx (compute_mode); 4800 remainder = gen_reg_rtx (compute_mode); 4801 } 4802 4803 if (expand_twoval_binop (sdivmod_optab, op0, op1, 4804 quotient, remainder, 0)) 4805 { 4806 /* This could be computed with a branch-less sequence. 4807 Save that for later. 
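		 (Sketch of the branch-less form, not in the original
		 sources: with c = (remainder != 0) & ((op0 ^ op1) < 0),
		 the fixup is quotient -= c and remainder += c ? op1 : 0.)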
*/ 4808 rtx tem; 4809 rtx_code_label *label = gen_label_rtx (); 4810 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label); 4811 tem = expand_binop (compute_mode, xor_optab, op0, op1, 4812 NULL_RTX, 0, OPTAB_WIDEN); 4813 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label); 4814 expand_dec (quotient, const1_rtx); 4815 expand_inc (remainder, op1); 4816 emit_label (label); 4817 return gen_lowpart (mode, rem_flag ? remainder : quotient); 4818 } 4819 4820 /* No luck with division elimination or divmod. Have to do it 4821 by conditionally adjusting op0 *and* the result. */ 4822 { 4823 rtx_code_label *label1, *label2, *label3, *label4, *label5; 4824 rtx adjusted_op0; 4825 rtx tem; 4826 4827 quotient = gen_reg_rtx (compute_mode); 4828 adjusted_op0 = copy_to_mode_reg (compute_mode, op0); 4829 label1 = gen_label_rtx (); 4830 label2 = gen_label_rtx (); 4831 label3 = gen_label_rtx (); 4832 label4 = gen_label_rtx (); 4833 label5 = gen_label_rtx (); 4834 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); 4835 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1); 4836 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4837 quotient, 0, OPTAB_LIB_WIDEN); 4838 if (tem != quotient) 4839 emit_move_insn (quotient, tem); 4840 emit_jump_insn (targetm.gen_jump (label5)); 4841 emit_barrier (); 4842 emit_label (label1); 4843 expand_inc (adjusted_op0, const1_rtx); 4844 emit_jump_insn (targetm.gen_jump (label4)); 4845 emit_barrier (); 4846 emit_label (label2); 4847 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3); 4848 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4849 quotient, 0, OPTAB_LIB_WIDEN); 4850 if (tem != quotient) 4851 emit_move_insn (quotient, tem); 4852 emit_jump_insn (targetm.gen_jump (label5)); 4853 emit_barrier (); 4854 emit_label (label3); 4855 expand_dec (adjusted_op0, const1_rtx); 4856 emit_label (label4); 4857 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4858 quotient, 0, OPTAB_LIB_WIDEN); 4859 if (tem != quotient) 4860 emit_move_insn (quotient, tem); 4861 expand_dec (quotient, const1_rtx); 4862 emit_label (label5); 4863 } 4864 break; 4865 4866 case CEIL_DIV_EXPR: 4867 case CEIL_MOD_EXPR: 4868 if (unsignedp) 4869 { 4870 if (op1_is_constant 4871 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) 4872 && (HWI_COMPUTABLE_MODE_P (compute_mode) 4873 || INTVAL (op1) >= 0)) 4874 { 4875 scalar_int_mode int_mode 4876 = as_a <scalar_int_mode> (compute_mode); 4877 rtx t1, t2, t3; 4878 unsigned HOST_WIDE_INT d = INTVAL (op1); 4879 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0, 4880 floor_log2 (d), tquotient, 1); 4881 t2 = expand_binop (int_mode, and_optab, op0, 4882 gen_int_mode (d - 1, int_mode), 4883 NULL_RTX, 1, OPTAB_LIB_WIDEN); 4884 t3 = gen_reg_rtx (int_mode); 4885 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1); 4886 if (t3 == 0) 4887 { 4888 rtx_code_label *lab; 4889 lab = gen_label_rtx (); 4890 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab); 4891 expand_inc (t1, const1_rtx); 4892 emit_label (lab); 4893 quotient = t1; 4894 } 4895 else 4896 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3), 4897 tquotient); 4898 break; 4899 } 4900 4901 /* Try using an instruction that produces both the quotient and 4902 remainder, using truncation. We can easily compensate the 4903 quotient or remainder to get ceiling rounding, once we have the 4904 remainder. Notice that we compute also the final remainder 4905 value here, and return the result right away. 
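	     (A branch-free variant, as a sketch not in the original
	     sources: with c = (remainder != 0), the fixup is
	     quotient += c and remainder -= c ? op1 : 0.)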
 */ 4906 if (target == 0 || GET_MODE (target) != compute_mode) 4907 target = gen_reg_rtx (compute_mode); 4908 4909 if (rem_flag) 4910 { 4911 remainder = (REG_P (target) 4912 ? target : gen_reg_rtx (compute_mode)); 4913 quotient = gen_reg_rtx (compute_mode); 4914 } 4915 else 4916 { 4917 quotient = (REG_P (target) 4918 ? target : gen_reg_rtx (compute_mode)); 4919 remainder = gen_reg_rtx (compute_mode); 4920 } 4921 4922 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, 4923 remainder, 1)) 4924 { 4925 /* This could be computed with a branch-less sequence. 4926 Save that for later. */ 4927 rtx_code_label *label = gen_label_rtx (); 4928 do_cmp_and_jump (remainder, const0_rtx, EQ, 4929 compute_mode, label); 4930 expand_inc (quotient, const1_rtx); 4931 expand_dec (remainder, op1); 4932 emit_label (label); 4933 return gen_lowpart (mode, rem_flag ? remainder : quotient); 4934 } 4935 4936 /* No luck with division elimination or divmod. Have to do it 4937 by conditionally adjusting op0 *and* the result. */ 4938 { 4939 rtx_code_label *label1, *label2; 4940 rtx adjusted_op0, tem; 4941 4942 quotient = gen_reg_rtx (compute_mode); 4943 adjusted_op0 = copy_to_mode_reg (compute_mode, op0); 4944 label1 = gen_label_rtx (); 4945 label2 = gen_label_rtx (); 4946 do_cmp_and_jump (adjusted_op0, const0_rtx, NE, 4947 compute_mode, label1); 4948 emit_move_insn (quotient, const0_rtx); 4949 emit_jump_insn (targetm.gen_jump (label2)); 4950 emit_barrier (); 4951 emit_label (label1); 4952 expand_dec (adjusted_op0, const1_rtx); 4953 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1, 4954 quotient, 1, OPTAB_LIB_WIDEN); 4955 if (tem != quotient) 4956 emit_move_insn (quotient, tem); 4957 expand_inc (quotient, const1_rtx); 4958 emit_label (label2); 4959 } 4960 } 4961 else /* signed */ 4962 { 4963 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) 4964 && INTVAL (op1) >= 0) 4965 { 4966 /* This is extremely similar to the code for the unsigned case 4967 above. For 2.7 we should merge these variants, but for 4968 2.6.1 I don't want to touch the code for unsigned since that 4969 gets used in C. The signed case will only be used by other 4970 languages (Ada). */ 4971 4972 rtx t1, t2, t3; 4973 unsigned HOST_WIDE_INT d = INTVAL (op1); 4974 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, 4975 floor_log2 (d), tquotient, 0); 4976 t2 = expand_binop (compute_mode, and_optab, op0, 4977 gen_int_mode (d - 1, compute_mode), 4978 NULL_RTX, 1, OPTAB_LIB_WIDEN); 4979 t3 = gen_reg_rtx (compute_mode); 4980 t3 = emit_store_flag (t3, NE, t2, const0_rtx, 4981 compute_mode, 1, 1); 4982 if (t3 == 0) 4983 { 4984 rtx_code_label *lab; 4985 lab = gen_label_rtx (); 4986 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); 4987 expand_inc (t1, const1_rtx); 4988 emit_label (lab); 4989 quotient = t1; 4990 } 4991 else 4992 quotient = force_operand (gen_rtx_PLUS (compute_mode, 4993 t1, t3), 4994 tquotient); 4995 break; 4996 } 4997 4998 /* Try using an instruction that produces both the quotient and 4999 remainder, using truncation. We can easily compensate the 5000 quotient or remainder to get ceiling rounding, once we have the 5001 remainder. Notice that we compute also the final remainder 5002 value here, and return the result right away. */ 5003 if (target == 0 || GET_MODE (target) != compute_mode) 5004 target = gen_reg_rtx (compute_mode); 5005 if (rem_flag) 5006 { 5007 remainder = (REG_P (target) 5008 ? 
target : gen_reg_rtx (compute_mode)); 5009 quotient = gen_reg_rtx (compute_mode); 5010 } 5011 else 5012 { 5013 quotient = (REG_P (target) 5014 ? target : gen_reg_rtx (compute_mode)); 5015 remainder = gen_reg_rtx (compute_mode); 5016 } 5017 5018 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, 5019 remainder, 0)) 5020 { 5021 /* This could be computed with a branch-less sequence. 5022 Save that for later. */ 5023 rtx tem; 5024 rtx_code_label *label = gen_label_rtx (); 5025 do_cmp_and_jump (remainder, const0_rtx, EQ, 5026 compute_mode, label); 5027 tem = expand_binop (compute_mode, xor_optab, op0, op1, 5028 NULL_RTX, 0, OPTAB_WIDEN); 5029 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label); 5030 expand_inc (quotient, const1_rtx); 5031 expand_dec (remainder, op1); 5032 emit_label (label); 5033 return gen_lowpart (mode, rem_flag ? remainder : quotient); 5034 } 5035 5036 /* No luck with division elimination or divmod. Have to do it 5037 by conditionally adjusting op0 *and* the result. */ 5038 { 5039 rtx_code_label *label1, *label2, *label3, *label4, *label5; 5040 rtx adjusted_op0; 5041 rtx tem; 5042 5043 quotient = gen_reg_rtx (compute_mode); 5044 adjusted_op0 = copy_to_mode_reg (compute_mode, op0); 5045 label1 = gen_label_rtx (); 5046 label2 = gen_label_rtx (); 5047 label3 = gen_label_rtx (); 5048 label4 = gen_label_rtx (); 5049 label5 = gen_label_rtx (); 5050 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); 5051 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, 5052 compute_mode, label1); 5053 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 5054 quotient, 0, OPTAB_LIB_WIDEN); 5055 if (tem != quotient) 5056 emit_move_insn (quotient, tem); 5057 emit_jump_insn (targetm.gen_jump (label5)); 5058 emit_barrier (); 5059 emit_label (label1); 5060 expand_dec (adjusted_op0, const1_rtx); 5061 emit_jump_insn (targetm.gen_jump (label4)); 5062 emit_barrier (); 5063 emit_label (label2); 5064 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, 5065 compute_mode, label3); 5066 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 5067 quotient, 0, OPTAB_LIB_WIDEN); 5068 if (tem != quotient) 5069 emit_move_insn (quotient, tem); 5070 emit_jump_insn (targetm.gen_jump (label5)); 5071 emit_barrier (); 5072 emit_label (label3); 5073 expand_inc (adjusted_op0, const1_rtx); 5074 emit_label (label4); 5075 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 5076 quotient, 0, OPTAB_LIB_WIDEN); 5077 if (tem != quotient) 5078 emit_move_insn (quotient, tem); 5079 expand_inc (quotient, const1_rtx); 5080 emit_label (label5); 5081 } 5082 } 5083 break; 5084 5085 case EXACT_DIV_EXPR: 5086 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode)) 5087 { 5088 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode); 5089 int size = GET_MODE_BITSIZE (int_mode); 5090 HOST_WIDE_INT d = INTVAL (op1); 5091 unsigned HOST_WIDE_INT ml; 5092 int pre_shift; 5093 rtx t1; 5094 5095 pre_shift = ctz_or_zero (d); 5096 ml = invert_mod2n (d >> pre_shift, size); 5097 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0, 5098 pre_shift, NULL_RTX, unsignedp); 5099 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode), 5100 NULL_RTX, 1); 5101 5102 insn = get_last_insn (); 5103 set_dst_reg_note (insn, REG_EQUAL, 5104 gen_rtx_fmt_ee (unsignedp ? 
UDIV : DIV, 5105 int_mode, op0, op1), 5106 quotient); 5107 } 5108 break; 5109 5110 case ROUND_DIV_EXPR: 5111 case ROUND_MOD_EXPR: 5112 if (unsignedp) 5113 { 5114 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode); 5115 rtx tem; 5116 rtx_code_label *label; 5117 label = gen_label_rtx (); 5118 quotient = gen_reg_rtx (int_mode); 5119 remainder = gen_reg_rtx (int_mode); 5120 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0) 5121 { 5122 rtx tem; 5123 quotient = expand_binop (int_mode, udiv_optab, op0, op1, 5124 quotient, 1, OPTAB_LIB_WIDEN); 5125 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1); 5126 remainder = expand_binop (int_mode, sub_optab, op0, tem, 5127 remainder, 1, OPTAB_LIB_WIDEN); 5128 } 5129 tem = plus_constant (int_mode, op1, -1); 5130 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1); 5131 do_cmp_and_jump (remainder, tem, LEU, int_mode, label); 5132 expand_inc (quotient, const1_rtx); 5133 expand_dec (remainder, op1); 5134 emit_label (label); 5135 } 5136 else 5137 { 5138 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode); 5139 int size = GET_MODE_BITSIZE (int_mode); 5140 rtx abs_rem, abs_op1, tem, mask; 5141 rtx_code_label *label; 5142 label = gen_label_rtx (); 5143 quotient = gen_reg_rtx (int_mode); 5144 remainder = gen_reg_rtx (int_mode); 5145 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0) 5146 { 5147 rtx tem; 5148 quotient = expand_binop (int_mode, sdiv_optab, op0, op1, 5149 quotient, 0, OPTAB_LIB_WIDEN); 5150 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0); 5151 remainder = expand_binop (int_mode, sub_optab, op0, tem, 5152 remainder, 0, OPTAB_LIB_WIDEN); 5153 } 5154 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0); 5155 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0); 5156 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem, 5157 1, NULL_RTX, 1); 5158 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label); 5159 tem = expand_binop (int_mode, xor_optab, op0, op1, 5160 NULL_RTX, 0, OPTAB_WIDEN); 5161 mask = expand_shift (RSHIFT_EXPR, int_mode, tem, 5162 size - 1, NULL_RTX, 0); 5163 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx, 5164 NULL_RTX, 0, OPTAB_WIDEN); 5165 tem = expand_binop (int_mode, sub_optab, tem, mask, 5166 NULL_RTX, 0, OPTAB_WIDEN); 5167 expand_inc (quotient, tem); 5168 tem = expand_binop (int_mode, xor_optab, mask, op1, 5169 NULL_RTX, 0, OPTAB_WIDEN); 5170 tem = expand_binop (int_mode, sub_optab, tem, mask, 5171 NULL_RTX, 0, OPTAB_WIDEN); 5172 expand_dec (remainder, tem); 5173 emit_label (label); 5174 } 5175 return gen_lowpart (mode, rem_flag ? remainder : quotient); 5176 5177 default: 5178 gcc_unreachable (); 5179 } 5180 5181 if (quotient == 0) 5182 { 5183 if (target && GET_MODE (target) != compute_mode) 5184 target = 0; 5185 5186 if (rem_flag) 5187 { 5188 /* Try to produce the remainder without producing the quotient. 5189 If we seem to have a divmod pattern that does not require widening, 5190 don't try widening here. We should really have a WIDEN argument 5191 to expand_twoval_binop, since what we'd really like to do here is 5192 1) try a mod insn in compute_mode 5193 2) try a divmod insn in compute_mode 5194 3) try a div insn in compute_mode and multiply-subtract to get 5195 remainder 5196 4) try the same things with widening allowed. 
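(For reference, the multiply-subtract in step 3 computes op0 - (op0 / op1) * op1, which is exactly the truncating remainder: e.g. for op0 = 7, op1 = 3 it gives 7 - 3 * 2 = 1. The same identity is used at the end of this function, where a quotient is already in hand and we finish with X - Y * (X / Y).)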
*/ 5197 remainder 5198 = sign_expand_binop (compute_mode, umod_optab, smod_optab, 5199 op0, op1, target, 5200 unsignedp, 5201 ((optab_handler (optab2, compute_mode) 5202 != CODE_FOR_nothing) 5203 ? OPTAB_DIRECT : OPTAB_WIDEN)); 5204 if (remainder == 0) 5205 { 5206 /* No luck there. Can we do remainder and divide at once 5207 without a library call? */ 5208 remainder = gen_reg_rtx (compute_mode); 5209 if (! expand_twoval_binop ((unsignedp 5210 ? udivmod_optab 5211 : sdivmod_optab), 5212 op0, op1, 5213 NULL_RTX, remainder, unsignedp)) 5214 remainder = 0; 5215 } 5216 5217 if (remainder) 5218 return gen_lowpart (mode, remainder); 5219 } 5220 5221 /* Produce the quotient. Try a quotient insn, but not a library call. 5222 If we have a divmod in this mode, use it in preference to widening 5223 the div (for this test we assume it will not fail). Note that optab2 5224 is set to whichever of the two optabs the call below will use. */ 5225 quotient 5226 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab, 5227 op0, op1, rem_flag ? NULL_RTX : target, 5228 unsignedp, 5229 ((optab_handler (optab2, compute_mode) 5230 != CODE_FOR_nothing) 5231 ? OPTAB_DIRECT : OPTAB_WIDEN)); 5232 5233 if (quotient == 0) 5234 { 5235 /* No luck there. Try a quotient-and-remainder insn, 5236 keeping the quotient alone. */ 5237 quotient = gen_reg_rtx (compute_mode); 5238 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab, 5239 op0, op1, 5240 quotient, NULL_RTX, unsignedp)) 5241 { 5242 quotient = 0; 5243 if (! rem_flag) 5244 /* Still no luck. If we are not computing the remainder, 5245 use a library call for the quotient. */ 5246 quotient = sign_expand_binop (compute_mode, 5247 udiv_optab, sdiv_optab, 5248 op0, op1, target, 5249 unsignedp, OPTAB_LIB_WIDEN); 5250 } 5251 } 5252 } 5253 5254 if (rem_flag) 5255 { 5256 if (target && GET_MODE (target) != compute_mode) 5257 target = 0; 5258 5259 if (quotient == 0) 5260 { 5261 /* No divide instruction either. Use library for remainder. */ 5262 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab, 5263 op0, op1, target, 5264 unsignedp, OPTAB_LIB_WIDEN); 5265 /* No remainder function. Try a quotient-and-remainder 5266 function, keeping the remainder. */ 5267 if (!remainder) 5268 { 5269 remainder = gen_reg_rtx (compute_mode); 5270 if (!expand_twoval_binop_libfunc 5271 (unsignedp ? udivmod_optab : sdivmod_optab, 5272 op0, op1, 5273 NULL_RTX, remainder, 5274 unsignedp ? UMOD : MOD)) 5275 remainder = NULL_RTX; 5276 } 5277 } 5278 else 5279 { 5280 /* We divided. Now finish doing X - Y * (X / Y). */ 5281 remainder = expand_mult (compute_mode, quotient, op1, 5282 NULL_RTX, unsignedp); 5283 remainder = expand_binop (compute_mode, sub_optab, op0, 5284 remainder, target, unsignedp, 5285 OPTAB_LIB_WIDEN); 5286 } 5287 } 5288 5289 return gen_lowpart (mode, rem_flag ? remainder : quotient); 5290} 5291 5292/* Return a tree node with data type TYPE, describing the value of X. 5293 Usually this is a VAR_DECL, if there is no obvious better choice. 5294 X may be an expression; however, we only support those expressions 5295 generated by loop.c.
*/ 5296 5297tree 5298make_tree (tree type, rtx x) 5299{ 5300 tree t; 5301 5302 switch (GET_CODE (x)) 5303 { 5304 case CONST_INT: 5305 case CONST_WIDE_INT: 5306 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type))); 5307 return t; 5308 5309 case CONST_DOUBLE: 5310 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT); 5311 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode) 5312 t = wide_int_to_tree (type, 5313 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2, 5314 HOST_BITS_PER_WIDE_INT * 2)); 5315 else 5316 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x)); 5317 5318 return t; 5319 5320 case CONST_VECTOR: 5321 { 5322 unsigned int npatterns = CONST_VECTOR_NPATTERNS (x); 5323 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x); 5324 tree itype = TREE_TYPE (type); 5325 5326 /* Build a tree with vector elements. */ 5327 tree_vector_builder elts (type, npatterns, nelts_per_pattern); 5328 unsigned int count = elts.encoded_nelts (); 5329 for (unsigned int i = 0; i < count; ++i) 5330 { 5331 rtx elt = CONST_VECTOR_ELT (x, i); 5332 elts.quick_push (make_tree (itype, elt)); 5333 } 5334 5335 return elts.build (); 5336 } 5337 5338 case PLUS: 5339 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)), 5340 make_tree (type, XEXP (x, 1))); 5341 5342 case MINUS: 5343 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)), 5344 make_tree (type, XEXP (x, 1))); 5345 5346 case NEG: 5347 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))); 5348 5349 case MULT: 5350 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)), 5351 make_tree (type, XEXP (x, 1))); 5352 5353 case ASHIFT: 5354 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)), 5355 make_tree (type, XEXP (x, 1))); 5356 5357 case LSHIFTRT: 5358 t = unsigned_type_for (type); 5359 return fold_convert (type, build2 (RSHIFT_EXPR, t, 5360 make_tree (t, XEXP (x, 0)), 5361 make_tree (type, XEXP (x, 1)))); 5362 5363 case ASHIFTRT: 5364 t = signed_type_for (type); 5365 return fold_convert (type, build2 (RSHIFT_EXPR, t, 5366 make_tree (t, XEXP (x, 0)), 5367 make_tree (type, XEXP (x, 1)))); 5368 5369 case DIV: 5370 if (TREE_CODE (type) != REAL_TYPE) 5371 t = signed_type_for (type); 5372 else 5373 t = type; 5374 5375 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, 5376 make_tree (t, XEXP (x, 0)), 5377 make_tree (t, XEXP (x, 1)))); 5378 case UDIV: 5379 t = unsigned_type_for (type); 5380 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, 5381 make_tree (t, XEXP (x, 0)), 5382 make_tree (t, XEXP (x, 1)))); 5383 5384 case SIGN_EXTEND: 5385 case ZERO_EXTEND: 5386 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)), 5387 GET_CODE (x) == ZERO_EXTEND); 5388 return fold_convert (type, make_tree (t, XEXP (x, 0))); 5389 5390 case CONST: 5391 return make_tree (type, XEXP (x, 0)); 5392 5393 case SYMBOL_REF: 5394 t = SYMBOL_REF_DECL (x); 5395 if (t) 5396 return fold_convert (type, build_fold_addr_expr (t)); 5397 /* fall through. */ 5398 5399 default: 5400 if (CONST_POLY_INT_P (x)) 5401 return wide_int_to_tree (type, const_poly_int_value (x)); 5402 5403 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type); 5404 5405 /* If TYPE is a POINTER_TYPE, we might need to convert X from 5406 address mode to pointer mode.
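This matters when the address space's address mode differs in width from the mode of the pointer type; convert_memory_address_addr_space then truncates or extends X as needed.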
*/ 5407 if (POINTER_TYPE_P (type)) 5408 x = convert_memory_address_addr_space 5409 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type))); 5410 5411 /* Note that we do *not* use SET_DECL_RTL here, because we do not 5412 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */ 5413 t->decl_with_rtl.rtl = x; 5414 5415 return t; 5416 } 5417} 5418 5419/* Compute the logical-and of OP0 and OP1, storing it in TARGET 5420 and returning TARGET. 5421 5422 If TARGET is 0, a pseudo-register or constant is returned. */ 5423 5424rtx 5425expand_and (machine_mode mode, rtx op0, rtx op1, rtx target) 5426{ 5427 rtx tem = 0; 5428 5429 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode) 5430 tem = simplify_binary_operation (AND, mode, op0, op1); 5431 if (tem == 0) 5432 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN); 5433 5434 if (target == 0) 5435 target = tem; 5436 else if (tem != target) 5437 emit_move_insn (target, tem); 5438 return target; 5439} 5440 5441/* Helper function for emit_store_flag. */ 5442rtx 5443emit_cstore (rtx target, enum insn_code icode, enum rtx_code code, 5444 machine_mode mode, machine_mode compare_mode, 5445 int unsignedp, rtx x, rtx y, int normalizep, 5446 machine_mode target_mode) 5447{ 5448 struct expand_operand ops[4]; 5449 rtx op0, comparison, subtarget; 5450 rtx_insn *last; 5451 scalar_int_mode result_mode = targetm.cstore_mode (icode); 5452 scalar_int_mode int_target_mode; 5453 5454 last = get_last_insn (); 5455 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp); 5456 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp); 5457 if (!x || !y) 5458 { 5459 delete_insns_since (last); 5460 return NULL_RTX; 5461 } 5462 5463 if (target_mode == VOIDmode) 5464 int_target_mode = result_mode; 5465 else 5466 int_target_mode = as_a <scalar_int_mode> (target_mode); 5467 if (!target) 5468 target = gen_reg_rtx (int_target_mode); 5469 5470 comparison = gen_rtx_fmt_ee (code, result_mode, x, y); 5471 5472 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode); 5473 create_fixed_operand (&ops[1], comparison); 5474 create_fixed_operand (&ops[2], x); 5475 create_fixed_operand (&ops[3], y); 5476 if (!maybe_expand_insn (icode, 4, ops)) 5477 { 5478 delete_insns_since (last); 5479 return NULL_RTX; 5480 } 5481 subtarget = ops[0].value; 5482 5483 /* If we are converting to a wider mode, first convert to 5484 INT_TARGET_MODE, then normalize. This produces better combining 5485 opportunities on machines that have a SIGN_EXTRACT when we are 5486 testing a single bit. This mostly benefits the 68k. 5487 5488 If STORE_FLAG_VALUE does not have the sign bit set when 5489 interpreted in MODE, we can do this conversion as unsigned, which 5490 is usually more efficient. */ 5491 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode)) 5492 { 5493 convert_move (target, subtarget, 5494 val_signbit_known_clear_p (result_mode, 5495 STORE_FLAG_VALUE)); 5496 op0 = target; 5497 result_mode = int_target_mode; 5498 } 5499 else 5500 op0 = subtarget; 5501 5502 /* If we want to keep subexpressions around, don't reuse our last 5503 target. */ 5504 if (optimize) 5505 subtarget = 0; 5506 5507 /* Now normalize to the proper value in MODE. Sometimes we don't 5508 have to do anything. */ 5509 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE) 5510 ; 5511 /* STORE_FLAG_VALUE might be the most negative number, so write 5512 the comparison this way to avoid a compile-time warning.
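That is, test - NORMALIZEP == STORE_FLAG_VALUE rather than NORMALIZEP == - STORE_FLAG_VALUE: NORMALIZEP at this point is 1 or -1, so negating it is always safe, whereas the constant expression - STORE_FLAG_VALUE would overflow when STORE_FLAG_VALUE is the most negative representable value.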
*/ 5513 else if (- normalizep == STORE_FLAG_VALUE) 5514 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0); 5515 5516 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes 5517 it hard to use a value of just the sign bit due to ANSI integer 5518 constant typing rules. */ 5519 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE)) 5520 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0, 5521 GET_MODE_BITSIZE (result_mode) - 1, subtarget, 5522 normalizep == 1); 5523 else 5524 { 5525 gcc_assert (STORE_FLAG_VALUE & 1); 5526 5527 op0 = expand_and (result_mode, op0, const1_rtx, subtarget); 5528 if (normalizep == -1) 5529 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0); 5530 } 5531 5532 /* If we were converting to a smaller mode, do the conversion now. */ 5533 if (int_target_mode != result_mode) 5534 { 5535 convert_move (target, op0, 0); 5536 return target; 5537 } 5538 else 5539 return op0; 5540} 5541 5542 5543/* A subroutine of emit_store_flag only including "tricks" that do not 5544 need a recursive call. These are kept separate to avoid infinite 5545 loops. */ 5546 5547static rtx 5548emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1, 5549 machine_mode mode, int unsignedp, int normalizep, 5550 machine_mode target_mode) 5551{ 5552 rtx subtarget; 5553 enum insn_code icode; 5554 machine_mode compare_mode; 5555 enum mode_class mclass; 5556 enum rtx_code scode; 5557 5558 if (unsignedp) 5559 code = unsigned_condition (code); 5560 scode = swap_condition (code); 5561 5562 /* If one operand is constant, make it the second one. Only do this 5563 if the other operand is not constant as well. */ 5564 5565 if (swap_commutative_operands_p (op0, op1)) 5566 { 5567 std::swap (op0, op1); 5568 code = swap_condition (code); 5569 } 5570 5571 if (mode == VOIDmode) 5572 mode = GET_MODE (op0); 5573 5574 /* For some comparisons with 1 and -1, we can convert this to 5575 comparisons with zero. This will often produce more opportunities for 5576 store-flag insns. */ 5577 5578 switch (code) 5579 { 5580 case LT: 5581 if (op1 == const1_rtx) 5582 op1 = const0_rtx, code = LE; 5583 break; 5584 case LE: 5585 if (op1 == constm1_rtx) 5586 op1 = const0_rtx, code = LT; 5587 break; 5588 case GE: 5589 if (op1 == const1_rtx) 5590 op1 = const0_rtx, code = GT; 5591 break; 5592 case GT: 5593 if (op1 == constm1_rtx) 5594 op1 = const0_rtx, code = GE; 5595 break; 5596 case GEU: 5597 if (op1 == const1_rtx) 5598 op1 = const0_rtx, code = NE; 5599 break; 5600 case LTU: 5601 if (op1 == const1_rtx) 5602 op1 = const0_rtx, code = EQ; 5603 break; 5604 default: 5605 break; 5606 } 5607 5608 /* If we are comparing a double-word integer with zero or -1, we can 5609 convert the comparison into one involving a single word. */ 5610 scalar_int_mode int_mode; 5611 if (is_int_mode (mode, &int_mode) 5612 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2 5613 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0))) 5614 { 5615 rtx tem; 5616 if ((code == EQ || code == NE) 5617 && (op1 == const0_rtx || op1 == constm1_rtx)) 5618 { 5619 rtx op00, op01; 5620 5621 /* Do a logical OR or AND of the two words and compare the 5622 result. */ 5623 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0); 5624 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD); 5625 tem = expand_binop (word_mode, 5626 op1 == const0_rtx ? 
ior_optab : and_optab, 5627 op00, op01, NULL_RTX, unsignedp, 5628 OPTAB_DIRECT); 5629 5630 if (tem != 0) 5631 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode, 5632 unsignedp, normalizep); 5633 } 5634 else if ((code == LT || code == GE) && op1 == const0_rtx) 5635 { 5636 rtx op0h; 5637 5638 /* If testing the sign bit, can just test on high word. */ 5639 op0h = simplify_gen_subreg (word_mode, op0, int_mode, 5640 subreg_highpart_offset (word_mode, 5641 int_mode)); 5642 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode, 5643 unsignedp, normalizep); 5644 } 5645 else 5646 tem = NULL_RTX; 5647 5648 if (tem) 5649 { 5650 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode) 5651 return tem; 5652 if (!target) 5653 target = gen_reg_rtx (target_mode); 5654 5655 convert_move (target, tem, 5656 !val_signbit_known_set_p (word_mode, 5657 (normalizep ? normalizep 5658 : STORE_FLAG_VALUE))); 5659 return target; 5660 } 5661 } 5662 5663 /* If this is A < 0 or A >= 0, we can do this by taking the ones 5664 complement of A (for GE) and shifting the sign bit to the low bit. */ 5665 if (op1 == const0_rtx && (code == LT || code == GE) 5666 && is_int_mode (mode, &int_mode) 5667 && (normalizep || STORE_FLAG_VALUE == 1 5668 || val_signbit_p (int_mode, STORE_FLAG_VALUE))) 5669 { 5670 scalar_int_mode int_target_mode; 5671 subtarget = target; 5672 5673 if (!target) 5674 int_target_mode = int_mode; 5675 else 5676 { 5677 /* If the result is to be wider than OP0, it is best to convert it 5678 first. If it is to be narrower, it is *incorrect* to convert it 5679 first. */ 5680 int_target_mode = as_a <scalar_int_mode> (target_mode); 5681 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode)) 5682 { 5683 op0 = convert_modes (int_target_mode, int_mode, op0, 0); 5684 int_mode = int_target_mode; 5685 } 5686 } 5687 5688 if (int_target_mode != int_mode) 5689 subtarget = 0; 5690 5691 if (code == GE) 5692 op0 = expand_unop (int_mode, one_cmpl_optab, op0, 5693 ((STORE_FLAG_VALUE == 1 || normalizep) 5694 ? 0 : subtarget), 0); 5695 5696 if (STORE_FLAG_VALUE == 1 || normalizep) 5697 /* If we are supposed to produce a 0/1 value, we want to do 5698 a logical shift from the sign bit to the low-order bit; for 5699 a -1/0 value, we do an arithmetic shift. */ 5700 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0, 5701 GET_MODE_BITSIZE (int_mode) - 1, 5702 subtarget, normalizep != -1); 5703 5704 if (int_mode != int_target_mode) 5705 op0 = convert_modes (int_target_mode, int_mode, op0, 0); 5706 5707 return op0; 5708 } 5709 5710 mclass = GET_MODE_CLASS (mode); 5711 FOR_EACH_MODE_FROM (compare_mode, mode) 5712 { 5713 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode; 5714 icode = optab_handler (cstore_optab, optab_mode); 5715 if (icode != CODE_FOR_nothing) 5716 { 5717 do_pending_stack_adjust (); 5718 rtx tem = emit_cstore (target, icode, code, mode, compare_mode, 5719 unsignedp, op0, op1, normalizep, target_mode); 5720 if (tem) 5721 return tem; 5722 5723 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 5724 { 5725 tem = emit_cstore (target, icode, scode, mode, compare_mode, 5726 unsignedp, op1, op0, normalizep, target_mode); 5727 if (tem) 5728 return tem; 5729 } 5730 break; 5731 } 5732 } 5733 5734 return 0; 5735} 5736 5737/* Subroutine of emit_store_flag that handles cases in which the operands 5738 are scalar integers. SUBTARGET is the target to use for temporary 5739 operations and TRUEVAL is the value to store when the condition is 5740 true. All other arguments are as for emit_store_flag. 
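In practice TRUEVAL is GEN_INT (NORMALIZEP ? NORMALIZEP : STORE_FLAG_VALUE), as set up by emit_store_flag below, so XORing a normalized comparison result with TRUEVAL inverts the flag.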
*/ 5741 5742rtx 5743emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0, 5744 rtx op1, scalar_int_mode mode, int unsignedp, 5745 int normalizep, rtx trueval) 5746{ 5747 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode; 5748 rtx_insn *last = get_last_insn (); 5749 5750 /* If this is an equality comparison of integers, we can try to exclusive-or 5751 (or subtract) the two operands and use a recursive call to try the 5752 comparison with zero. Don't do any of these cases if branches are 5753 very cheap. */ 5754 5755 if ((code == EQ || code == NE) && op1 != const0_rtx) 5756 { 5757 rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1, 5758 OPTAB_WIDEN); 5759 5760 if (tem == 0) 5761 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1, 5762 OPTAB_WIDEN); 5763 if (tem != 0) 5764 tem = emit_store_flag (target, code, tem, const0_rtx, 5765 mode, unsignedp, normalizep); 5766 if (tem != 0) 5767 return tem; 5768 5769 delete_insns_since (last); 5770 } 5771 5772 /* For integer comparisons, try the reverse comparison. However, for 5773 small X, and if we'd have to extend anyway, implementing "X != 0" 5774 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */ 5775 rtx_code rcode = reverse_condition (code); 5776 if (can_compare_p (rcode, mode, ccp_store_flag) 5777 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing 5778 && code == NE 5779 && GET_MODE_SIZE (mode) < UNITS_PER_WORD 5780 && op1 == const0_rtx)) 5781 { 5782 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1) 5783 || (STORE_FLAG_VALUE == -1 && normalizep == 1)); 5784 5785 /* Again, for the reverse comparison, use either an addition or an XOR. */ 5786 if (want_add 5787 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1, 5788 optimize_insn_for_speed_p ()) == 0) 5789 { 5790 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0, 5791 STORE_FLAG_VALUE, target_mode); 5792 if (tem != 0) 5793 tem = expand_binop (target_mode, add_optab, tem, 5794 gen_int_mode (normalizep, target_mode), 5795 target, 0, OPTAB_WIDEN); 5796 if (tem != 0) 5797 return tem; 5798 } 5799 else if (!want_add 5800 && rtx_cost (trueval, mode, XOR, 1, 5801 optimize_insn_for_speed_p ()) == 0) 5802 { 5803 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0, 5804 normalizep, target_mode); 5805 if (tem != 0) 5806 tem = expand_binop (target_mode, xor_optab, tem, trueval, target, 5807 INTVAL (trueval) >= 0, OPTAB_WIDEN); 5808 if (tem != 0) 5809 return tem; 5810 } 5811 5812 delete_insns_since (last); 5813 } 5814 5815 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with 5816 the constant zero. Reject all other comparisons at this point. Only 5817 do LE and GT if branches are expensive since they are expensive on 5818 2-operand machines. */ 5819 5820 if (op1 != const0_rtx 5821 || (code != EQ && code != NE 5822 && (BRANCH_COST (optimize_insn_for_speed_p (), 5823 false) <= 1 || (code != LE && code != GT)))) 5824 return 0; 5825 5826 /* Try to put the result of the comparison in the sign bit. Assume we can't 5827 do the necessary operation below. */ 5828 5829 rtx tem = 0; 5830 5831 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has 5832 the sign bit set. */ 5833 5834 if (code == LE) 5835 { 5836 /* This is destructive, so SUBTARGET can't be OP0.
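(The subtraction below may write A - 1 into SUBTARGET while OP0 is still needed as an input to the following ior. As for the identity: if A > 0, both A and A - 1 are nonnegative, so the sign bit of A | (A - 1) is clear; if A == 0, A - 1 is -1; and if A < 0, A itself supplies the sign bit.)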
*/ 5837 if (rtx_equal_p (subtarget, op0)) 5838 subtarget = 0; 5839 5840 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0, 5841 OPTAB_WIDEN); 5842 if (tem) 5843 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0, 5844 OPTAB_WIDEN); 5845 } 5846 5847 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the 5848 number of bits in the mode of OP0, minus one. For A > 0 that value is - A, which is negative; for A <= 0 it is 0 or -(A + 1), which is nonnegative. */ 5849 5850 if (code == GT) 5851 { 5852 if (rtx_equal_p (subtarget, op0)) 5853 subtarget = 0; 5854 5855 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0, 5856 GET_MODE_BITSIZE (mode) - 1, 5857 subtarget, 0); 5858 if (tem) 5859 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0, 5860 OPTAB_WIDEN); 5861 } 5862 5863 if (code == EQ || code == NE) 5864 { 5865 /* For EQ or NE, one way to do the comparison is to apply an operation 5866 that converts the operand into a positive number if it is nonzero 5867 or zero if it was originally zero. Then, for EQ, we subtract 1 and 5868 for NE we negate. This puts the result in the sign bit. Then we 5869 normalize with a shift, if needed. 5870 5871 Two operations that can do the above actions are ABS and FFS, so try 5872 them. If that doesn't work, and MODE is smaller than a full word, 5873 we can use zero-extension to the wider mode (an unsigned conversion) 5874 as the operation. */ 5875 5876 /* Note that ABS doesn't yield a positive number for INT_MIN, but 5877 that is compensated by the subsequent overflow when subtracting 5878 one / negating. */ 5879 5880 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing) 5881 tem = expand_unop (mode, abs_optab, op0, subtarget, 1); 5882 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing) 5883 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1); 5884 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD) 5885 { 5886 tem = convert_modes (word_mode, mode, op0, 1); 5887 mode = word_mode; 5888 } 5889 5890 if (tem != 0) 5891 { 5892 if (code == EQ) 5893 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget, 5894 0, OPTAB_WIDEN); 5895 else 5896 tem = expand_unop (mode, neg_optab, tem, subtarget, 0); 5897 } 5898 5899 /* If we couldn't do it that way, for NE we can "or" the two's complement 5900 of the value with itself. For EQ, we take the one's complement of 5901 that "or", which is an extra insn, so we only handle EQ if branches 5902 are expensive. */ 5903 5904 if (tem == 0 5905 && (code == NE 5906 || BRANCH_COST (optimize_insn_for_speed_p (), 5907 false) > 1)) 5908 { 5909 if (rtx_equal_p (subtarget, op0)) 5910 subtarget = 0; 5911 5912 tem = expand_unop (mode, neg_optab, op0, subtarget, 0); 5913 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0, 5914 OPTAB_WIDEN); 5915 5916 if (tem && code == EQ) 5917 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0); 5918 } 5919 } 5920 5921 if (tem && normalizep) 5922 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem, 5923 GET_MODE_BITSIZE (mode) - 1, 5924 subtarget, normalizep == 1); 5925 5926 if (tem) 5927 { 5928 if (!target) 5929 ; 5930 else if (GET_MODE (tem) != target_mode) 5931 { 5932 convert_move (target, tem, 0); 5933 tem = target; 5934 } 5935 else if (!subtarget) 5936 { 5937 emit_move_insn (target, tem); 5938 tem = target; 5939 } 5940 } 5941 else 5942 delete_insns_since (last); 5943 5944 return tem; 5945} 5946 5947/* Emit a store-flags instruction for comparison CODE on OP0 and OP1, 5948 storing the result in TARGET. Normally return TARGET. 5949 Return 0 if that cannot be done.
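For example, emit_store_flag (target, EQ, x, const0_rtx, SImode, 0, 1) tries to set TARGET to 1 when the SImode value X is zero and to 0 otherwise, without emitting a branch.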
5950 5951 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If 5952 it is VOIDmode, they cannot both be CONST_INT. 5953 5954 UNSIGNEDP is for the case where we have to widen the operands 5955 to perform the operation. It says to use zero-extension. 5956 5957 NORMALIZEP is 1 if we should convert the result to be either zero 5958 or one. NORMALIZEP is -1 if we should convert the result to be 5959 either zero or -1. If NORMALIZEP is zero, the result will be left 5960 "raw" out of the scc insn. */ 5961 5962rtx 5963emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1, 5964 machine_mode mode, int unsignedp, int normalizep) 5965{ 5966 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode; 5967 enum rtx_code rcode; 5968 rtx subtarget; 5969 rtx tem, trueval; 5970 rtx_insn *last; 5971 5972 /* If we compare constants, we shouldn't use a store-flag operation, 5973 but a constant load. We can get there via the vanilla route that 5974 usually generates a compare-branch sequence, but will in this case 5975 fold the comparison to a constant, and thus elide the branch. */ 5976 if (CONSTANT_P (op0) && CONSTANT_P (op1)) 5977 return NULL_RTX; 5978 5979 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep, 5980 target_mode); 5981 if (tem) 5982 return tem; 5983 5984 /* If we reached here, we can't do this with a scc insn; however, there 5985 are some comparisons that can be done in other ways. Don't do any 5986 of these cases if branches are very cheap. */ 5987 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0) 5988 return 0; 5989 5990 /* See what we need to return. We can only return 1, -1, or the 5991 sign bit. */ 5992 5993 if (normalizep == 0) 5994 { 5995 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1) 5996 normalizep = STORE_FLAG_VALUE; 5997 5998 else if (val_signbit_p (mode, STORE_FLAG_VALUE)) 5999 ; 6000 else 6001 return 0; 6002 } 6003 6004 last = get_last_insn (); 6005 6006 /* If optimizing, use different pseudo registers for each insn, instead 6007 of reusing the same pseudo. This leads to better CSE, but slows 6008 down the compiler, since there are more pseudos. */ 6009 subtarget = (!optimize 6010 && (target_mode == mode)) ? target : NULL_RTX; 6011 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE); 6012 6013 /* For floating-point comparisons, try the reverse comparison or try 6014 changing the "orderedness" of the comparison. */ 6015 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 6016 { 6017 enum rtx_code first_code; 6018 bool and_them; 6019 6020 rcode = reverse_condition_maybe_unordered (code); 6021 if (can_compare_p (rcode, mode, ccp_store_flag) 6022 && (code == ORDERED || code == UNORDERED 6023 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ)) 6024 || (! HONOR_SNANS (mode) && (code == EQ || code == NE)))) 6025 { 6026 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1) 6027 || (STORE_FLAG_VALUE == -1 && normalizep == 1)); 6028 6029 /* For the reverse comparison, use either an addition or an XOR.
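E.g. with STORE_FLAG_VALUE == 1 and NORMALIZEP == -1, the reversed comparison yields 0 when the original condition holds and 1 when it does not, so adding -1 produces the wanted -1 / 0. Otherwise, XORing the normalized result of the reversed comparison with TRUEVAL flips it back to the original sense.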
*/ 6030 if (want_add 6031 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1, 6032 optimize_insn_for_speed_p ()) == 0) 6033 { 6034 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0, 6035 STORE_FLAG_VALUE, target_mode); 6036 if (tem) 6037 return expand_binop (target_mode, add_optab, tem, 6038 gen_int_mode (normalizep, target_mode), 6039 target, 0, OPTAB_WIDEN); 6040 } 6041 else if (!want_add 6042 && rtx_cost (trueval, mode, XOR, 1, 6043 optimize_insn_for_speed_p ()) == 0) 6044 { 6045 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0, 6046 normalizep, target_mode); 6047 if (tem) 6048 return expand_binop (target_mode, xor_optab, tem, trueval, 6049 target, INTVAL (trueval) >= 0, 6050 OPTAB_WIDEN); 6051 } 6052 } 6053 6054 delete_insns_since (last); 6055 6056 /* Cannot split ORDERED and UNORDERED, only try the above trick. */ 6057 if (code == ORDERED || code == UNORDERED) 6058 return 0; 6059 6060 and_them = split_comparison (code, mode, &first_code, &code); 6061 6062 /* If there are no NaNs, the first comparison should always fall through. 6063 Effectively change the comparison to the other one. */ 6064 if (!HONOR_NANS (mode)) 6065 { 6066 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED)); 6067 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep, 6068 target_mode); 6069 } 6070 6071 if (!HAVE_conditional_move) 6072 return 0; 6073 6074 /* Do not turn a trapping comparison into a non-trapping one. */ 6075 if ((code != EQ && code != NE && code != UNEQ && code != LTGT) 6076 && flag_trapping_math) 6077 return 0; 6078 6079 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a 6080 conditional move. */ 6081 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0, 6082 normalizep, target_mode); 6083 if (tem == 0) 6084 return 0; 6085 6086 if (and_them) 6087 tem = emit_conditional_move (target, code, op0, op1, mode, 6088 tem, const0_rtx, GET_MODE (tem), 0); 6089 else 6090 tem = emit_conditional_move (target, code, op0, op1, mode, 6091 trueval, tem, GET_MODE (tem), 0); 6092 6093 if (tem == 0) 6094 delete_insns_since (last); 6095 return tem; 6096 } 6097 6098 /* The remaining tricks only apply to integer comparisons. */ 6099 6100 scalar_int_mode int_mode; 6101 if (is_int_mode (mode, &int_mode)) 6102 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode, 6103 unsignedp, normalizep, trueval); 6104 6105 return 0; 6106} 6107 6108/* Like emit_store_flag, but always succeeds. */ 6109 6110rtx 6111emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1, 6112 machine_mode mode, int unsignedp, int normalizep) 6113{ 6114 rtx tem; 6115 rtx_code_label *label; 6116 rtx trueval, falseval; 6117 6118 /* First see if emit_store_flag can do the job. */ 6119 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep); 6120 if (tem != 0) 6121 return tem; 6122 6123 /* If one operand is constant, make it the second one. Only do this 6124 if the other operand is not constant as well. */ 6125 if (swap_commutative_operands_p (op0, op1)) 6126 { 6127 std::swap (op0, op1); 6128 code = swap_condition (code); 6129 } 6130 6131 if (mode == VOIDmode) 6132 mode = GET_MODE (op0); 6133 6134 if (!target) 6135 target = gen_reg_rtx (word_mode); 6136 6137 /* If this failed, we have to do this with set/compare/jump/set code. 6138 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */ 6139 trueval = normalizep ? 
GEN_INT (normalizep) : const1_rtx; 6140 if (code == NE 6141 && GET_MODE_CLASS (mode) == MODE_INT 6142 && REG_P (target) 6143 && op0 == target 6144 && op1 == const0_rtx) 6145 { 6146 label = gen_label_rtx (); 6147 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode, 6148 NULL_RTX, NULL, label, 6149 profile_probability::uninitialized ()); 6150 emit_move_insn (target, trueval); 6151 emit_label (label); 6152 return target; 6153 } 6154 6155 if (!REG_P (target) 6156 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1)) 6157 target = gen_reg_rtx (GET_MODE (target)); 6158 6159 /* Jump in the right direction if the target cannot implement CODE 6160 but can jump on its reverse condition. */ 6161 falseval = const0_rtx; 6162 if (! can_compare_p (code, mode, ccp_jump) 6163 && (! FLOAT_MODE_P (mode) 6164 || code == ORDERED || code == UNORDERED 6165 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ)) 6166 || (! HONOR_SNANS (mode) && (code == EQ || code == NE)))) 6167 { 6168 enum rtx_code rcode; 6169 if (FLOAT_MODE_P (mode)) 6170 rcode = reverse_condition_maybe_unordered (code); 6171 else 6172 rcode = reverse_condition (code); 6173 6174 /* Canonicalize to UNORDERED for the libcall. */ 6175 if (can_compare_p (rcode, mode, ccp_jump) 6176 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump))) 6177 { 6178 falseval = trueval; 6179 trueval = const0_rtx; 6180 code = rcode; 6181 } 6182 } 6183 6184 emit_move_insn (target, trueval); 6185 label = gen_label_rtx (); 6186 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL, 6187 label, profile_probability::uninitialized ()); 6188 6189 emit_move_insn (target, falseval); 6190 emit_label (label); 6191 6192 return target; 6193} 6194 6195/* Perform a possibly multi-word comparison and conditional jump to LABEL 6196 if ARG1 OP ARG2 is true where ARG1 and ARG2 are of mode MODE. This is 6197 now a thin wrapper around do_compare_rtx_and_jump. */ 6198 6199static void 6200do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode, 6201 rtx_code_label *label) 6202{ 6203 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU); 6204 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX, 6205 NULL, label, profile_probability::uninitialized ()); 6206} 6207