expmed.c revision 1.12
1/* Medium-level subroutines: convert bit-field store and extract
2   and shifts, multiplies and divides to rtl instructions.
3   Copyright (C) 1987-2018 Free Software Foundation, Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3.  If not see
19<http://www.gnu.org/licenses/>.  */
20
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "backend.h"
26#include "target.h"
27#include "rtl.h"
28#include "tree.h"
29#include "predict.h"
30#include "memmodel.h"
31#include "tm_p.h"
32#include "expmed.h"
33#include "optabs.h"
34#include "regs.h"
35#include "emit-rtl.h"
36#include "diagnostic-core.h"
37#include "fold-const.h"
38#include "stor-layout.h"
39#include "dojump.h"
40#include "explow.h"
41#include "expr.h"
42#include "langhooks.h"
43#include "tree-vector-builder.h"
44
45struct target_expmed default_target_expmed;
46#if SWITCHABLE_TARGET
47struct target_expmed *this_target_expmed = &default_target_expmed;
48#endif
49
50static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
51				      unsigned HOST_WIDE_INT,
52				      unsigned HOST_WIDE_INT,
53				      poly_uint64, poly_uint64,
54				      machine_mode, rtx, bool, bool);
55static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
56				   unsigned HOST_WIDE_INT,
57				   unsigned HOST_WIDE_INT,
58				   poly_uint64, poly_uint64,
59				   rtx, scalar_int_mode, bool);
60static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
61				     unsigned HOST_WIDE_INT,
62				     unsigned HOST_WIDE_INT,
63				     rtx, scalar_int_mode, bool);
64static void store_split_bit_field (rtx, opt_scalar_int_mode,
65				   unsigned HOST_WIDE_INT,
66				   unsigned HOST_WIDE_INT,
67				   poly_uint64, poly_uint64,
68				   rtx, scalar_int_mode, bool);
69static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
70				       unsigned HOST_WIDE_INT,
71				       unsigned HOST_WIDE_INT, int, rtx,
72				       machine_mode, machine_mode, bool, bool);
73static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
74				    unsigned HOST_WIDE_INT,
75				    unsigned HOST_WIDE_INT, rtx, int, bool);
76static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
77				      unsigned HOST_WIDE_INT,
78				      unsigned HOST_WIDE_INT, rtx, int, bool);
79static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
80static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
81				    unsigned HOST_WIDE_INT,
82				    unsigned HOST_WIDE_INT, int, bool);
83static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
84static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
85static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
86
87/* Return a constant integer mask value of mode MODE with BITSIZE ones
88   followed by BITPOS zeros, or the complement of that if COMPLEMENT.
89   The mask is truncated if necessary to the width of mode MODE.  The
90   mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
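/* For example, mask_rtx (SImode, 8, 4, false) yields the SImode constant
   0xf00, and mask_rtx (SImode, 8, 4, true) yields its complement
   0xfffff0ff.  */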
91
92static inline rtx
93mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
94{
95  return immed_wide_int_const
96    (wi::shifted_mask (bitpos, bitsize, complement,
97		       GET_MODE_PRECISION (mode)), mode);
98}
99
100/* Test whether a value is zero or a power of two.  */
101#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
102  (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
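/* X & (X - 1) clears the lowest set bit of X, so the test above is true
   exactly when X has at most one bit set (e.g. 0, 1, 2, 4, 8, ...).  */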
103
104struct init_expmed_rtl
105{
106  rtx reg;
107  rtx plus;
108  rtx neg;
109  rtx mult;
110  rtx sdiv;
111  rtx udiv;
112  rtx sdiv_32;
113  rtx smod_32;
114  rtx wide_mult;
115  rtx wide_lshr;
116  rtx wide_trunc;
117  rtx shift;
118  rtx shift_mult;
119  rtx shift_add;
120  rtx shift_sub0;
121  rtx shift_sub1;
122  rtx zext;
123  rtx trunc;
124
125  rtx pow2[MAX_BITS_PER_WORD];
126  rtx cint[MAX_BITS_PER_WORD];
127};
128
129static void
130init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
131		      scalar_int_mode from_mode, bool speed)
132{
133  int to_size, from_size;
134  rtx which;
135
136  to_size = GET_MODE_PRECISION (to_mode);
137  from_size = GET_MODE_PRECISION (from_mode);
138
139  /* Most partial integers have a precision less than the "full"
140     integer they require for storage.  If one doesn't, reduce the
141     bit size by one for the purposes of the comparison made
142     here.  */
143  if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
144      && pow2p_hwi (to_size))
145    to_size --;
146  if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
147      && pow2p_hwi (from_size))
148    from_size --;
149
150  /* Assume cost of zero-extend and sign-extend is the same.  */
151  which = (to_size < from_size ? all->trunc : all->zext);
152
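  /* ALL->TRUNC and ALL->ZEXT both operate on ALL->REG, so giving the
     register FROM_MODE makes WHICH describe a FROM_MODE to TO_MODE
     conversion whose cost we record below.  */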
153  PUT_MODE (all->reg, from_mode);
154  set_convert_cost (to_mode, from_mode, speed,
155		    set_src_cost (which, to_mode, speed));
156}
157
158static void
159init_expmed_one_mode (struct init_expmed_rtl *all,
160		      machine_mode mode, int speed)
161{
162  int m, n, mode_bitsize;
163  machine_mode mode_from;
164
165  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
166
167  PUT_MODE (all->reg, mode);
168  PUT_MODE (all->plus, mode);
169  PUT_MODE (all->neg, mode);
170  PUT_MODE (all->mult, mode);
171  PUT_MODE (all->sdiv, mode);
172  PUT_MODE (all->udiv, mode);
173  PUT_MODE (all->sdiv_32, mode);
174  PUT_MODE (all->smod_32, mode);
175  PUT_MODE (all->wide_trunc, mode);
176  PUT_MODE (all->shift, mode);
177  PUT_MODE (all->shift_mult, mode);
178  PUT_MODE (all->shift_add, mode);
179  PUT_MODE (all->shift_sub0, mode);
180  PUT_MODE (all->shift_sub1, mode);
181  PUT_MODE (all->zext, mode);
182  PUT_MODE (all->trunc, mode);
183
184  set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
185  set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
186  set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
187  set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
188  set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
189
190  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
191				     <= 2 * add_cost (speed, mode)));
192  set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
193				     <= 4 * add_cost (speed, mode)));
194
195  set_shift_cost (speed, mode, 0, 0);
196  {
197    int cost = add_cost (speed, mode);
198    set_shiftadd_cost (speed, mode, 0, cost);
199    set_shiftsub0_cost (speed, mode, 0, cost);
200    set_shiftsub1_cost (speed, mode, 0, cost);
201  }
202
203  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
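  /* Record the cost of shifting by each constant amount M, and of the
     related shift-and-add and shift-and-subtract forms, using CINT[M]
     as the shift count and POW2[M] (i.e. 1 << M) as the equivalent
     multiplier.  */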
204  for (m = 1; m < n; m++)
205    {
206      XEXP (all->shift, 1) = all->cint[m];
207      XEXP (all->shift_mult, 1) = all->pow2[m];
208
209      set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
210      set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
211						       speed));
212      set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
213							speed));
214      set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
215							speed));
216    }
217
218  scalar_int_mode int_mode_to;
219  if (is_a <scalar_int_mode> (mode, &int_mode_to))
220    {
221      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
222	   mode_from = (machine_mode)(mode_from + 1))
223	init_expmed_one_conv (all, int_mode_to,
224			      as_a <scalar_int_mode> (mode_from), speed);
225
226      scalar_int_mode wider_mode;
227      if (GET_MODE_CLASS (int_mode_to) == MODE_INT
228	  && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
229	{
230	  PUT_MODE (all->zext, wider_mode);
231	  PUT_MODE (all->wide_mult, wider_mode);
232	  PUT_MODE (all->wide_lshr, wider_mode);
233	  XEXP (all->wide_lshr, 1)
234	    = gen_int_shift_amount (wider_mode, mode_bitsize);
235
236	  set_mul_widen_cost (speed, wider_mode,
237			      set_src_cost (all->wide_mult, wider_mode, speed));
238	  set_mul_highpart_cost (speed, int_mode_to,
239				 set_src_cost (all->wide_trunc,
240					       int_mode_to, speed));
241	}
242    }
243}
244
245void
246init_expmed (void)
247{
248  struct init_expmed_rtl all;
249  machine_mode mode = QImode;
250  int m, speed;
251
252  memset (&all, 0, sizeof all);
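  /* Prebuild the constants 2**m and m; they are used as operands of
     the cost-measurement template rtxes built below.  */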
253  for (m = 1; m < MAX_BITS_PER_WORD; m++)
254    {
255      all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
256      all.cint[m] = GEN_INT (m);
257    }
258
259  /* Avoid using hard regs in ways which may be unsupported.  */
260  all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
261  all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
262  all.neg = gen_rtx_NEG (mode, all.reg);
263  all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
264  all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
265  all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
266  all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
267  all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
268  all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
269  all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
270  all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
271  all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
272  all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
273  all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
274  all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
275  all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
276  all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
277  all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
278
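  /* Compute every cost twice: once when optimizing for size (speed == 0)
     and once when optimizing for speed (speed == 1).  */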
279  for (speed = 0; speed < 2; speed++)
280    {
281      crtl->maybe_hot_insn_p = speed;
282      set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
283
284      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
285	   mode = (machine_mode)(mode + 1))
286	init_expmed_one_mode (&all, mode, speed);
287
288      if (MIN_MODE_PARTIAL_INT != VOIDmode)
289	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
290	     mode = (machine_mode)(mode + 1))
291	  init_expmed_one_mode (&all, mode, speed);
292
293      if (MIN_MODE_VECTOR_INT != VOIDmode)
294	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
295	     mode = (machine_mode)(mode + 1))
296	  init_expmed_one_mode (&all, mode, speed);
297    }
298
299  if (alg_hash_used_p ())
300    {
301      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
302      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
303    }
304  else
305    set_alg_hash_used_p (true);
306  default_rtl_profile ();
307
308  ggc_free (all.trunc);
309  ggc_free (all.shift_sub1);
310  ggc_free (all.shift_sub0);
311  ggc_free (all.shift_add);
312  ggc_free (all.shift_mult);
313  ggc_free (all.shift);
314  ggc_free (all.wide_trunc);
315  ggc_free (all.wide_lshr);
316  ggc_free (all.wide_mult);
317  ggc_free (all.zext);
318  ggc_free (all.smod_32);
319  ggc_free (all.sdiv_32);
320  ggc_free (all.udiv);
321  ggc_free (all.sdiv);
322  ggc_free (all.mult);
323  ggc_free (all.neg);
324  ggc_free (all.plus);
325  ggc_free (all.reg);
326}
327
328/* Return an rtx representing minus the value of X.
329   MODE is the intended mode of the result,
330   useful if X is a CONST_INT.  */
331
332rtx
333negate_rtx (machine_mode mode, rtx x)
334{
335  rtx result = simplify_unary_operation (NEG, mode, x, mode);
336
337  if (result == 0)
338    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
339
340  return result;
341}
342
343/* Whether reverse storage order is supported on the target.  */
344static int reverse_storage_order_supported = -1;
345
346/* Check whether reverse storage order is supported on the target.  */
347
348static void
349check_reverse_storage_order_support (void)
350{
351  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
352    {
353      reverse_storage_order_supported = 0;
354      sorry ("reverse scalar storage order");
355    }
356  else
357    reverse_storage_order_supported = 1;
358}
359
360/* Whether reverse FP storage order is supported on the target.  */
361static int reverse_float_storage_order_supported = -1;
362
363/* Check whether reverse FP storage order is supported on the target.  */
364
365static void
366check_reverse_float_storage_order_support (void)
367{
368  if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
369    {
370      reverse_float_storage_order_supported = 0;
371      sorry ("reverse floating-point scalar storage order");
372    }
373  else
374    reverse_float_storage_order_supported = 1;
375}
376
377/* Return an rtx representing value of X with reverse storage order.
378   MODE is the intended mode of the result,
379   useful if X is a CONST_INT.  */
380
381rtx
382flip_storage_order (machine_mode mode, rtx x)
383{
384  scalar_int_mode int_mode;
385  rtx result;
386
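  /* A single byte has no storage order to flip.  */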
387  if (mode == QImode)
388    return x;
389
390  if (COMPLEX_MODE_P (mode))
391    {
392      rtx real = read_complex_part (x, false);
393      rtx imag = read_complex_part (x, true);
394
395      real = flip_storage_order (GET_MODE_INNER (mode), real);
396      imag = flip_storage_order (GET_MODE_INNER (mode), imag);
397
398      return gen_rtx_CONCAT (mode, real, imag);
399    }
400
401  if (__builtin_expect (reverse_storage_order_supported < 0, 0))
402    check_reverse_storage_order_support ();
403
404  if (!is_a <scalar_int_mode> (mode, &int_mode))
405    {
406      if (FLOAT_MODE_P (mode)
407	  && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
408	check_reverse_float_storage_order_support ();
409
410      if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
411	{
412	  sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
413	  return x;
414	}
415      x = gen_lowpart (int_mode, x);
416    }
417
418  result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
419  if (result == 0)
420    result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
421
422  if (int_mode != mode)
423    result = gen_lowpart (mode, result);
424
425  return result;
426}
427
428/* If MODE is set, adjust bitfield memory MEM so that it points to the
429   first unit of mode MODE that contains a bitfield of size BITSIZE at
430   bit position BITNUM.  If MODE is not set, return a BLKmode reference
431   to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
432   of the field within the new memory.  */
433
434static rtx
435narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
436		      unsigned HOST_WIDE_INT bitsize,
437		      unsigned HOST_WIDE_INT bitnum,
438		      unsigned HOST_WIDE_INT *new_bitnum)
439{
440  scalar_int_mode imode;
441  if (mode.exists (&imode))
442    {
443      unsigned int unit = GET_MODE_BITSIZE (imode);
444      *new_bitnum = bitnum % unit;
445      HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
446      return adjust_bitfield_address (mem, imode, offset);
447    }
448  else
449    {
450      *new_bitnum = bitnum % BITS_PER_UNIT;
451      HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
452      HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
453			    / BITS_PER_UNIT);
454      return adjust_bitfield_address_size (mem, BLKmode, offset, size);
455    }
456}
457
458/* The caller wants to perform insertion or extraction PATTERN on a
459   bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
460   BITREGION_START and BITREGION_END are as for store_bit_field
461   and FIELDMODE is the natural mode of the field.
462
463   Search for a mode that is compatible with the memory access
464   restrictions and (where applicable) with a register insertion or
465   extraction.  Return the new memory on success, storing the adjusted
466   bit position in *NEW_BITNUM.  Return null otherwise.  */
467
468static rtx
469adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
470			      rtx op0, HOST_WIDE_INT bitsize,
471			      HOST_WIDE_INT bitnum,
472			      poly_uint64 bitregion_start,
473			      poly_uint64 bitregion_end,
474			      machine_mode fieldmode,
475			      unsigned HOST_WIDE_INT *new_bitnum)
476{
477  bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
478				bitregion_end, MEM_ALIGN (op0),
479				MEM_VOLATILE_P (op0));
480  scalar_int_mode best_mode;
481  if (iter.next_mode (&best_mode))
482    {
483      /* We can use a memory in BEST_MODE.  See whether this is true for
484	 any wider modes.  All other things being equal, we prefer to
485	 use the widest mode possible because it tends to expose more
486	 CSE opportunities.  */
487      if (!iter.prefer_smaller_modes ())
488	{
489	  /* Limit the search to the mode required by the corresponding
490	     register insertion or extraction instruction, if any.  */
491	  scalar_int_mode limit_mode = word_mode;
492	  extraction_insn insn;
493	  if (get_best_reg_extraction_insn (&insn, pattern,
494					    GET_MODE_BITSIZE (best_mode),
495					    fieldmode))
496	    limit_mode = insn.field_mode;
497
498	  scalar_int_mode wider_mode;
499	  while (iter.next_mode (&wider_mode)
500		 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
501	    best_mode = wider_mode;
502	}
503      return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
504				   new_bitnum);
505    }
506  return NULL_RTX;
507}
508
509/* Return true if a bitfield of size BITSIZE at bit number BITNUM within
510   a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
511   offset is then BITNUM / BITS_PER_UNIT.  */
512
513static bool
514lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
515		     machine_mode struct_mode)
516{
517  poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
518  if (BYTES_BIG_ENDIAN)
519    return (multiple_p (bitnum, BITS_PER_UNIT)
520	    && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
521		|| multiple_p (bitnum + bitsize,
522			       regsize * BITS_PER_UNIT)));
523  else
524    return multiple_p (bitnum, regsize * BITS_PER_UNIT);
525}
526
527/* Return true if -fstrict-volatile-bitfields applies to an access of OP0
528   containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
529   Return false if the access would touch memory outside the range
530   BITREGION_START to BITREGION_END for conformance to the C++ memory
531   model.  */
532
533static bool
534strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
535			    unsigned HOST_WIDE_INT bitnum,
536			    scalar_int_mode fieldmode,
537			    poly_uint64 bitregion_start,
538			    poly_uint64 bitregion_end)
539{
540  unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
541
542  /* -fstrict-volatile-bitfields must be enabled and we must have a
543     volatile MEM.  */
544  if (!MEM_P (op0)
545      || !MEM_VOLATILE_P (op0)
546      || flag_strict_volatile_bitfields <= 0)
547    return false;
548
549  /* The bit size must not be larger than the field mode, and
550     the field mode must not be larger than a word.  */
551  if (bitsize > modesize || modesize > BITS_PER_WORD)
552    return false;
553
554  /* Check for cases of unaligned fields that must be split.  */
555  if (bitnum % modesize + bitsize > modesize)
556    return false;
557
558  /* The memory must be sufficiently aligned for a MODESIZE access.
559     This condition guarantees that the memory access will not
560     touch anything after the end of the structure.  */
561  if (MEM_ALIGN (op0) < modesize)
562    return false;
563
564  /* Check for cases where the C++ memory model applies.  */
565  if (maybe_ne (bitregion_end, 0U)
566      && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
567	  || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
568		       bitregion_end)))
569    return false;
570
571  return true;
572}
573
574/* Return true if OP is a memory and if a bitfield of size BITSIZE at
575   bit number BITNUM can be treated as a simple value of mode MODE.
576   Store the byte offset in *BYTENUM if so.  */
577
578static bool
579simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
580		       machine_mode mode, poly_uint64 *bytenum)
581{
582  return (MEM_P (op0)
583	  && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
584	  && known_eq (bitsize, GET_MODE_BITSIZE (mode))
585	  && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
586	      || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
587		  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
588}
589
590/* Try to use instruction INSV to store VALUE into a field of OP0.
591   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
592   BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
593   are as for store_bit_field.  */
594
595static bool
596store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
597			    opt_scalar_int_mode op0_mode,
598			    unsigned HOST_WIDE_INT bitsize,
599			    unsigned HOST_WIDE_INT bitnum,
600			    rtx value, scalar_int_mode value_mode)
601{
602  struct expand_operand ops[4];
603  rtx value1;
604  rtx xop0 = op0;
605  rtx_insn *last = get_last_insn ();
606  bool copy_back = false;
607
608  scalar_int_mode op_mode = insv->field_mode;
609  unsigned int unit = GET_MODE_BITSIZE (op_mode);
610  if (bitsize == 0 || bitsize > unit)
611    return false;
612
613  if (MEM_P (xop0))
614    /* Get a reference to the first byte of the field.  */
615    xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
616				 &bitnum);
617  else
618    {
619      /* Convert from counting within OP0 to counting in OP_MODE.  */
620      if (BYTES_BIG_ENDIAN)
621	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
622
623      /* If xop0 is a register, we need it in OP_MODE
624	 to make it acceptable to the format of insv.  */
625      if (GET_CODE (xop0) == SUBREG)
626	/* We can't just change the mode, because this might clobber op0,
627	   and we will need the original value of op0 if insv fails.  */
628	xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
629      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
630	xop0 = gen_lowpart_SUBREG (op_mode, xop0);
631    }
632
633  /* If the destination is a paradoxical subreg such that we need a
634     truncate to the inner mode, perform the insertion on a temporary and
635     truncate the result to the original destination.  Note that we can't
636     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
637     X) 0)) is (reg:N X).  */
638  if (GET_CODE (xop0) == SUBREG
639      && REG_P (SUBREG_REG (xop0))
640      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
641					 op_mode))
642    {
643      rtx tem = gen_reg_rtx (op_mode);
644      emit_move_insn (tem, xop0);
645      xop0 = tem;
646      copy_back = true;
647    }
648
649  /* There are similar overflow check at the start of store_bit_field_1,
650     but that only check the situation where the field lies completely
651     outside the register, while there do have situation where the field
652     lies partialy in the register, we need to adjust bitsize for this
653     partial overflow situation.  Without this fix, pr48335-2.c on big-endian
654     will broken on those arch support bit insert instruction, like arm, aarch64
655     etc.  */
656  if (bitsize + bitnum > unit && bitnum < unit)
657    {
658      warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
659	       "destination object, data truncated into %wu-bit",
660	       bitsize, unit - bitnum);
661      bitsize = unit - bitnum;
662    }
663
664  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
665     "backwards" from the size of the unit we are inserting into.
666     Otherwise, we count bits from the most significant on a
667     BYTES/BITS_BIG_ENDIAN machine.  */
668
669  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
670    bitnum = unit - bitsize - bitnum;
671
672  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
673  value1 = value;
674  if (value_mode != op_mode)
675    {
676      if (GET_MODE_BITSIZE (value_mode) >= bitsize)
677	{
678	  rtx tmp;
679	  /* Optimization: Don't bother really extending VALUE
680	     if it has all the bits we will actually use.  However,
681	     if we must narrow it, be sure we do it correctly.  */
682
683	  if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
684	    {
685	      tmp = simplify_subreg (op_mode, value1, value_mode, 0);
686	      if (! tmp)
687		tmp = simplify_gen_subreg (op_mode,
688					   force_reg (value_mode, value1),
689					   value_mode, 0);
690	    }
691	  else
692	    {
693	      tmp = gen_lowpart_if_possible (op_mode, value1);
694	      if (! tmp)
695		tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
696	    }
697	  value1 = tmp;
698	}
699      else if (CONST_INT_P (value))
700	value1 = gen_int_mode (INTVAL (value), op_mode);
701      else
702	/* Parse phase is supposed to make VALUE's data type
703	   match that of the component reference, which is a type
704	   at least as wide as the field; so VALUE should have
705	   a mode that corresponds to that type.  */
706	gcc_assert (CONSTANT_P (value));
707    }
708
709  create_fixed_operand (&ops[0], xop0);
710  create_integer_operand (&ops[1], bitsize);
711  create_integer_operand (&ops[2], bitnum);
712  create_input_operand (&ops[3], value1, op_mode);
713  if (maybe_expand_insn (insv->icode, 4, ops))
714    {
715      if (copy_back)
716	convert_move (op0, xop0, true);
717      return true;
718    }
719  delete_insns_since (last);
720  return false;
721}
722
723/* A subroutine of store_bit_field, with the same arguments.  Return true
724   if the operation could be implemented.
725
726   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
727   no other way of implementing the operation.  If FALLBACK_P is false,
728   return false instead.  */
729
730static bool
731store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
732		   poly_uint64 bitregion_start, poly_uint64 bitregion_end,
733		   machine_mode fieldmode,
734		   rtx value, bool reverse, bool fallback_p)
735{
736  rtx op0 = str_rtx;
737
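  /* Strip any SUBREGs, adjusting BITNUM by the equivalent byte offset
     of each one so that we operate on the underlying object.  */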
738  while (GET_CODE (op0) == SUBREG)
739    {
740      bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
741      op0 = SUBREG_REG (op0);
742    }
743
744  /* No action is needed if the target is a register and if the field
745     lies completely outside that register.  This can occur if the source
746     code contains an out-of-bounds access to a small array.  */
747  if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
748    return true;
749
750  /* Use vec_set patterns for inserting parts of vectors whenever
751     available.  */
752  machine_mode outermode = GET_MODE (op0);
753  scalar_mode innermode = GET_MODE_INNER (outermode);
754  poly_uint64 pos;
755  if (VECTOR_MODE_P (outermode)
756      && !MEM_P (op0)
757      && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
758      && fieldmode == innermode
759      && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
760      && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
761    {
762      struct expand_operand ops[3];
763      enum insn_code icode = optab_handler (vec_set_optab, outermode);
764
765      create_fixed_operand (&ops[0], op0);
766      create_input_operand (&ops[1], value, innermode);
767      create_integer_operand (&ops[2], pos);
768      if (maybe_expand_insn (icode, 3, ops))
769	return true;
770    }
771
772  /* If the target is a register, then overwriting the entire object or
773     storing a full-word or multi-word field can be done with just a SUBREG.  */
774  if (!MEM_P (op0)
775      && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
776    {
777      /* Use the subreg machinery either to narrow OP0 to the required
778	 words or to cope with mode punning between equal-sized modes.
779	 In the latter case, use subreg on the rhs, not the lhs.  */
780      rtx sub;
781      HOST_WIDE_INT regnum;
782      poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
783      if (known_eq (bitnum, 0U)
784	  && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
785	{
786	  sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
787	  if (sub)
788	    {
789	      if (reverse)
790		sub = flip_storage_order (GET_MODE (op0), sub);
791	      emit_move_insn (op0, sub);
792	      return true;
793	    }
794	}
795      else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
796	       && multiple_p (bitsize, regsize * BITS_PER_UNIT))
797	{
798	  sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
799				     regnum * regsize);
800	  if (sub)
801	    {
802	      if (reverse)
803		value = flip_storage_order (fieldmode, value);
804	      emit_move_insn (sub, value);
805	      return true;
806	    }
807	}
808    }
809
810  /* If the target is memory, storing any naturally aligned field can be
811     done with a simple store.  For targets that support fast unaligned
812     memory access, any naturally sized, unit-aligned field can be stored directly.  */
813  poly_uint64 bytenum;
814  if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
815    {
816      op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
817      if (reverse)
818	value = flip_storage_order (fieldmode, value);
819      emit_move_insn (op0, value);
820      return true;
821    }
822
823  /* It's possible we'll need to handle other cases here for
824     polynomial bitnum and bitsize.  */
825
826  /* From here on we need to be looking at a fixed-size insertion.  */
827  unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
828  unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
829
830  /* Make sure we are playing with integral modes.  Pun with subregs
831     if we aren't.  This must come after the entire register case above,
832     since that case is valid for any mode.  The following cases are only
833     valid for integral modes.  */
834  opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
835  scalar_int_mode imode;
836  if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
837    {
838      if (MEM_P (op0))
839	op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
840					    0, MEM_SIZE (op0));
841      else if (!op0_mode.exists ())
842	{
843	  if (ibitnum == 0
844	      && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
845	      && MEM_P (value)
846	      && !reverse)
847	    {
848	      value = adjust_address (value, GET_MODE (op0), 0);
849	      emit_move_insn (op0, value);
850	      return true;
851	    }
852	  if (!fallback_p)
853	    return false;
854	  rtx temp = assign_stack_temp (GET_MODE (op0),
855					GET_MODE_SIZE (GET_MODE (op0)));
856	  emit_move_insn (temp, op0);
857	  store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
858			     reverse, fallback_p);
859	  emit_move_insn (op0, temp);
860	  return true;
861	}
862      else
863	op0 = gen_lowpart (op0_mode.require (), op0);
864    }
865
866  return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
867				   bitregion_start, bitregion_end,
868				   fieldmode, value, reverse, fallback_p);
869}
870
871/* Subroutine of store_bit_field_1, with the same arguments, except
872   that BITSIZE and BITNUM are constant.  Handle cases specific to
873   integral modes.  If OP0_MODE is defined, it is the mode of OP0,
874   otherwise OP0 is a BLKmode MEM.  */
875
876static bool
877store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
878			  unsigned HOST_WIDE_INT bitsize,
879			  unsigned HOST_WIDE_INT bitnum,
880			  poly_uint64 bitregion_start,
881			  poly_uint64 bitregion_end,
882			  machine_mode fieldmode,
883			  rtx value, bool reverse, bool fallback_p)
884{
885  /* Storing an lsb-aligned field in a register
886     can be done with a movstrict instruction.  */
887
888  if (!MEM_P (op0)
889      && !reverse
890      && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
891      && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
892      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
893    {
894      struct expand_operand ops[2];
895      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
896      rtx arg0 = op0;
897      unsigned HOST_WIDE_INT subreg_off;
898
899      if (GET_CODE (arg0) == SUBREG)
900	{
901	  /* Else we've got some float mode source being extracted into
902	     a different float mode destination -- this combination of
903	     subregs results in Severe Tire Damage.  */
904	  gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
905		      || GET_MODE_CLASS (fieldmode) == MODE_INT
906		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
907	  arg0 = SUBREG_REG (arg0);
908	}
909
910      subreg_off = bitnum / BITS_PER_UNIT;
911      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
912	{
913	  arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
914
915	  create_fixed_operand (&ops[0], arg0);
916	  /* Shrink the source operand to FIELDMODE.  */
917	  create_convert_operand_to (&ops[1], value, fieldmode, false);
918	  if (maybe_expand_insn (icode, 2, ops))
919	    return true;
920	}
921    }
922
923  /* Handle fields bigger than a word.  */
924
925  if (bitsize > BITS_PER_WORD)
926    {
927      /* Here we transfer the words of the field
928	 in the order least significant first.
929	 This is because the most significant word is the one which may
930	 be less than full.
931	 However, only do that if the value is not BLKmode.  */
932
933      const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
934      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
935      unsigned int i;
936      rtx_insn *last;
937
938      /* This is the mode we must force value to, so that there will be enough
939	 subwords to extract.  Note that fieldmode will often (always?) be
940	 VOIDmode, because that is what store_field uses to indicate that this
941	 is a bit field, but passing VOIDmode to operand_subword_force
942	 is not allowed.
943
944	 The mode must be fixed-size, since insertions into variable-sized
945	 objects are meant to be handled before calling this function.  */
946      fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
947      if (value_mode == VOIDmode)
948	value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
949
950      last = get_last_insn ();
951      for (i = 0; i < nwords; i++)
952	{
953	  /* If I is 0, use the low-order word in both field and target;
954	     if I is 1, use the next to lowest word; and so on.  */
955	  unsigned int wordnum = (backwards
956				  ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD
957				  - i - 1
958				  : i);
959	  unsigned int bit_offset = (backwards ^ reverse
960				     ? MAX ((int) bitsize - ((int) i + 1)
961					    * BITS_PER_WORD,
962					    0)
963				     : (int) i * BITS_PER_WORD);
964	  rtx value_word = operand_subword_force (value, wordnum, value_mode);
965	  unsigned HOST_WIDE_INT new_bitsize =
966	    MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
967
968	  /* If the remaining chunk is not a full word, we have to make
969	     sure that for big-endian machines the higher-order bits are
970	     used.  */
971	  if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
972	    {
973	      int shift = BITS_PER_WORD - new_bitsize;
974	      rtx shift_rtx = gen_int_shift_amount (word_mode, shift);
975	      value_word = simplify_expand_binop (word_mode, lshr_optab,
976						  value_word, shift_rtx,
977						  NULL_RTX, true,
978						  OPTAB_LIB_WIDEN);
979	    }
980
981	  if (!store_bit_field_1 (op0, new_bitsize,
982				  bitnum + bit_offset,
983				  bitregion_start, bitregion_end,
984				  word_mode,
985				  value_word, reverse, fallback_p))
986	    {
987	      delete_insns_since (last);
988	      return false;
989	    }
990	}
991      return true;
992    }
993
994  /* If VALUE has a floating-point or complex mode, access it as an
995     integer of the corresponding size.  This can occur on a machine
996     with 64 bit registers that uses SFmode for float.  It can also
997     occur for unaligned float or complex fields.  */
998  rtx orig_value = value;
999  scalar_int_mode value_mode;
1000  if (GET_MODE (value) == VOIDmode)
1001    /* By this point we've dealt with values that are bigger than a word,
1002       so word_mode is a conservatively correct choice.  */
1003    value_mode = word_mode;
1004  else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1005    {
1006      value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1007      value = gen_reg_rtx (value_mode);
1008      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1009    }
1010
1011  /* If OP0 is a multi-word register, narrow it to the affected word.
1012     If the region spans two words, defer to store_split_bit_field.
1013     Don't do this if op0 is a single hard register wider than word
1014     such as a float or vector register.  */
1015  if (!MEM_P (op0)
1016      && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1017      && (!REG_P (op0)
1018	  || !HARD_REGISTER_P (op0)
1019	  || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1020    {
1021      if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1022	{
1023	  if (!fallback_p)
1024	    return false;
1025
1026	  store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1027				 bitregion_start, bitregion_end,
1028				 value, value_mode, reverse);
1029	  return true;
1030	}
1031      op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1032				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1033      gcc_assert (op0);
1034      op0_mode = word_mode;
1035      bitnum %= BITS_PER_WORD;
1036    }
1037
1038  /* From here on we can assume that the field to be stored in fits
1039     within a word.  If the destination is a register, it too fits
1040     in a word.  */
1041
1042  extraction_insn insv;
1043  if (!MEM_P (op0)
1044      && !reverse
1045      && get_best_reg_extraction_insn (&insv, EP_insv,
1046				       GET_MODE_BITSIZE (op0_mode.require ()),
1047				       fieldmode)
1048      && store_bit_field_using_insv (&insv, op0, op0_mode,
1049				     bitsize, bitnum, value, value_mode))
1050    return true;
1051
1052  /* If OP0 is a memory, try copying it to a register and seeing if a
1053     cheap register alternative is available.  */
1054  if (MEM_P (op0) && !reverse)
1055    {
1056      if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1057					fieldmode)
1058	  && store_bit_field_using_insv (&insv, op0, op0_mode,
1059					 bitsize, bitnum, value, value_mode))
1060	return true;
1061
1062      rtx_insn *last = get_last_insn ();
1063
1064      /* Try loading part of OP0 into a register, inserting the bitfield
1065	 into that, and then copying the result back to OP0.  */
1066      unsigned HOST_WIDE_INT bitpos;
1067      rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1068					       bitregion_start, bitregion_end,
1069					       fieldmode, &bitpos);
1070      if (xop0)
1071	{
1072	  rtx tempreg = copy_to_reg (xop0);
1073	  if (store_bit_field_1 (tempreg, bitsize, bitpos,
1074				 bitregion_start, bitregion_end,
1075				 fieldmode, orig_value, reverse, false))
1076	    {
1077	      emit_move_insn (xop0, tempreg);
1078	      return true;
1079	    }
1080	  delete_insns_since (last);
1081	}
1082    }
1083
1084  if (!fallback_p)
1085    return false;
1086
1087  store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1088			 bitregion_end, value, value_mode, reverse);
1089  return true;
1090}
1091
1092/* Generate code to store value from rtx VALUE
1093   into a bit-field within structure STR_RTX
1094   containing BITSIZE bits starting at bit BITNUM.
1095
1096   BITREGION_START is the bitpos of the first bitfield in this region.
1097   BITREGION_END is the bitpos of the ending bitfield in this region.
1098   These two fields are 0 if the C++ memory model does not apply,
1099   or if we are not interested in keeping track of bitfield regions.
1100
1101   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1102
1103   If REVERSE is true, the store is to be done in reverse order.  */
1104
1105void
1106store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1107		 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1108		 machine_mode fieldmode,
1109		 rtx value, bool reverse)
1110{
1111  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1112  unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1113  scalar_int_mode int_mode;
1114  if (bitsize.is_constant (&ibitsize)
1115      && bitnum.is_constant (&ibitnum)
1116      && is_a <scalar_int_mode> (fieldmode, &int_mode)
1117      && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1118				     bitregion_start, bitregion_end))
1119    {
1120      /* Storing a full word can be done with a simple store.
1121	 We know here that the field can be accessed with a single
1122	 instruction.  For targets that support unaligned memory
1123	 accesses, an unaligned access may be necessary.  */
1124      if (ibitsize == GET_MODE_BITSIZE (int_mode))
1125	{
1126	  str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1127					     ibitnum / BITS_PER_UNIT);
1128	  if (reverse)
1129	    value = flip_storage_order (int_mode, value);
1130	  gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1131	  emit_move_insn (str_rtx, value);
1132	}
1133      else
1134	{
1135	  rtx temp;
1136
1137	  str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1138					  ibitnum, &ibitnum);
1139	  gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1140	  temp = copy_to_reg (str_rtx);
1141	  if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1142				  int_mode, value, reverse, true))
1143	    gcc_unreachable ();
1144
1145	  emit_move_insn (str_rtx, temp);
1146	}
1147
1148      return;
1149    }
1150
1151  /* Under the C++0x memory model, we must not touch bits outside the
1152     bit region.  Adjust the address to start at the beginning of the
1153     bit region.  */
1154  if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1155    {
1156      scalar_int_mode best_mode;
1157      machine_mode addr_mode = VOIDmode;
1158
1159      poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1160      bitnum -= bitregion_start;
1161      poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1162      bitregion_end -= bitregion_start;
1163      bitregion_start = 0;
1164      if (bitsize.is_constant (&ibitsize)
1165	  && bitnum.is_constant (&ibitnum)
1166	  && get_best_mode (ibitsize, ibitnum,
1167			    bitregion_start, bitregion_end,
1168			    MEM_ALIGN (str_rtx), INT_MAX,
1169			    MEM_VOLATILE_P (str_rtx), &best_mode))
1170	addr_mode = best_mode;
1171      str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1172					      offset, size);
1173    }
1174
1175  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1176			  bitregion_start, bitregion_end,
1177			  fieldmode, value, reverse, true))
1178    gcc_unreachable ();
1179}
1180
1181/* Use shifts and boolean operations to store VALUE into a bit field of
1182   width BITSIZE in OP0, starting at bit BITNUM.  If OP0_MODE is defined,
1183   it is the mode of OP0, otherwise OP0 is a BLKmode MEM.  VALUE_MODE is
1184   the mode of VALUE.
1185
1186   If REVERSE is true, the store is to be done in reverse order.  */
1187
1188static void
1189store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1190		       unsigned HOST_WIDE_INT bitsize,
1191		       unsigned HOST_WIDE_INT bitnum,
1192		       poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1193		       rtx value, scalar_int_mode value_mode, bool reverse)
1194{
1195  /* There is a case not handled here:
1196     a structure with a known alignment of just a halfword
1197     and a field split across two aligned halfwords within the structure.
1198     Or likewise a structure with a known alignment of just a byte
1199     and a field split across two bytes.
1200     Such cases are not supposed to be able to occur.  */
1201
1202  scalar_int_mode best_mode;
1203  if (MEM_P (op0))
1204    {
1205      unsigned int max_bitsize = BITS_PER_WORD;
1206      scalar_int_mode imode;
1207      if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1208	max_bitsize = GET_MODE_BITSIZE (imode);
1209
1210      if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1211			  MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1212			  &best_mode))
1213	{
1214	  /* The only way this should occur is if the field spans word
1215	     boundaries.  */
1216	  store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1217				 bitregion_start, bitregion_end,
1218				 value, value_mode, reverse);
1219	  return;
1220	}
1221
1222      op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1223    }
1224  else
1225    best_mode = op0_mode.require ();
1226
1227  store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1228			   value, value_mode, reverse);
1229}
1230
1231/* Helper function for store_fixed_bit_field; it always stores
1232   the bit field using MODE, which is the mode of OP0.  The other
1233   arguments are as for store_fixed_bit_field.  */
1234
1235static void
1236store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1237			 unsigned HOST_WIDE_INT bitsize,
1238			 unsigned HOST_WIDE_INT bitnum,
1239			 rtx value, scalar_int_mode value_mode, bool reverse)
1240{
1241  rtx temp;
1242  int all_zero = 0;
1243  int all_one = 0;
1244
1245  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1246     for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1247
1248  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1249    /* BITNUM is the distance between our msb
1250       and that of the containing datum.
1251       Convert it to the distance from the lsb.  */
1252    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1253
1254  /* Now BITNUM is always the distance between our lsb
1255     and that of OP0.  */
1256
1257  /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1258     we must first convert its mode to MODE.  */
1259
1260  if (CONST_INT_P (value))
1261    {
1262      unsigned HOST_WIDE_INT v = UINTVAL (value);
1263
1264      if (bitsize < HOST_BITS_PER_WIDE_INT)
1265	v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1266
1267      if (v == 0)
1268	all_zero = 1;
1269      else if ((bitsize < HOST_BITS_PER_WIDE_INT
1270		&& v == (HOST_WIDE_INT_1U << bitsize) - 1)
1271	       || (bitsize == HOST_BITS_PER_WIDE_INT
1272		   && v == HOST_WIDE_INT_M1U))
1273	all_one = 1;
1274
1275      value = lshift_value (mode, v, bitnum);
1276    }
1277  else
1278    {
1279      int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1280		      && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1281
1282      if (value_mode != mode)
1283	value = convert_to_mode (mode, value, 1);
1284
1285      if (must_and)
1286	value = expand_binop (mode, and_optab, value,
1287			      mask_rtx (mode, 0, bitsize, 0),
1288			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
1289      if (bitnum > 0)
1290	value = expand_shift (LSHIFT_EXPR, mode, value,
1291			      bitnum, NULL_RTX, 1);
1292    }
1293
1294  if (reverse)
1295    value = flip_storage_order (mode, value);
1296
1297  /* Now clear the chosen bits in OP0,
1298     except that if VALUE is -1 we need not bother.  */
1299  /* We keep the intermediates in registers to allow CSE to combine
1300     consecutive bitfield assignments.  */
1301
1302  temp = force_reg (mode, op0);
1303
1304  if (! all_one)
1305    {
1306      rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1307      if (reverse)
1308	mask = flip_storage_order (mode, mask);
1309      temp = expand_binop (mode, and_optab, temp, mask,
1310			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1311      temp = force_reg (mode, temp);
1312    }
1313
1314  /* Now logical-or VALUE into OP0, unless it is zero.  */
1315
1316  if (! all_zero)
1317    {
1318      temp = expand_binop (mode, ior_optab, temp, value,
1319			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1320      temp = force_reg (mode, temp);
1321    }
1322
1323  if (op0 != temp)
1324    {
1325      op0 = copy_rtx (op0);
1326      emit_move_insn (op0, temp);
1327    }
1328}
1329
1330/* Store a bit field that is split across multiple accessible memory objects.
1331
1332   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1333   BITSIZE is the field width; BITPOS the position of its first bit
1334   (within the word).
1335   VALUE is the value to store, which has mode VALUE_MODE.
1336   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1337   a BLKmode MEM.
1338
1339   If REVERSE is true, the store is to be done in reverse order.
1340
1341   This does not yet handle fields wider than BITS_PER_WORD.  */
1342
1343static void
1344store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1345		       unsigned HOST_WIDE_INT bitsize,
1346		       unsigned HOST_WIDE_INT bitpos,
1347		       poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1348		       rtx value, scalar_int_mode value_mode, bool reverse)
1349{
1350  unsigned int unit, total_bits, bitsdone = 0;
1351
1352  /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1353     much at a time.  */
1354  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1355    unit = BITS_PER_WORD;
1356  else
1357    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1358
1359  /* If OP0 is a memory with a mode, then UNIT must not be larger than
1360     OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1361     again, and we will mutually recurse forever.  */
1362  if (MEM_P (op0) && op0_mode.exists ())
1363    unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1364
1365  /* If VALUE is a constant other than a CONST_INT, get it into a register in
1366     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1367     that VALUE might be a floating-point constant.  */
1368  if (CONSTANT_P (value) && !CONST_INT_P (value))
1369    {
1370      rtx word = gen_lowpart_common (word_mode, value);
1371
1372      if (word && (value != word))
1373	value = word;
1374      else
1375	value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1376      value_mode = word_mode;
1377    }
1378
1379  total_bits = GET_MODE_BITSIZE (value_mode);
1380
1381  while (bitsdone < bitsize)
1382    {
1383      unsigned HOST_WIDE_INT thissize;
1384      unsigned HOST_WIDE_INT thispos;
1385      unsigned HOST_WIDE_INT offset;
1386      rtx part;
1387
1388      offset = (bitpos + bitsdone) / unit;
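      /* OFFSET is the index of the UNIT-sized chunk of OP0 in which the
	 next piece starts, and THISPOS its bit position within that chunk.  */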
1389      thispos = (bitpos + bitsdone) % unit;
1390
1391      /* When the region of bytes we can touch is restricted, decrease
1392	 UNIT close to the end of the region as needed.  If op0 is a REG
1393	 or SUBREG of REG, don't do this, as there can't be data races
1394	 on a register and we can expand shorter code in some cases.  */
1395      if (maybe_ne (bitregion_end, 0U)
1396	  && unit > BITS_PER_UNIT
1397	  && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1398	  && !REG_P (op0)
1399	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1400	{
1401	  unit = unit / 2;
1402	  continue;
1403	}
1404
1405      /* THISSIZE must not overrun a word boundary.  Otherwise,
1406	 store_fixed_bit_field will call us again, and we will mutually
1407	 recurse forever.  */
1408      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1409      thissize = MIN (thissize, unit - thispos);
1410
1411      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1412	{
1413	  /* Fetch successively less significant portions.  */
1414	  if (CONST_INT_P (value))
1415	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1416			     >> (bitsize - bitsdone - thissize))
1417			    & ((HOST_WIDE_INT_1 << thissize) - 1));
1418          /* Likewise, but the source is little-endian.  */
1419          else if (reverse)
1420	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1421					    thissize,
1422					    bitsize - bitsdone - thissize,
1423					    NULL_RTX, 1, false);
1424	  else
1425	    /* The args are chosen so that the last part includes the
1426	       lsb.  Give extract_bit_field the value it needs (with
1427	       endianness compensation) to fetch the piece we want.  */
1428	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1429					    thissize,
1430					    total_bits - bitsize + bitsdone,
1431					    NULL_RTX, 1, false);
1432	}
1433      else
1434	{
1435	  /* Fetch successively more significant portions.  */
1436	  if (CONST_INT_P (value))
1437	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1438			     >> bitsdone)
1439			    & ((HOST_WIDE_INT_1 << thissize) - 1));
1440	  /* Likewise, but the source is big-endian.  */
1441          else if (reverse)
1442	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1443					    thissize,
1444					    total_bits - bitsdone - thissize,
1445					    NULL_RTX, 1, false);
1446	  else
1447	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1448					    thissize, bitsdone, NULL_RTX,
1449					    1, false);
1450	}
1451
1452      /* If OP0 is a register, then handle OFFSET here.  */
1453      rtx op0_piece = op0;
1454      opt_scalar_int_mode op0_piece_mode = op0_mode;
1455      if (SUBREG_P (op0) || REG_P (op0))
1456	{
1457	  scalar_int_mode imode;
1458	  if (op0_mode.exists (&imode)
1459	      && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1460	    {
1461	      if (offset)
1462		op0_piece = const0_rtx;
1463	    }
1464	  else
1465	    {
1466	      op0_piece = operand_subword_force (op0,
1467						 offset * unit / BITS_PER_WORD,
1468						 GET_MODE (op0));
1469	      op0_piece_mode = word_mode;
1470	    }
1471	  offset &= BITS_PER_WORD / unit - 1;
1472	}
1473
1474      /* OFFSET is in UNITs, and UNIT is in bits.  If OP0_PIECE is
1475	 const0_rtx, it is just an out-of-bounds access.  Ignore it.  */
1476      if (op0_piece != const0_rtx)
1477	store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1478			       offset * unit + thispos, bitregion_start,
1479			       bitregion_end, part, word_mode, reverse);
1480      bitsdone += thissize;
1481    }
1482}
1483
1484/* A subroutine of extract_bit_field_1 that converts return value X
1485   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1486   to extract_bit_field.  */
1487
1488static rtx
1489convert_extracted_bit_field (rtx x, machine_mode mode,
1490			     machine_mode tmode, bool unsignedp)
1491{
1492  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1493    return x;
1494
1495  /* If TMODE is not a scalar integer mode, first convert to the integer
1496     mode of that size and then access the result as TMODE (e.g. a
1497     floating-point value) via a lowpart SUBREG.  */
1498  if (!SCALAR_INT_MODE_P (tmode))
1499    {
1500      scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1501      x = convert_to_mode (int_mode, x, unsignedp);
1502      x = force_reg (int_mode, x);
1503      return gen_lowpart (tmode, x);
1504    }
1505
1506  return convert_to_mode (tmode, x, unsignedp);
1507}
1508
1509/* Try to use an ext(z)v pattern to extract a field from OP0.
1510   Return the extracted value on success, otherwise return null.
1511   EXTV describes the extraction instruction to use.  If OP0_MODE
1512   is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1513   The other arguments are as for extract_bit_field.  */
1514
1515static rtx
1516extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1517			      opt_scalar_int_mode op0_mode,
1518			      unsigned HOST_WIDE_INT bitsize,
1519			      unsigned HOST_WIDE_INT bitnum,
1520			      int unsignedp, rtx target,
1521			      machine_mode mode, machine_mode tmode)
1522{
1523  struct expand_operand ops[4];
1524  rtx spec_target = target;
1525  rtx spec_target_subreg = 0;
1526  scalar_int_mode ext_mode = extv->field_mode;
1527  unsigned unit = GET_MODE_BITSIZE (ext_mode);
1528
1529  if (bitsize == 0 || unit < bitsize)
1530    return NULL_RTX;
1531
1532  if (MEM_P (op0))
1533    /* Get a reference to the first byte of the field.  */
1534    op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1535				&bitnum);
1536  else
1537    {
1538      /* Convert from counting within OP0 to counting in EXT_MODE.  */
1539      if (BYTES_BIG_ENDIAN)
1540	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1541
1542      /* If op0 is a register, we need it in EXT_MODE to make it
1543	 acceptable to the format of ext(z)v.  */
1544      if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1545	return NULL_RTX;
1546      if (REG_P (op0) && op0_mode.require () != ext_mode)
1547	op0 = gen_lowpart_SUBREG (ext_mode, op0);
1548    }
1549
1550  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1551     "backwards" from the size of the unit we are extracting from.
1552     Otherwise, we count bits from the most significant on a
1553     BYTES/BITS_BIG_ENDIAN machine.  */
1554
1555  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1556    bitnum = unit - bitsize - bitnum;
1557
1558  if (target == 0)
1559    target = spec_target = gen_reg_rtx (tmode);
1560
1561  if (GET_MODE (target) != ext_mode)
1562    {
1563      /* Don't use LHS paradoxical subreg if explicit truncation is needed
1564	 between the mode of the extraction (EXT_MODE) and the target
1565	 mode.  Instead, create a temporary and use convert_move to set
1566	 the target.  */
1567      if (REG_P (target)
1568	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1569	{
1570	  target = gen_lowpart (ext_mode, target);
1571	  if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1572	    spec_target_subreg = target;
1573	}
1574      else
1575	target = gen_reg_rtx (ext_mode);
1576    }
1577
1578  create_output_operand (&ops[0], target, ext_mode);
1579  create_fixed_operand (&ops[1], op0);
1580  create_integer_operand (&ops[2], bitsize);
1581  create_integer_operand (&ops[3], bitnum);
1582  if (maybe_expand_insn (extv->icode, 4, ops))
1583    {
1584      target = ops[0].value;
1585      if (target == spec_target)
1586	return target;
1587      if (target == spec_target_subreg)
1588	return spec_target;
1589      return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1590    }
1591  return NULL_RTX;
1592}
1593
1594/* See whether it would be valid to extract the part of OP0 described
1595   by BITNUM and BITSIZE into a value of mode MODE using a subreg
1596   operation.  Return the subreg if so, otherwise return null.  */
1597
1598static rtx
1599extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1600			     poly_uint64 bitsize, poly_uint64 bitnum)
1601{
1602  poly_uint64 bytenum;
1603  if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1604      && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1605      && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1606      && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
1607    return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
1608  return NULL_RTX;
1609}
1610
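/* A minimal illustration: with BITNUM == 0 and BITSIZE equal to the width
   of MODE, extracting the low V2SI half of a V4SI register can be expressed
   as (subreg:V2SI (reg:V4SI ...) 0), provided truncation between the two
   modes is a no-op on the target.  */
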
1611/* A subroutine of extract_bit_field, with the same arguments.
1612   If FALLBACK_P is true, fall back to extract_fixed_bit_field
1613   if we can find no other means of implementing the operation.
1614   If FALLBACK_P is false, return NULL instead.  */
1615
1616static rtx
1617extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1618		     int unsignedp, rtx target, machine_mode mode,
1619		     machine_mode tmode, bool reverse, bool fallback_p,
1620		     rtx *alt_rtl)
1621{
1622  rtx op0 = str_rtx;
1623  machine_mode mode1;
1624
1625  if (tmode == VOIDmode)
1626    tmode = mode;
1627
1628  while (GET_CODE (op0) == SUBREG)
1629    {
1630      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1631      op0 = SUBREG_REG (op0);
1632    }
1633
1634  /* If we have an out-of-bounds access to a register, just return an
1635     uninitialized register of the required mode.  This can occur if the
1636     source code contains an out-of-bounds access to a small array.  */
1637  if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1638    return gen_reg_rtx (tmode);
1639
1640  if (REG_P (op0)
1641      && mode == GET_MODE (op0)
1642      && known_eq (bitnum, 0U)
1643      && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1644    {
1645      if (reverse)
1646	op0 = flip_storage_order (mode, op0);
1647      /* We're trying to extract a full register from itself.  */
1648      return op0;
1649    }
1650
1651  /* First try to check for vector from vector extractions.  */
1652  if (VECTOR_MODE_P (GET_MODE (op0))
1653      && !MEM_P (op0)
1654      && VECTOR_MODE_P (tmode)
1655      && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1656      && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1657    {
1658      machine_mode new_mode = GET_MODE (op0);
1659      if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1660	{
1661	  scalar_mode inner_mode = GET_MODE_INNER (tmode);
1662	  poly_uint64 nunits;
1663	  if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1664			   GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1665	      || !mode_for_vector (inner_mode, nunits).exists (&new_mode)
1666	      || !VECTOR_MODE_P (new_mode)
1667	      || maybe_ne (GET_MODE_SIZE (new_mode),
1668			   GET_MODE_SIZE (GET_MODE (op0)))
1669	      || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1670	      || !targetm.vector_mode_supported_p (new_mode))
1671	    new_mode = VOIDmode;
1672	}
1673      poly_uint64 pos;
1674      if (new_mode != VOIDmode
1675	  && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1676	      != CODE_FOR_nothing)
1677	  && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1678	{
1679	  struct expand_operand ops[3];
1680	  machine_mode outermode = new_mode;
1681	  machine_mode innermode = tmode;
1682	  enum insn_code icode
1683	    = convert_optab_handler (vec_extract_optab, outermode, innermode);
1684
1685	  if (new_mode != GET_MODE (op0))
1686	    op0 = gen_lowpart (new_mode, op0);
1687	  create_output_operand (&ops[0], target, innermode);
1688	  ops[0].target = 1;
1689	  create_input_operand (&ops[1], op0, outermode);
1690	  create_integer_operand (&ops[2], pos);
1691	  if (maybe_expand_insn (icode, 3, ops))
1692	    {
1693	      if (alt_rtl && ops[0].target)
1694		*alt_rtl = target;
1695	      target = ops[0].value;
1696	      if (GET_MODE (target) != mode)
1697		return gen_lowpart (tmode, target);
1698	      return target;
1699	    }
1700	}
1701    }
1702
1703  /* See if we can get a better vector mode before extracting.  */
1704  if (VECTOR_MODE_P (GET_MODE (op0))
1705      && !MEM_P (op0)
1706      && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1707    {
1708      machine_mode new_mode;
1709
1710      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1711	new_mode = MIN_MODE_VECTOR_FLOAT;
1712      else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1713	new_mode = MIN_MODE_VECTOR_FRACT;
1714      else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1715	new_mode = MIN_MODE_VECTOR_UFRACT;
1716      else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1717	new_mode = MIN_MODE_VECTOR_ACCUM;
1718      else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1719	new_mode = MIN_MODE_VECTOR_UACCUM;
1720      else
1721	new_mode = MIN_MODE_VECTOR_INT;
1722
1723      FOR_EACH_MODE_FROM (new_mode, new_mode)
1724	if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1725	    && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1726	    && targetm.vector_mode_supported_p (new_mode))
1727	  break;
1728      if (new_mode != VOIDmode)
1729	op0 = gen_lowpart (new_mode, op0);
1730    }
1731
1732  /* Use vec_extract patterns for extracting parts of vectors whenever
1733     available.  If that fails, see whether the current modes and bitregion
1734     give a natural subreg.  */
1735  machine_mode outermode = GET_MODE (op0);
1736  if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1737    {
1738      scalar_mode innermode = GET_MODE_INNER (outermode);
1739      enum insn_code icode
1740	= convert_optab_handler (vec_extract_optab, outermode, innermode);
1741      poly_uint64 pos;
1742      if (icode != CODE_FOR_nothing
1743	  && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1744	  && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1745	{
1746	  struct expand_operand ops[3];
1747
1748	  create_output_operand (&ops[0], target, innermode);
1749	  ops[0].target = 1;
1750	  create_input_operand (&ops[1], op0, outermode);
1751	  create_integer_operand (&ops[2], pos);
1752	  if (maybe_expand_insn (icode, 3, ops))
1753	    {
1754	      if (alt_rtl && ops[0].target)
1755		*alt_rtl = target;
1756	      target = ops[0].value;
1757	      if (GET_MODE (target) != mode)
1758		return gen_lowpart (tmode, target);
1759	      return target;
1760	    }
1761	}
1762      /* Using subregs is useful if we're extracting one register vector
1763	 from a multi-register vector.  extract_bit_field_as_subreg checks
1764	 for valid bitsize and bitnum, so we don't need to do that here.  */
1765      if (VECTOR_MODE_P (mode))
1766	{
1767	  rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
1768	  if (sub)
1769	    return sub;
1770	}
1771    }
1772
1773  /* Make sure we are playing with integral modes.  Pun with subregs
1774     if we aren't.  */
1775  opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1776  scalar_int_mode imode;
1777  if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1778    {
1779      if (MEM_P (op0))
1780	op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1781					    0, MEM_SIZE (op0));
1782      else if (op0_mode.exists (&imode))
1783	{
1784	  op0 = gen_lowpart (imode, op0);
1785
1786	  /* If we got a SUBREG, force it into a register since we
1787	     aren't going to be able to do another SUBREG on it.  */
1788	  if (GET_CODE (op0) == SUBREG)
1789	    op0 = force_reg (imode, op0);
1790	}
1791      else
1792	{
1793	  poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1794	  rtx mem = assign_stack_temp (GET_MODE (op0), size);
1795	  emit_move_insn (mem, op0);
1796	  op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1797	}
1798    }
1799
1800  /* ??? We currently assume TARGET is at least as big as BITSIZE.
1801     If that's wrong, the solution is to test for it and set TARGET to 0
1802     if needed.  */
1803
1804  /* Get the mode of the field to use for atomic access or subreg
1805     conversion.  */
1806  if (!SCALAR_INT_MODE_P (tmode)
1807      || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1808    mode1 = mode;
1809  gcc_assert (mode1 != BLKmode);
1810
1811  /* Extraction of a full MODE1 value can be done with a subreg as long
1812     as the least significant bit of the value is the least significant
1813     bit of either OP0 or a word of OP0.  */
1814  if (!MEM_P (op0) && !reverse)
1815    {
1816      rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
1817      if (sub)
1818	return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1819    }
1820
1821  /* Extraction of a full MODE1 value can be done with a load as long as
1822     the field is on a byte boundary and is sufficiently aligned.  */
1823  poly_uint64 bytenum;
1824  if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1825    {
1826      op0 = adjust_bitfield_address (op0, mode1, bytenum);
1827      if (reverse)
1828	op0 = flip_storage_order (mode1, op0);
1829      return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1830    }
1831
1832  /* If we have a memory source and a non-constant bit offset, restrict
1833     the memory to the referenced bytes.  This is a worst-case fallback
1834     but is useful for things like vector booleans.  */
1835  if (MEM_P (op0) && !bitnum.is_constant ())
1836    {
1837      bytenum = bits_to_bytes_round_down (bitnum);
1838      bitnum = num_trailing_bits (bitnum);
1839      poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1840      op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1841      op0_mode = opt_scalar_int_mode ();
1842    }
1843
1844  /* It's possible we'll need to handle other cases here for
1845     polynomial bitnum and bitsize.  */
1846
1847  /* From here on we need to be looking at a fixed-size extraction.  */
1848  return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1849				     bitnum.to_constant (), unsignedp,
1850				     target, mode, tmode, reverse, fallback_p);
1851}
1852
1853/* Subroutine of extract_bit_field_1, with the same arguments, except
1854   that BITSIZE and BITNUM are constant.  Handle cases specific to
1855   integral modes.  If OP0_MODE is defined, it is the mode of OP0,
1856   otherwise OP0 is a BLKmode MEM.  */
1857
1858static rtx
1859extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1860			    unsigned HOST_WIDE_INT bitsize,
1861			    unsigned HOST_WIDE_INT bitnum, int unsignedp,
1862			    rtx target, machine_mode mode, machine_mode tmode,
1863			    bool reverse, bool fallback_p)
1864{
1865  /* Handle fields bigger than a word.  */
1866
1867  if (bitsize > BITS_PER_WORD)
1868    {
1869      /* Here we transfer the words of the field
1870	 in the order least significant first.
1871	 This is because the most significant word is the one which may
1872	 be less than full.  */
1873
1874      const bool backwards = WORDS_BIG_ENDIAN;
1875      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1876      unsigned int i;
1877      rtx_insn *last;
1878
1879      if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1880	target = gen_reg_rtx (mode);
1881
1882      /* In case we're about to clobber a base register or something
1883	 (see gcc.c-torture/execute/20040625-1.c).   */
1884      if (reg_mentioned_p (target, op0))
1885	target = gen_reg_rtx (mode);
1886
1887      /* Indicate for flow that the entire target reg is being set.  */
1888      emit_clobber (target);
1889
1890      /* The mode must be fixed-size, since extract_bit_field_1 handles
1891	 extractions from variable-sized objects before calling this
1892	 function.  */
1893      unsigned int target_size
1894	= GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1895      last = get_last_insn ();
1896      for (i = 0; i < nwords; i++)
1897	{
1898	  /* If I is 0, use the low-order word in both field and target;
1899	     if I is 1, use the next to lowest word; and so on.  */
1900	  /* Word number in TARGET to use.  */
1901	  unsigned int wordnum
1902	    = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1903	  /* Offset from start of field in OP0.  */
1904	  unsigned int bit_offset = (backwards ^ reverse
1905				     ? MAX ((int) bitsize - ((int) i + 1)
1906					    * BITS_PER_WORD,
1907					    0)
1908				     : (int) i * BITS_PER_WORD);
1909	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1910	  rtx result_part
1911	    = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1912					     bitsize - i * BITS_PER_WORD),
1913				   bitnum + bit_offset, 1, target_part,
1914				   mode, word_mode, reverse, fallback_p, NULL);
1915
1916	  gcc_assert (target_part);
1917	  if (!result_part)
1918	    {
1919	      delete_insns_since (last);
1920	      return NULL;
1921	    }
1922
1923	  if (result_part != target_part)
1924	    emit_move_insn (target_part, result_part);
1925	}
1926
1927      if (unsignedp)
1928	{
1929	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
1930	     need to be zero'd out.  */
1931	  if (target_size > nwords * UNITS_PER_WORD)
1932	    {
1933	      unsigned int i, total_words;
1934
1935	      total_words = target_size / UNITS_PER_WORD;
1936	      for (i = nwords; i < total_words; i++)
1937		emit_move_insn
1938		  (operand_subword (target,
1939				    backwards ? total_words - i - 1 : i,
1940				    1, VOIDmode),
1941		   const0_rtx);
1942	    }
1943	  return target;
1944	}
1945
1946      /* Signed bit field: sign-extend with two arithmetic shifts.  */
1947      target = expand_shift (LSHIFT_EXPR, mode, target,
1948			     GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1949      return expand_shift (RSHIFT_EXPR, mode, target,
1950			   GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1951    }
1952
1953  /* If OP0 is a multi-word register, narrow it to the affected word.
1954     If the region spans two words, defer to extract_split_bit_field.  */
1955  if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1956    {
1957      if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1958	{
1959	  if (!fallback_p)
1960	    return NULL_RTX;
1961	  target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1962					    unsignedp, reverse);
1963	  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1964	}
1965      op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1966				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1967      op0_mode = word_mode;
1968      bitnum %= BITS_PER_WORD;
1969    }
1970
1971  /* From here on we know the desired field is smaller than a word.
1972     If OP0 is a register, it too fits within a word.  */
1973  enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1974  extraction_insn extv;
1975  if (!MEM_P (op0)
1976      && !reverse
1977      /* ??? We could limit the structure size to the part of OP0 that
1978	 contains the field, with appropriate checks for endianness
1979	 and TARGET_TRULY_NOOP_TRUNCATION.  */
1980      && get_best_reg_extraction_insn (&extv, pattern,
1981				       GET_MODE_BITSIZE (op0_mode.require ()),
1982				       tmode))
1983    {
1984      rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1985						 bitsize, bitnum,
1986						 unsignedp, target, mode,
1987						 tmode);
1988      if (result)
1989	return result;
1990    }
1991
1992  /* If OP0 is a memory, try copying it to a register and seeing if a
1993     cheap register alternative is available.  */
1994  if (MEM_P (op0) && !reverse)
1995    {
1996      if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1997					tmode))
1998	{
1999	  rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2000						     bitsize, bitnum,
2001						     unsignedp, target, mode,
2002						     tmode);
2003	  if (result)
2004	    return result;
2005	}
2006
2007      rtx_insn *last = get_last_insn ();
2008
2009      /* Try loading part of OP0 into a register and extracting the
2010	 bitfield from that.  */
2011      unsigned HOST_WIDE_INT bitpos;
2012      rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2013					       0, 0, tmode, &bitpos);
2014      if (xop0)
2015	{
2016	  xop0 = copy_to_reg (xop0);
2017	  rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2018					    unsignedp, target,
2019					    mode, tmode, reverse, false, NULL);
2020	  if (result)
2021	    return result;
2022	  delete_insns_since (last);
2023	}
2024    }
2025
2026  if (!fallback_p)
2027    return NULL;
2028
2029  /* Find a correspondingly-sized integer field, so we can apply
2030     shifts and masks to it.  */
2031  scalar_int_mode int_mode;
2032  if (!int_mode_for_mode (tmode).exists (&int_mode))
2033    /* If this fails, we should probably push op0 out to memory and then
2034       do a load.  */
2035    int_mode = int_mode_for_mode (mode).require ();
2036
2037  target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2038				    bitnum, target, unsignedp, reverse);
2039
2040  /* Complex values must be reversed piecewise, so we need to undo the global
2041     reversal, convert to the complex mode and reverse again.  */
2042  if (reverse && COMPLEX_MODE_P (tmode))
2043    {
2044      target = flip_storage_order (int_mode, target);
2045      target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2046      target = flip_storage_order (tmode, target);
2047    }
2048  else
2049    target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2050
2051  return target;
2052}
2053
2054/* Generate code to extract a bit-field from STR_RTX
2055   containing BITSIZE bits, starting at BITNUM,
2056   and put it in TARGET if possible (if TARGET is nonzero).
2057   Regardless of TARGET, we return the rtx for where the value is placed.
2058
2059   STR_RTX is the structure containing the byte (a REG or MEM).
2060   UNSIGNEDP is nonzero if this is an unsigned bit field.
2061   MODE is the natural mode of the field value once extracted.
2062   TMODE is the mode the caller would like the value to have;
2063   but the value may be returned with type MODE instead.
2064
2065   If REVERSE is true, the extraction is to be done in reverse order.
2066
2067   If a TARGET is specified and we can store in it at no extra cost,
2068   we do so, and return TARGET.
2069   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2070   if they are equally easy.  */
2071
2072rtx
2073extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2074		   int unsignedp, rtx target, machine_mode mode,
2075		   machine_mode tmode, bool reverse, rtx *alt_rtl)
2076{
2077  machine_mode mode1;
2078
2079  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
2080  if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2081    mode1 = GET_MODE (str_rtx);
2082  else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2083    mode1 = GET_MODE (target);
2084  else
2085    mode1 = tmode;
2086
2087  unsigned HOST_WIDE_INT ibitsize, ibitnum;
2088  scalar_int_mode int_mode;
2089  if (bitsize.is_constant (&ibitsize)
2090      && bitnum.is_constant (&ibitnum)
2091      && is_a <scalar_int_mode> (mode1, &int_mode)
2092      && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2093				     int_mode, 0, 0))
2094    {
2095      /* Extraction of a full INT_MODE value can be done with a simple load.
2096	 We know here that the field can be accessed with one single
2097	 instruction.  For targets that support unaligned memory,
2098	 an unaligned access may be necessary.  */
2099      if (ibitsize == GET_MODE_BITSIZE (int_mode))
2100	{
2101	  rtx result = adjust_bitfield_address (str_rtx, int_mode,
2102						ibitnum / BITS_PER_UNIT);
2103	  if (reverse)
2104	    result = flip_storage_order (int_mode, result);
2105	  gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2106	  return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2107	}
2108
2109      str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2110				      &ibitnum);
2111      gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2112      str_rtx = copy_to_reg (str_rtx);
2113      return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2114				  target, mode, tmode, reverse, true, alt_rtl);
2115    }
2116
2117  return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2118			      target, mode, tmode, reverse, true, alt_rtl);
2119}
2120
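/* A minimal usage sketch (hypothetical caller; the layout values are made
   up for illustration).  Given MEM referencing a structure with a signed
   5-bit member at bit offset 3,

     rtx val = extract_bit_field (mem, 5, 3, 0, NULL_RTX,
				  SImode, SImode, false, NULL);

   returns the field sign-extended in a fresh SImode pseudo, because
   UNSIGNEDP is 0 and no TARGET was supplied.  */
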
2121/* Use shifts and boolean operations to extract a field of BITSIZE bits
2122   from bit BITNUM of OP0.  If OP0_MODE is defined, it is the mode of OP0,
2123   otherwise OP0 is a BLKmode MEM.
2124
2125   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2126   If REVERSE is true, the extraction is to be done in reverse order.
2127
2128   If TARGET is nonzero, attempts to store the value there
2129   and return TARGET, but this is not guaranteed.
2130   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
2131
2132static rtx
2133extract_fixed_bit_field (machine_mode tmode, rtx op0,
2134			 opt_scalar_int_mode op0_mode,
2135			 unsigned HOST_WIDE_INT bitsize,
2136			 unsigned HOST_WIDE_INT bitnum, rtx target,
2137			 int unsignedp, bool reverse)
2138{
2139  scalar_int_mode mode;
2140  if (MEM_P (op0))
2141    {
2142      if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2143			  BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2144	/* The only way this should occur is if the field spans word
2145	   boundaries.  */
2146	return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2147					unsignedp, reverse);
2148
2149      op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2150    }
2151  else
2152    mode = op0_mode.require ();
2153
2154  return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2155				    target, unsignedp, reverse);
2156}
2157
2158/* Helper function for extract_fixed_bit_field, extracts
2159   the bit field always using MODE, which is the mode of OP0.
2160   The other arguments are as for extract_fixed_bit_field.  */
2161
2162static rtx
2163extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2164			   unsigned HOST_WIDE_INT bitsize,
2165			   unsigned HOST_WIDE_INT bitnum, rtx target,
2166			   int unsignedp, bool reverse)
2167{
2168  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2169     for invalid input, such as extract equivalent of f5 from
2170     gcc.dg/pr48335-2.c.  */
2171
2172  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2173    /* BITNUM is the distance between our msb and that of OP0.
2174       Convert it to the distance from the lsb.  */
2175    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2176
2177  /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2178     We have reduced the big-endian case to the little-endian case.  */
2179  if (reverse)
2180    op0 = flip_storage_order (mode, op0);
2181
2182  if (unsignedp)
2183    {
2184      if (bitnum)
2185	{
2186	  /* If the field does not already start at the lsb,
2187	     shift it so it does.  */
2188	  /* Maybe propagate the target for the shift.  */
2189	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2190	  if (tmode != mode)
2191	    subtarget = 0;
2192	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2193	}
2194      /* Convert the value to the desired mode.  TMODE must also be a
2195	 scalar integer for this conversion to make sense, since we
2196	 shouldn't reinterpret the bits.  */
2197      scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2198      if (mode != new_mode)
2199	op0 = convert_to_mode (new_mode, op0, 1);
2200
2201      /* Unless the msb of the field used to be the msb when we shifted,
2202	 mask out the upper bits.  */
2203
2204      if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2205	return expand_binop (new_mode, and_optab, op0,
2206			     mask_rtx (new_mode, 0, bitsize, 0),
2207			     target, 1, OPTAB_LIB_WIDEN);
2208      return op0;
2209    }
2210
2211  /* To extract a signed bit-field, first shift its msb to the msb of the word,
2212     then arithmetic-shift its lsb to the lsb of the word.  */
2213  op0 = force_reg (mode, op0);
2214
2215  /* Find the narrowest integer mode that contains the field.  */
2216
2217  opt_scalar_int_mode mode_iter;
2218  FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2219    if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2220      break;
2221
2222  mode = mode_iter.require ();
2223  op0 = convert_to_mode (mode, op0, 0);
2224
2225  if (mode != tmode)
2226    target = 0;
2227
2228  if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2229    {
2230      int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2231      /* Maybe propagate the target for the shift.  */
2232      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2233      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2234    }
2235
2236  return expand_shift (RSHIFT_EXPR, mode, op0,
2237		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2238}
2239
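/* A worked instance of the sequences above (illustrative only): extracting
   an unsigned 5-bit field at bit 0 of a QImode value X reduces to

     X & 0x1f

   while the signed variant left-justifies the field and sign-extends it
   with an arithmetic right shift:

     ((int8_t) (X << 3)) >> 3

   assuming a little-endian layout so that BITNUM already counts from the
   lsb.  */
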
2240/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2241   VALUE << BITPOS.  */
2242
2243static rtx
2244lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2245	      int bitpos)
2246{
2247  return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2248}
2249
2250/* Extract a bit field that is split across two words
2251   and return an RTX for the result.
2252
2253   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2254   BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2255   UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2256   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2257   a BLKmode MEM.
2258
2259   If REVERSE is true, the extraction is to be done in reverse order.  */
2260
2261static rtx
2262extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2263			 unsigned HOST_WIDE_INT bitsize,
2264			 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2265			 bool reverse)
2266{
2267  unsigned int unit;
2268  unsigned int bitsdone = 0;
2269  rtx result = NULL_RTX;
2270  int first = 1;
2271
2272  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2273     much at a time.  */
2274  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2275    unit = BITS_PER_WORD;
2276  else
2277    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2278
2279  while (bitsdone < bitsize)
2280    {
2281      unsigned HOST_WIDE_INT thissize;
2282      rtx part;
2283      unsigned HOST_WIDE_INT thispos;
2284      unsigned HOST_WIDE_INT offset;
2285
2286      offset = (bitpos + bitsdone) / unit;
2287      thispos = (bitpos + bitsdone) % unit;
2288
2289      /* THISSIZE must not overrun a word boundary.  Otherwise,
2290	 extract_fixed_bit_field will call us again, and we will mutually
2291	 recurse forever.  */
2292      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2293      thissize = MIN (thissize, unit - thispos);
2294
2295      /* If OP0 is a register, then handle OFFSET here.  */
2296      rtx op0_piece = op0;
2297      opt_scalar_int_mode op0_piece_mode = op0_mode;
2298      if (SUBREG_P (op0) || REG_P (op0))
2299	{
2300	  op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2301	  op0_piece_mode = word_mode;
2302	  offset = 0;
2303	}
2304
2305      /* Extract the parts in bit-counting order,
2306	 whose meaning is determined by BYTES_PER_UNIT.
2307	 OFFSET is in UNITs, and UNIT is in bits.  */
2308      part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2309				      thissize, offset * unit + thispos,
2310				      0, 1, reverse);
2311      bitsdone += thissize;
2312
2313      /* Shift this part into place for the result.  */
2314      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2315	{
2316	  if (bitsize != bitsdone)
2317	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2318				 bitsize - bitsdone, 0, 1);
2319	}
2320      else
2321	{
2322	  if (bitsdone != thissize)
2323	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2324				 bitsdone - thissize, 0, 1);
2325	}
2326
2327      if (first)
2328	result = part;
2329      else
2330	/* Combine the parts with bitwise or.  This works
2331	   because we extracted each part as an unsigned bit field.  */
2332	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2333			       OPTAB_LIB_WIDEN);
2334
2335      first = 0;
2336    }
2337
2338  /* Unsigned bit field: we are done.  */
2339  if (unsignedp)
2340    return result;
2341  /* Signed bit field: sign-extend with two arithmetic shifts.  */
2342  result = expand_shift (LSHIFT_EXPR, word_mode, result,
2343			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2344  return expand_shift (RSHIFT_EXPR, word_mode, result,
2345		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
2346}
2347
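/* An illustrative case (little-endian, 32-bit words): a 10-bit field that
   starts at bit 28 of a register pair is assembled from two pieces.  Bits
   28..31 of the first word supply bits 0..3 of the result, bits 0..5 of the
   second word are extracted separately, shifted left by 4 and IORed in, and
   a signed field then gets the final pair of sign-extending shifts.  */
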
2348/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2349   the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2350   MODE, fill the upper bits with zeros.  Fail if the layout of either
2351   mode is unknown (as for CC modes) or if the extraction would involve
2352   unprofitable mode punning.  Return the value on success, otherwise
2353   return null.
2354
2355   This is different from gen_lowpart* in these respects:
2356
2357     - the returned value must always be considered an rvalue
2358
2359     - when MODE is wider than SRC_MODE, the extraction involves
2360       a zero extension
2361
2362     - when MODE is smaller than SRC_MODE, the extraction involves
2363       a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2364
2365   In other words, this routine performs a computation, whereas the
2366   gen_lowpart* routines are conceptually lvalue or rvalue subreg
2367   operations.  */
2368
2369rtx
2370extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2371{
2372  scalar_int_mode int_mode, src_int_mode;
2373
2374  if (mode == src_mode)
2375    return src;
2376
2377  if (CONSTANT_P (src))
2378    {
2379      /* simplify_gen_subreg can't be used here, as if simplify_subreg
2380	 fails, it will happily create (subreg (symbol_ref)) or similar
2381	 invalid SUBREGs.  */
2382      poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2383      rtx ret = simplify_subreg (mode, src, src_mode, byte);
2384      if (ret)
2385	return ret;
2386
2387      if (GET_MODE (src) == VOIDmode
2388	  || !validate_subreg (mode, src_mode, src, byte))
2389	return NULL_RTX;
2390
2391      src = force_reg (GET_MODE (src), src);
2392      return gen_rtx_SUBREG (mode, src, byte);
2393    }
2394
2395  if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2396    return NULL_RTX;
2397
2398  if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2399      && targetm.modes_tieable_p (mode, src_mode))
2400    {
2401      rtx x = gen_lowpart_common (mode, src);
2402      if (x)
2403        return x;
2404    }
2405
2406  if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2407      || !int_mode_for_mode (mode).exists (&int_mode))
2408    return NULL_RTX;
2409
2410  if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2411    return NULL_RTX;
2412  if (!targetm.modes_tieable_p (int_mode, mode))
2413    return NULL_RTX;
2414
2415  src = gen_lowpart (src_int_mode, src);
2416  src = convert_modes (int_mode, src_int_mode, src, true);
2417  src = gen_lowpart (mode, src);
2418  return src;
2419}
2420
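/* Illustrative uses (assuming DImode and SImode have their usual sizes):
   reading the low SImode bits of a DFmode value goes through the integer
   equivalents, i.e. the DFmode source is viewed as DImode and then truncated
   to SImode, whereas asking for DImode bits of an SImode source yields the
   value zero-extended to DImode.  */
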
2421/* Add INC into TARGET.  */
2422
2423void
2424expand_inc (rtx target, rtx inc)
2425{
2426  rtx value = expand_binop (GET_MODE (target), add_optab,
2427			    target, inc,
2428			    target, 0, OPTAB_LIB_WIDEN);
2429  if (value != target)
2430    emit_move_insn (target, value);
2431}
2432
2433/* Subtract DEC from TARGET.  */
2434
2435void
2436expand_dec (rtx target, rtx dec)
2437{
2438  rtx value = expand_binop (GET_MODE (target), sub_optab,
2439			    target, dec,
2440			    target, 0, OPTAB_LIB_WIDEN);
2441  if (value != target)
2442    emit_move_insn (target, value);
2443}
2444
2445/* Output a shift instruction for expression code CODE,
2446   with SHIFTED being the rtx for the value to shift,
2447   and AMOUNT the rtx for the amount to shift by.
2448   Store the result in the rtx TARGET, if that is convenient.
2449   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2450   Return the rtx for where the value is.
2451   If that cannot be done, abort the compilation unless MAY_FAIL is true,
2452   in which case 0 is returned.  */
2453
2454static rtx
2455expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2456		rtx amount, rtx target, int unsignedp, bool may_fail = false)
2457{
2458  rtx op1, temp = 0;
2459  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2460  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2461  optab lshift_optab = ashl_optab;
2462  optab rshift_arith_optab = ashr_optab;
2463  optab rshift_uns_optab = lshr_optab;
2464  optab lrotate_optab = rotl_optab;
2465  optab rrotate_optab = rotr_optab;
2466  machine_mode op1_mode;
2467  scalar_mode scalar_mode = GET_MODE_INNER (mode);
2468  int attempt;
2469  bool speed = optimize_insn_for_speed_p ();
2470
2471  op1 = amount;
2472  op1_mode = GET_MODE (op1);
2473
2474  /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2475     shift amount is a vector, use the vector/vector shift patterns.  */
2476  if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2477    {
2478      lshift_optab = vashl_optab;
2479      rshift_arith_optab = vashr_optab;
2480      rshift_uns_optab = vlshr_optab;
2481      lrotate_optab = vrotl_optab;
2482      rrotate_optab = vrotr_optab;
2483    }
2484
2485  /* Previously detected shift-counts computed by NEGATE_EXPR
2486     and shifted in the other direction; but that does not work
2487     on all machines.  */
2488
2489  if (SHIFT_COUNT_TRUNCATED)
2490    {
2491      if (CONST_INT_P (op1)
2492	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2493	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2494	op1 = gen_int_shift_amount (mode,
2495				    (unsigned HOST_WIDE_INT) INTVAL (op1)
2496				    % GET_MODE_BITSIZE (scalar_mode));
2497      else if (GET_CODE (op1) == SUBREG
2498	       && subreg_lowpart_p (op1)
2499	       && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2500	       && SCALAR_INT_MODE_P (GET_MODE (op1)))
2501	op1 = SUBREG_REG (op1);
2502    }
2503
2504  /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2505     prefer left rotation, if op1 is from bitsize / 2 + 1 to
2506     bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2507     amount instead.  */
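  /* For instance (illustrative widths): in a 16-bit mode a left rotate by 13
     is rewritten as a right rotate by 3, and a rotate by exactly 8 is
     canonicalized to a left rotate by 8.  */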
2508  if (rotate
2509      && CONST_INT_P (op1)
2510      && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2511		   GET_MODE_BITSIZE (scalar_mode) - 1))
2512    {
2513      op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2514					 - INTVAL (op1)));
2515      left = !left;
2516      code = left ? LROTATE_EXPR : RROTATE_EXPR;
2517    }
2518
2519  /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2520     Note that this is not the case for bigger values.  For instance a rotation
2521     of 0x01020304 by 16 bits gives 0x03040102 which is different from
2522     0x04030201 (bswapsi).  */
2523  if (rotate
2524      && CONST_INT_P (op1)
2525      && INTVAL (op1) == BITS_PER_UNIT
2526      && GET_MODE_SIZE (scalar_mode) == 2
2527      && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2528    return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2529
2530  if (op1 == const0_rtx)
2531    return shifted;
2532
2533  /* Check whether it's cheaper to implement a left shift by a constant
2534     bit count by a sequence of additions.  */
2535  if (code == LSHIFT_EXPR
2536      && CONST_INT_P (op1)
2537      && INTVAL (op1) > 0
2538      && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2539      && INTVAL (op1) < MAX_BITS_PER_WORD
2540      && (shift_cost (speed, mode, INTVAL (op1))
2541	  > INTVAL (op1) * add_cost (speed, mode))
2542      && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2543    {
2544      int i;
2545      for (i = 0; i < INTVAL (op1); i++)
2546	{
2547	  temp = force_reg (mode, shifted);
2548	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2549				  unsignedp, OPTAB_LIB_WIDEN);
2550	}
2551      return shifted;
2552    }
2553
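  /* Concretely (illustration only): when two adds are cheaper than the
     corresponding shift, x << 2 is emitted as t = x + x; t = t + t;  */
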
2554  for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2555    {
2556      enum optab_methods methods;
2557
2558      if (attempt == 0)
2559	methods = OPTAB_DIRECT;
2560      else if (attempt == 1)
2561	methods = OPTAB_WIDEN;
2562      else
2563	methods = OPTAB_LIB_WIDEN;
2564
2565      if (rotate)
2566	{
2567	  /* Widening does not work for rotation.  */
2568	  if (methods == OPTAB_WIDEN)
2569	    continue;
2570	  else if (methods == OPTAB_LIB_WIDEN)
2571	    {
2572	      /* If we have been unable to open-code this by a rotation,
2573		 do it as the IOR of two shifts.  I.e., to rotate A
2574		 by N bits, compute
2575		 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2576		 where C is the bitsize of A.
2577
2578		 It is theoretically possible that the target machine might
2579		 not be able to perform either shift and hence we would
2580		 be making two libcalls rather than just the one for the
2581		 shift (similarly if IOR could not be done).  We will allow
2582		 this extremely unlikely lossage to avoid complicating the
2583		 code below.  */
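	      /* Worked instance of the identity above (illustrative): with
		 C == 32 and a run-time N of 0, both shift amounts collapse
		 to 0 because (-0) & 31 == 0, so the IOR reproduces A
		 unchanged; for N == 5 it computes
		 (A << 5) | ((unsigned) A >> 27).  */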
2584
2585	      rtx subtarget = target == shifted ? 0 : target;
2586	      rtx new_amount, other_amount;
2587	      rtx temp1;
2588
2589	      new_amount = op1;
2590	      if (op1 == const0_rtx)
2591		return shifted;
2592	      else if (CONST_INT_P (op1))
2593		other_amount = gen_int_shift_amount
2594		  (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2595	      else
2596		{
2597		  other_amount
2598		    = simplify_gen_unary (NEG, GET_MODE (op1),
2599					  op1, GET_MODE (op1));
2600		  HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2601		  other_amount
2602		    = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2603					   gen_int_mode (mask, GET_MODE (op1)));
2604		}
2605
2606	      shifted = force_reg (mode, shifted);
2607
2608	      temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2609				     mode, shifted, new_amount, 0, 1);
2610	      temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2611				      mode, shifted, other_amount,
2612				      subtarget, 1);
2613	      return expand_binop (mode, ior_optab, temp, temp1, target,
2614				   unsignedp, methods);
2615	    }
2616
2617	  temp = expand_binop (mode,
2618			       left ? lrotate_optab : rrotate_optab,
2619			       shifted, op1, target, unsignedp, methods);
2620	}
2621      else if (unsignedp)
2622	temp = expand_binop (mode,
2623			     left ? lshift_optab : rshift_uns_optab,
2624			     shifted, op1, target, unsignedp, methods);
2625
2626      /* Do arithmetic shifts.
2627	 Also, if we are going to widen the operand, we can just as well
2628	 use an arithmetic right-shift instead of a logical one.  */
2629      if (temp == 0 && ! rotate
2630	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2631	{
2632	  enum optab_methods methods1 = methods;
2633
2634	  /* If trying to widen a log shift to an arithmetic shift,
2635	     don't accept an arithmetic shift of the same size.  */
2636	  if (unsignedp)
2637	    methods1 = OPTAB_MUST_WIDEN;
2638
2639	  /* Arithmetic shift */
2640
2641	  temp = expand_binop (mode,
2642			       left ? lshift_optab : rshift_arith_optab,
2643			       shifted, op1, target, unsignedp, methods1);
2644	}
2645
2646      /* We used to try extzv here for logical right shifts, but that was
2647	 only useful for one machine, the VAX, and caused poor code
2648	 generation there for lshrdi3, so the code was deleted and a
2649	 define_expand for lshrsi3 was added to vax.md.  */
2650    }
2651
2652  gcc_assert (temp != NULL_RTX || may_fail);
2653  return temp;
2654}
2655
2656/* Output a shift instruction for expression code CODE,
2657   with SHIFTED being the rtx for the value to shift,
2658   and AMOUNT the amount to shift by.
2659   Store the result in the rtx TARGET, if that is convenient.
2660   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2661   Return the rtx for where the value is.  */
2662
2663rtx
2664expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2665	      poly_int64 amount, rtx target, int unsignedp)
2666{
2667  return expand_shift_1 (code, mode, shifted,
2668			 gen_int_shift_amount (mode, amount),
2669			 target, unsignedp);
2670}
2671
2672/* Likewise, but return 0 if that cannot be done.  */
2673
2674static rtx
2675maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2676		    int amount, rtx target, int unsignedp)
2677{
2678  return expand_shift_1 (code, mode,
2679			 shifted, GEN_INT (amount), target, unsignedp, true);
2680}
2681
2682/* Output a shift instruction for expression code CODE,
2683   with SHIFTED being the rtx for the value to shift,
2684   and AMOUNT the tree for the amount to shift by.
2685   Store the result in the rtx TARGET, if that is convenient.
2686   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2687   Return the rtx for where the value is.  */
2688
2689rtx
2690expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2691		       tree amount, rtx target, int unsignedp)
2692{
2693  return expand_shift_1 (code, mode,
2694			 shifted, expand_normal (amount), target, unsignedp);
2695}
2696
2697
2698static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2699			const struct mult_cost *, machine_mode mode);
2700static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2701			      const struct algorithm *, enum mult_variant);
2702static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2703static rtx extract_high_half (scalar_int_mode, rtx);
2704static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2705static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2706				       int, int);
2707/* Compute and return the best algorithm for multiplying by T.
2708   The algorithm must cost less than COST_LIMIT.
2709   If retval.cost >= COST_LIMIT, no algorithm was found and all
2710   other fields of the returned struct are undefined.
2711   MODE is the machine mode of the multiplication.  */
2712
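/* A hand-worked illustration of the kind of recipe the search below can
   return (example values only): for T = 10 one acceptable algorithm is

     t1 = t0 << 2;	(t0 * 4)
     t2 = t1 + t0;	(t0 * 5)
     t3 = t2 << 1;	(t0 * 10)

   accepted only if its accumulated cost stays below the COST_LIMIT that the
   caller passes in.  */
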
2713static void
2714synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2715	    const struct mult_cost *cost_limit, machine_mode mode)
2716{
2717  int m;
2718  struct algorithm *alg_in, *best_alg;
2719  struct mult_cost best_cost;
2720  struct mult_cost new_limit;
2721  int op_cost, op_latency;
2722  unsigned HOST_WIDE_INT orig_t = t;
2723  unsigned HOST_WIDE_INT q;
2724  int maxm, hash_index;
2725  bool cache_hit = false;
2726  enum alg_code cache_alg = alg_zero;
2727  bool speed = optimize_insn_for_speed_p ();
2728  scalar_int_mode imode;
2729  struct alg_hash_entry *entry_ptr;
2730
2731  /* Indicate that no algorithm is yet found.  If no algorithm
2732     is found, this value will be returned and indicate failure.  */
2733  alg_out->cost.cost = cost_limit->cost + 1;
2734  alg_out->cost.latency = cost_limit->latency + 1;
2735
2736  if (cost_limit->cost < 0
2737      || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2738    return;
2739
2740  /* Be prepared for vector modes.  */
2741  imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2742
2743  maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2744
2745  /* Restrict the bits of "t" to the multiplication's mode.  */
2746  t &= GET_MODE_MASK (imode);
2747
2748  /* t == 1 can be done in zero cost.  */
2749  if (t == 1)
2750    {
2751      alg_out->ops = 1;
2752      alg_out->cost.cost = 0;
2753      alg_out->cost.latency = 0;
2754      alg_out->op[0] = alg_m;
2755      return;
2756    }
2757
2758  /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2759     fail now.  */
2760  if (t == 0)
2761    {
2762      if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2763	return;
2764      else
2765	{
2766	  alg_out->ops = 1;
2767	  alg_out->cost.cost = zero_cost (speed);
2768	  alg_out->cost.latency = zero_cost (speed);
2769	  alg_out->op[0] = alg_zero;
2770	  return;
2771	}
2772    }
2773
2774  /* We'll be needing a couple extra algorithm structures now.  */
2775
2776  alg_in = XALLOCA (struct algorithm);
2777  best_alg = XALLOCA (struct algorithm);
2778  best_cost = *cost_limit;
2779
2780  /* Compute the hash index.  */
2781  hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2782
2783  /* See if we already know what to do for T.  */
2784  entry_ptr = alg_hash_entry_ptr (hash_index);
2785  if (entry_ptr->t == t
2786      && entry_ptr->mode == mode
2787      && entry_ptr->speed == speed
2788      && entry_ptr->alg != alg_unknown)
2789    {
2790      cache_alg = entry_ptr->alg;
2791
2792      if (cache_alg == alg_impossible)
2793	{
2794	  /* The cache tells us that it's impossible to synthesize
2795	     multiplication by T within entry_ptr->cost.  */
2796	  if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2797	    /* COST_LIMIT is at least as restrictive as the one
2798	       recorded in the hash table, in which case we have no
2799	       hope of synthesizing a multiplication.  Just
2800	       return.  */
2801	    return;
2802
2803	  /* If we get here, COST_LIMIT is less restrictive than the
2804	     one recorded in the hash table, so we may be able to
2805	     synthesize a multiplication.  Proceed as if we didn't
2806	     have the cache entry.  */
2807	}
2808      else
2809	{
2810	  if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2811	    /* The cached algorithm shows that this multiplication
2812	       requires more cost than COST_LIMIT.  Just return.  This
2813	       way, we don't clobber this cache entry with
2814	       alg_impossible but retain useful information.  */
2815	    return;
2816
2817	  cache_hit = true;
2818
2819	  switch (cache_alg)
2820	    {
2821	    case alg_shift:
2822	      goto do_alg_shift;
2823
2824	    case alg_add_t_m2:
2825	    case alg_sub_t_m2:
2826	      goto do_alg_addsub_t_m2;
2827
2828	    case alg_add_factor:
2829	    case alg_sub_factor:
2830	      goto do_alg_addsub_factor;
2831
2832	    case alg_add_t2_m:
2833	      goto do_alg_add_t2_m;
2834
2835	    case alg_sub_t2_m:
2836	      goto do_alg_sub_t2_m;
2837
2838	    default:
2839	      gcc_unreachable ();
2840	    }
2841	}
2842    }
2843
2844  /* If we have a group of zero bits at the low-order part of T, try
2845     multiplying by the remaining bits and then doing a shift.  */
2846
2847  if ((t & 1) == 0)
2848    {
2849    do_alg_shift:
2850      m = ctz_or_zero (t); /* m = number of low zero bits */
2851      if (m < maxm)
2852	{
2853	  q = t >> m;
2854	  /* The function expand_shift will choose between a shift and
2855	     a sequence of additions, so the observed cost is given as
2856	     MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2857	  op_cost = m * add_cost (speed, mode);
2858	  if (shift_cost (speed, mode, m) < op_cost)
2859	    op_cost = shift_cost (speed, mode, m);
2860	  new_limit.cost = best_cost.cost - op_cost;
2861	  new_limit.latency = best_cost.latency - op_cost;
2862	  synth_mult (alg_in, q, &new_limit, mode);
2863
2864	  alg_in->cost.cost += op_cost;
2865	  alg_in->cost.latency += op_cost;
2866	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2867	    {
2868	      best_cost = alg_in->cost;
2869	      std::swap (alg_in, best_alg);
2870	      best_alg->log[best_alg->ops] = m;
2871	      best_alg->op[best_alg->ops] = alg_shift;
2872	    }
2873
2874	  /* See if treating ORIG_T as a signed number yields a better
2875	     sequence.  Try this sequence only for a negative ORIG_T
2876	     as it would be useless for a non-negative ORIG_T.  */
2877	  if ((HOST_WIDE_INT) orig_t < 0)
2878	    {
2879	      /* Shift ORIG_T as follows because a right shift of a
2880		 negative-valued signed type is implementation
2881		 defined.  */
2882	      q = ~(~orig_t >> m);
2883	      /* The function expand_shift will choose between a shift
2884		 and a sequence of additions, so the observed cost is
2885		 given as MIN (m * add_cost(speed, mode),
2886		 shift_cost(speed, mode, m)).  */
2887	      op_cost = m * add_cost (speed, mode);
2888	      if (shift_cost (speed, mode, m) < op_cost)
2889		op_cost = shift_cost (speed, mode, m);
2890	      new_limit.cost = best_cost.cost - op_cost;
2891	      new_limit.latency = best_cost.latency - op_cost;
2892	      synth_mult (alg_in, q, &new_limit, mode);
2893
2894	      alg_in->cost.cost += op_cost;
2895	      alg_in->cost.latency += op_cost;
2896	      if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2897		{
2898		  best_cost = alg_in->cost;
2899		  std::swap (alg_in, best_alg);
2900		  best_alg->log[best_alg->ops] = m;
2901		  best_alg->op[best_alg->ops] = alg_shift;
2902		}
2903	    }
2904	}
2905      if (cache_hit)
2906	goto done;
2907    }
2908
2909  /* If we have an odd number, add or subtract one.  */
2910  if ((t & 1) != 0)
2911    {
2912      unsigned HOST_WIDE_INT w;
2913
2914    do_alg_addsub_t_m2:
2915      for (w = 1; (w & t) != 0; w <<= 1)
2916	;
2917      /* If T was -1, then W will be zero after the loop.  This is another
2918	 case where T ends with ...111.  Handling this with (T + 1) and
2919	 subtracting 1 produces slightly better code and results in algorithm
2920	 selection much faster than treating it like the ...0111 case
2921	 below.  */
2922      if (w == 0
2923	  || (w > 2
2924	      /* Reject the case where t is 3.
2925		 Thus we prefer addition in that case.  */
2926	      && t != 3))
2927	{
2928	  /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2929
2930	  op_cost = add_cost (speed, mode);
2931	  new_limit.cost = best_cost.cost - op_cost;
2932	  new_limit.latency = best_cost.latency - op_cost;
2933	  synth_mult (alg_in, t + 1, &new_limit, mode);
2934
2935	  alg_in->cost.cost += op_cost;
2936	  alg_in->cost.latency += op_cost;
2937	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2938	    {
2939	      best_cost = alg_in->cost;
2940	      std::swap (alg_in, best_alg);
2941	      best_alg->log[best_alg->ops] = 0;
2942	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2943	    }
2944	}
2945      else
2946	{
2947	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2948
2949	  op_cost = add_cost (speed, mode);
2950	  new_limit.cost = best_cost.cost - op_cost;
2951	  new_limit.latency = best_cost.latency - op_cost;
2952	  synth_mult (alg_in, t - 1, &new_limit, mode);
2953
2954	  alg_in->cost.cost += op_cost;
2955	  alg_in->cost.latency += op_cost;
2956	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2957	    {
2958	      best_cost = alg_in->cost;
2959	      std::swap (alg_in, best_alg);
2960	      best_alg->log[best_alg->ops] = 0;
2961	      best_alg->op[best_alg->ops] = alg_add_t_m2;
2962	    }
2963	}
2964
2965      /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2966	 quickly with a - a * n for some appropriate constant n.  */
2967      m = exact_log2 (-orig_t + 1);
2968      if (m >= 0 && m < maxm)
2969	{
2970	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2971	  /* If the target has a cheap shift-and-subtract insn use
2972	     that in preference to a shift insn followed by a sub insn.
2973	     Assume that the shift-and-sub is "atomic" with a latency
2974	     equal to its cost, otherwise assume that on superscalar
2975	     hardware the shift may be executed concurrently with the
2976	     earlier steps in the algorithm.  */
2977	  if (shiftsub1_cost (speed, mode, m) <= op_cost)
2978	    {
2979	      op_cost = shiftsub1_cost (speed, mode, m);
2980	      op_latency = op_cost;
2981	    }
2982	  else
2983	    op_latency = add_cost (speed, mode);
2984
2985	  new_limit.cost = best_cost.cost - op_cost;
2986	  new_limit.latency = best_cost.latency - op_latency;
2987	  synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2988		      &new_limit, mode);
2989
2990	  alg_in->cost.cost += op_cost;
2991	  alg_in->cost.latency += op_latency;
2992	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2993	    {
2994	      best_cost = alg_in->cost;
2995	      std::swap (alg_in, best_alg);
2996	      best_alg->log[best_alg->ops] = m;
2997	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2998	    }
2999	}
3000
3001      if (cache_hit)
3002	goto done;
3003    }
3004
3005  /* Look for factors of t of the form
3006     t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3007     If we find such a factor, we can multiply by t using an algorithm that
3008     multiplies by q, shift the result by m and add/subtract it to itself.
3009
3010     We search for large factors first and loop down, even if large factors
3011     are less probable than small; if we find a large factor we will find a
3012     good sequence quickly, and therefore be able to prune (by decreasing
3013     COST_LIMIT) the search.  */
3014
3015 do_alg_addsub_factor:
3016  for (m = floor_log2 (t - 1); m >= 2; m--)
3017    {
3018      unsigned HOST_WIDE_INT d;
3019
3020      d = (HOST_WIDE_INT_1U << m) + 1;
3021      if (t % d == 0 && t > d && m < maxm
3022	  && (!cache_hit || cache_alg == alg_add_factor))
3023	{
3024	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3025	  if (shiftadd_cost (speed, mode, m) <= op_cost)
3026	    op_cost = shiftadd_cost (speed, mode, m);
3027
3028	  op_latency = op_cost;
3029
3030
3031	  new_limit.cost = best_cost.cost - op_cost;
3032	  new_limit.latency = best_cost.latency - op_latency;
3033	  synth_mult (alg_in, t / d, &new_limit, mode);
3034
3035	  alg_in->cost.cost += op_cost;
3036	  alg_in->cost.latency += op_latency;
3037	  if (alg_in->cost.latency < op_cost)
3038	    alg_in->cost.latency = op_cost;
3039	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3040	    {
3041	      best_cost = alg_in->cost;
3042	      std::swap (alg_in, best_alg);
3043	      best_alg->log[best_alg->ops] = m;
3044	      best_alg->op[best_alg->ops] = alg_add_factor;
3045	    }
3046	  /* Other factors will have been taken care of in the recursion.  */
3047	  break;
3048	}
3049
3050      d = (HOST_WIDE_INT_1U << m) - 1;
3051      if (t % d == 0 && t > d && m < maxm
3052	  && (!cache_hit || cache_alg == alg_sub_factor))
3053	{
3054	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3055	  if (shiftsub0_cost (speed, mode, m) <= op_cost)
3056	    op_cost = shiftsub0_cost (speed, mode, m);
3057
3058	  op_latency = op_cost;
3059
3060	  new_limit.cost = best_cost.cost - op_cost;
3061	  new_limit.latency = best_cost.latency - op_latency;
3062	  synth_mult (alg_in, t / d, &new_limit, mode);
3063
3064	  alg_in->cost.cost += op_cost;
3065	  alg_in->cost.latency += op_latency;
3066	  if (alg_in->cost.latency < op_cost)
3067	    alg_in->cost.latency = op_cost;
3068	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3069	    {
3070	      best_cost = alg_in->cost;
3071	      std::swap (alg_in, best_alg);
3072	      best_alg->log[best_alg->ops] = m;
3073	      best_alg->op[best_alg->ops] = alg_sub_factor;
3074	    }
3075	  break;
3076	}
3077    }
3078  if (cache_hit)
3079    goto done;
3080
3081  /* Try shift-and-add (load effective address) instructions,
3082     i.e. do a*3, a*5, a*9.  */
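  /* For example, t == 5 gives q == 4 and m == 2, i.e. the single
     shift-and-add (x << 2) + x; likewise t == 9 becomes (x << 3) + x.  */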
3083  if ((t & 1) != 0)
3084    {
3085    do_alg_add_t2_m:
3086      q = t - 1;
3087      m = ctz_hwi (q);
3088      if (q && m < maxm)
3089	{
3090	  op_cost = shiftadd_cost (speed, mode, m);
3091	  new_limit.cost = best_cost.cost - op_cost;
3092	  new_limit.latency = best_cost.latency - op_cost;
3093	  synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3094
3095	  alg_in->cost.cost += op_cost;
3096	  alg_in->cost.latency += op_cost;
3097	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3098	    {
3099	      best_cost = alg_in->cost;
3100	      std::swap (alg_in, best_alg);
3101	      best_alg->log[best_alg->ops] = m;
3102	      best_alg->op[best_alg->ops] = alg_add_t2_m;
3103	    }
3104	}
3105      if (cache_hit)
3106	goto done;
3107
3108    do_alg_sub_t2_m:
3109      q = t + 1;
3110      m = ctz_hwi (q);
3111      if (q && m < maxm)
3112	{
3113	  op_cost = shiftsub0_cost (speed, mode, m);
3114	  new_limit.cost = best_cost.cost - op_cost;
3115	  new_limit.latency = best_cost.latency - op_cost;
3116	  synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3117
3118	  alg_in->cost.cost += op_cost;
3119	  alg_in->cost.latency += op_cost;
3120	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3121	    {
3122	      best_cost = alg_in->cost;
3123	      std::swap (alg_in, best_alg);
3124	      best_alg->log[best_alg->ops] = m;
3125	      best_alg->op[best_alg->ops] = alg_sub_t2_m;
3126	    }
3127	}
3128      if (cache_hit)
3129	goto done;
3130    }
3131
3132 done:
3133  /* If best_cost has not decreased, we have not found any algorithm.  */
3134  if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3135    {
3136      /* We failed to find an algorithm.  Record alg_impossible for
3137	 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3138	 we are asked to find an algorithm for T within the same or
3139	 lower COST_LIMIT, we can immediately return to the
3140	 caller.  */
3141      entry_ptr->t = t;
3142      entry_ptr->mode = mode;
3143      entry_ptr->speed = speed;
3144      entry_ptr->alg = alg_impossible;
3145      entry_ptr->cost = *cost_limit;
3146      return;
3147    }
3148
3149  /* Cache the result.  */
3150  if (!cache_hit)
3151    {
3152      entry_ptr->t = t;
3153      entry_ptr->mode = mode;
3154      entry_ptr->speed = speed;
3155      entry_ptr->alg = best_alg->op[best_alg->ops];
3156      entry_ptr->cost.cost = best_cost.cost;
3157      entry_ptr->cost.latency = best_cost.latency;
3158    }
3159
3160  /* If we are getting too long a sequence for `struct algorithm'
3161     to record, make this search fail.  */
3162  if (best_alg->ops == MAX_BITS_PER_WORD)
3163    return;
3164
3165  /* Copy the algorithm from temporary space to the space at alg_out.
3166     We avoid using structure assignment because the majority of
3167     best_alg is normally undefined, and this is a critical function.  */
3168  alg_out->ops = best_alg->ops + 1;
3169  alg_out->cost = best_cost;
3170  memcpy (alg_out->op, best_alg->op,
3171	  alg_out->ops * sizeof *alg_out->op);
3172  memcpy (alg_out->log, best_alg->log,
3173	  alg_out->ops * sizeof *alg_out->log);
3174}
3175
3176/* Find the cheapest way of multiplying a value of mode MODE by VAL.
3177   Try three variations:
3178
3179       - a shift/add sequence based on VAL itself
3180       - a shift/add sequence based on -VAL, followed by a negation
3181       - a shift/add sequence based on VAL - 1, followed by an addition.
3182
3183   Return true if the cheapest of these costs less than MULT_COST,
3184   describing the algorithm in *ALG and final fixup in *VARIANT.  */
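/* For instance, for VAL == -5 the second variant can synthesize x * 5 as
   (x << 2) + x and then negate the result, while for VAL == 33 the third
   variant can shift x left by 5 and then add x.  */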
3185
3186bool
3187choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3188		     struct algorithm *alg, enum mult_variant *variant,
3189		     int mult_cost)
3190{
3191  struct algorithm alg2;
3192  struct mult_cost limit;
3193  int op_cost;
3194  bool speed = optimize_insn_for_speed_p ();
3195
3196  /* Fail quickly for impossible bounds.  */
3197  if (mult_cost < 0)
3198    return false;
3199
3200  /* Ensure that mult_cost provides a reasonable upper bound.
3201     Any constant multiplication can be performed with less
3202     than 2 * bits additions.  */
3203  op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3204  if (mult_cost > op_cost)
3205    mult_cost = op_cost;
3206
3207  *variant = basic_variant;
3208  limit.cost = mult_cost;
3209  limit.latency = mult_cost;
3210  synth_mult (alg, val, &limit, mode);
3211
3212  /* This works only if the inverted value actually fits in an
3213     `unsigned int'.  */
3214  if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3215    {
3216      op_cost = neg_cost (speed, mode);
3217      if (MULT_COST_LESS (&alg->cost, mult_cost))
3218	{
3219	  limit.cost = alg->cost.cost - op_cost;
3220	  limit.latency = alg->cost.latency - op_cost;
3221	}
3222      else
3223	{
3224	  limit.cost = mult_cost - op_cost;
3225	  limit.latency = mult_cost - op_cost;
3226	}
3227
3228      synth_mult (&alg2, -val, &limit, mode);
3229      alg2.cost.cost += op_cost;
3230      alg2.cost.latency += op_cost;
3231      if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3232	*alg = alg2, *variant = negate_variant;
3233    }
3234
3235  /* This proves very useful for division-by-constant.  */
3236  op_cost = add_cost (speed, mode);
3237  if (MULT_COST_LESS (&alg->cost, mult_cost))
3238    {
3239      limit.cost = alg->cost.cost - op_cost;
3240      limit.latency = alg->cost.latency - op_cost;
3241    }
3242  else
3243    {
3244      limit.cost = mult_cost - op_cost;
3245      limit.latency = mult_cost - op_cost;
3246    }
3247
3248  synth_mult (&alg2, val - 1, &limit, mode);
3249  alg2.cost.cost += op_cost;
3250  alg2.cost.latency += op_cost;
3251  if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3252    *alg = alg2, *variant = add_variant;
3253
3254  return MULT_COST_LESS (&alg->cost, mult_cost);
3255}
3256
3257/* A subroutine of expand_mult, used for constant multiplications.
3258   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3259   convenient.  Use the shift/add sequence described by ALG and apply
3260   the final fixup specified by VARIANT.  */
3261
3262static rtx
3263expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3264		   rtx target, const struct algorithm *alg,
3265		   enum mult_variant variant)
3266{
3267  unsigned HOST_WIDE_INT val_so_far;
3268  rtx_insn *insn;
3269  rtx accum, tem;
3270  int opno;
3271  machine_mode nmode;
3272
3273  /* Avoid referencing memory over and over and invalid sharing
3274     on SUBREGs.  */
3275  op0 = force_reg (mode, op0);
3276
3277  /* ACCUM starts out either as OP0 or as a zero, depending on
3278     the first operation.  */
3279
3280  if (alg->op[0] == alg_zero)
3281    {
3282      accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3283      val_so_far = 0;
3284    }
3285  else if (alg->op[0] == alg_m)
3286    {
3287      accum = copy_to_mode_reg (mode, op0);
3288      val_so_far = 1;
3289    }
3290  else
3291    gcc_unreachable ();
3292
3293  for (opno = 1; opno < alg->ops; opno++)
3294    {
3295      int log = alg->log[opno];
3296      rtx shift_subtarget = optimize ? 0 : accum;
3297      rtx add_target
3298	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
3299	   && !optimize)
3300	  ? target : 0;
3301      rtx accum_target = optimize ? 0 : accum;
3302      rtx accum_inner;
3303
3304      switch (alg->op[opno])
3305	{
3306	case alg_shift:
3307	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3308	  /* REG_EQUAL note will be attached to the following insn.  */
3309	  emit_move_insn (accum, tem);
3310	  val_so_far <<= log;
3311	  break;
3312
3313	case alg_add_t_m2:
3314	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3315	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3316				 add_target ? add_target : accum_target);
3317	  val_so_far += HOST_WIDE_INT_1U << log;
3318	  break;
3319
3320	case alg_sub_t_m2:
3321	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3322	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3323				 add_target ? add_target : accum_target);
3324	  val_so_far -= HOST_WIDE_INT_1U << log;
3325	  break;
3326
3327	case alg_add_t2_m:
3328	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3329				log, shift_subtarget, 0);
3330	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3331				 add_target ? add_target : accum_target);
3332	  val_so_far = (val_so_far << log) + 1;
3333	  break;
3334
3335	case alg_sub_t2_m:
3336	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3337				log, shift_subtarget, 0);
3338	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3339				 add_target ? add_target : accum_target);
3340	  val_so_far = (val_so_far << log) - 1;
3341	  break;
3342
3343	case alg_add_factor:
3344	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3345	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3346				 add_target ? add_target : accum_target);
3347	  val_so_far += val_so_far << log;
3348	  break;
3349
3350	case alg_sub_factor:
3351	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3352	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3353				 (add_target
3354				  ? add_target : (optimize ? 0 : tem)));
3355	  val_so_far = (val_so_far << log) - val_so_far;
3356	  break;
3357
3358	default:
3359	  gcc_unreachable ();
3360	}
3361
3362      if (SCALAR_INT_MODE_P (mode))
3363	{
3364	  /* Write a REG_EQUAL note on the last insn so that we can cse
3365	     multiplication sequences.  Note that if ACCUM is a SUBREG,
3366	     we've set the inner register and must properly indicate that.  */
3367	  tem = op0, nmode = mode;
3368	  accum_inner = accum;
3369	  if (GET_CODE (accum) == SUBREG)
3370	    {
3371	      accum_inner = SUBREG_REG (accum);
3372	      nmode = GET_MODE (accum_inner);
3373	      tem = gen_lowpart (nmode, op0);
3374	    }
3375
3376	  /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3377	     In that case, only the low bits of accum would be guaranteed to
3378	     be equal to the content of the REG_EQUAL note, the upper bits
3379	     can be anything.  */
3380	  if (!paradoxical_subreg_p (tem))
3381	    {
3382	      insn = get_last_insn ();
3383	      wide_int wval_so_far
3384		= wi::uhwi (val_so_far,
3385			    GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3386	      rtx c = immed_wide_int_const (wval_so_far, nmode);
3387	      set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3388				accum_inner);
3389	    }
3390	}
3391    }
3392
3393  if (variant == negate_variant)
3394    {
3395      val_so_far = -val_so_far;
3396      accum = expand_unop (mode, neg_optab, accum, target, 0);
3397    }
3398  else if (variant == add_variant)
3399    {
3400      val_so_far = val_so_far + 1;
3401      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3402    }
3403
3404  /* Compare only the bits of val and val_so_far that are significant
3405     in the result mode, to avoid sign-/zero-extension confusion.  */
3406  nmode = GET_MODE_INNER (mode);
3407  val &= GET_MODE_MASK (nmode);
3408  val_so_far &= GET_MODE_MASK (nmode);
3409  gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3410
3411  return accum;
3412}
3413
3414/* Perform a multiplication and return an rtx for the result.
3415   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3416   TARGET is a suggestion for where to store the result (an rtx).
3417
3418   We check specially for a constant integer as OP1.
3419   If you want this check for OP0 as well, then before calling
3420   you should swap the two operands if OP0 would be constant.  */
3421
3422rtx
3423expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3424	     int unsignedp, bool no_libcall)
3425{
3426  enum mult_variant variant;
3427  struct algorithm algorithm;
3428  rtx scalar_op1;
3429  int max_cost;
3430  bool speed = optimize_insn_for_speed_p ();
3431  bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3432
3433  if (CONSTANT_P (op0))
3434    std::swap (op0, op1);
3435
3436  /* For vectors, there are several simplifications that can be made if
3437     all elements of the vector constant are identical.  */
3438  scalar_op1 = unwrap_const_vec_duplicate (op1);
3439
3440  if (INTEGRAL_MODE_P (mode))
3441    {
3442      rtx fake_reg;
3443      HOST_WIDE_INT coeff;
3444      bool is_neg;
3445      int mode_bitsize;
3446
3447      if (op1 == CONST0_RTX (mode))
3448	return op1;
3449      if (op1 == CONST1_RTX (mode))
3450	return op0;
3451      if (op1 == CONSTM1_RTX (mode))
3452	return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3453			    op0, target, 0);
3454
3455      if (do_trapv)
3456	goto skip_synth;
3457
3458      /* If mode is integer vector mode, check if the backend supports
3459	 vector lshift (by scalar or vector) at all.  If not, we can't use
3460	 synthesized multiply.  */
3461      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3462	  && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3463	  && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3464	goto skip_synth;
3465
3466      /* These are the operations that are potentially turned into
3467	 a sequence of shifts and additions.  */
3468      mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3469
3470      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3471	 less than or equal in size to `unsigned int' this doesn't matter.
3472	 If the mode is larger than `unsigned int', then synth_mult works
3473	 only if the constant value exactly fits in an `unsigned int' without
3474	 any truncation.  This means that multiplying by negative values does
3475	 not work; results are off by 2^32 on a 32 bit machine.  */
3476      if (CONST_INT_P (scalar_op1))
3477	{
3478	  coeff = INTVAL (scalar_op1);
3479	  is_neg = coeff < 0;
3480	}
3481#if TARGET_SUPPORTS_WIDE_INT
3482      else if (CONST_WIDE_INT_P (scalar_op1))
3483#else
3484      else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3485#endif
3486	{
3487	  int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3488	  /* Perfect power of 2 (other than 1, which is handled above).  */
3489	  if (shift > 0)
3490	    return expand_shift (LSHIFT_EXPR, mode, op0,
3491				 shift, target, unsignedp);
3492	  else
3493	    goto skip_synth;
3494	}
3495      else
3496	goto skip_synth;
3497
3498      /* We used to test optimize here, on the grounds that it's better to
3499	 produce a smaller program when -O is not used.  But this causes
3500	 such a terrible slowdown sometimes that it seems better to always
3501	 use synth_mult.  */
3502
3503      /* Special case powers of two.  */
3504      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3505	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3506	return expand_shift (LSHIFT_EXPR, mode, op0,
3507			     floor_log2 (coeff), target, unsignedp);
3508
3509      fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3510
3511      /* Attempt to handle multiplication of DImode values by negative
3512	 coefficients, by performing the multiplication by a positive
3513	 multiplier and then inverting the result.  */
3514      if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3515	{
3516	  /* It's safe to use -coeff even for INT_MIN, as the
3517	     result is interpreted as an unsigned coefficient.
3518	     Exclude cost of op0 from max_cost to match the cost
3519	     calculation of the synth_mult.  */
3520	  coeff = -(unsigned HOST_WIDE_INT) coeff;
3521	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3522				    mode, speed)
3523		      - neg_cost (speed, mode));
3524	  if (max_cost <= 0)
3525	    goto skip_synth;
3526
3527	  /* Special case powers of two.  */
3528	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3529	    {
3530	      rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3531				       floor_log2 (coeff), target, unsignedp);
3532	      return expand_unop (mode, neg_optab, temp, target, 0);
3533	    }
3534
3535	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3536				   max_cost))
3537	    {
3538	      rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3539					    &algorithm, variant);
3540	      return expand_unop (mode, neg_optab, temp, target, 0);
3541	    }
3542	  goto skip_synth;
3543	}
3544
3545      /* Exclude cost of op0 from max_cost to match the cost
3546	 calculation of the synth_mult.  */
3547      max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3548      if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3549	return expand_mult_const (mode, op0, coeff, target,
3550				  &algorithm, variant);
3551    }
3552 skip_synth:
3553
3554  /* Expand x*2.0 as x+x.  */
3555  if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3556      && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3557    {
3558      op0 = force_reg (GET_MODE (op0), op0);
3559      return expand_binop (mode, add_optab, op0, op0,
3560			   target, unsignedp,
3561			   no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3562    }
3563
3564  /* This used to use umul_optab if unsigned, but for non-widening multiply
3565     there is no difference between signed and unsigned.  */
3566  op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3567		      op0, op1, target, unsignedp,
3568		      no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3569  gcc_assert (op0 || no_libcall);
3570  return op0;
3571}
3572
3573/* Return a cost estimate for multiplying a register by the given
3574   COEFFicient in the given MODE and SPEED.  */
3575
3576int
3577mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3578{
3579  int max_cost;
3580  struct algorithm algorithm;
3581  enum mult_variant variant;
3582
3583  rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3584  max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3585			   mode, speed);
3586  if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3587    return algorithm.cost.cost;
3588  else
3589    return max_cost;
3590}
3591
3592/* Perform a widening multiplication and return an rtx for the result.
3593   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3594   TARGET is a suggestion for where to store the result (an rtx).
3595   THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3596   or smul_widen_optab.
3597
3598   We check specially for a constant integer as OP1, comparing the
3599   cost of a widening multiply against the cost of a sequence of shifts
3600   and adds.  */
3601
3602rtx
3603expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3604		      int unsignedp, optab this_optab)
3605{
3606  bool speed = optimize_insn_for_speed_p ();
3607  rtx cop1;
3608
3609  if (CONST_INT_P (op1)
3610      && GET_MODE (op0) != VOIDmode
3611      && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3612				this_optab == umul_widen_optab))
3613      && CONST_INT_P (cop1)
3614      && (INTVAL (cop1) >= 0
3615	  || HWI_COMPUTABLE_MODE_P (mode)))
3616    {
3617      HOST_WIDE_INT coeff = INTVAL (cop1);
3618      int max_cost;
3619      enum mult_variant variant;
3620      struct algorithm algorithm;
3621
3622      if (coeff == 0)
3623	return CONST0_RTX (mode);
3624
3625      /* Special case powers of two.  */
3626      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3627	{
3628	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3629	  return expand_shift (LSHIFT_EXPR, mode, op0,
3630			       floor_log2 (coeff), target, unsignedp);
3631	}
3632
3633      /* Exclude cost of op0 from max_cost to match the cost
3634	 calculation of the synth_mult.  */
3635      max_cost = mul_widen_cost (speed, mode);
3636      if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3637			       max_cost))
3638	{
3639	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3640	  return expand_mult_const (mode, op0, coeff, target,
3641				    &algorithm, variant);
3642	}
3643    }
3644  return expand_binop (mode, this_optab, op0, op1, target,
3645		       unsignedp, OPTAB_LIB_WIDEN);
3646}
3647
3648/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3649   replace division by D, and put the least significant N bits of the result
3650   in *MULTIPLIER_PTR and return the most significant bit.
3651
3652   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3653   needed precision is in PRECISION (should be <= N).
3654
3655   PRECISION should be as small as possible so this function can choose
3656   the multiplier more freely.
3657
3658   The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3659   is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3660
3661   Using this function, x/D will be equal to (x * m) >> (N + *POST_SHIFT_PTR),
3662   where m is the full N + 1 bit multiplier.  */
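/* For example, with D == 5 and N == PRECISION == 32 this returns 0 with
   *MULTIPLIER_PTR == 0xCCCCCCCD and *POST_SHIFT_PTR == 2, i.e.
   x / 5 == (x * 0xCCCCCCCD) >> (32 + 2) for any 32-bit unsigned x.  With
   D == 7 the 33-bit multiplier 0x124924925 does not fit in 32 bits, so the
   function returns 1 and the caller must apply the usual add-and-shift
   fixup for the extra high bit.  */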
3663
3664unsigned HOST_WIDE_INT
3665choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3666		   unsigned HOST_WIDE_INT *multiplier_ptr,
3667		   int *post_shift_ptr, int *lgup_ptr)
3668{
3669  int lgup, post_shift;
3670  int pow, pow2;
3671
3672  /* lgup = ceil(log2(divisor)); */
3673  lgup = ceil_log2 (d);
3674
3675  gcc_assert (lgup <= n);
3676
3677  pow = n + lgup;
3678  pow2 = n + lgup - precision;
3679
3680  /* mlow = 2^(N + lgup)/d */
3681  wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3682  wide_int mlow = wi::udiv_trunc (val, d);
3683
3684  /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3685  val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3686  wide_int mhigh = wi::udiv_trunc (val, d);
3687
3688  /* If precision == N, then mlow, mhigh exceed 2^N
3689     (but they do not exceed 2^(N+1)).  */
3690
3691  /* Reduce to lowest terms.  */
3692  for (post_shift = lgup; post_shift > 0; post_shift--)
3693    {
3694      unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3695						       HOST_BITS_PER_WIDE_INT);
3696      unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3697						       HOST_BITS_PER_WIDE_INT);
3698      if (ml_lo >= mh_lo)
3699	break;
3700
3701      mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3702      mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3703    }
3704
3705  *post_shift_ptr = post_shift;
3706  *lgup_ptr = lgup;
3707  if (n < HOST_BITS_PER_WIDE_INT)
3708    {
3709      unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3710      *multiplier_ptr = mhigh.to_uhwi () & mask;
3711      return mhigh.to_uhwi () >= mask;
3712    }
3713  else
3714    {
3715      *multiplier_ptr = mhigh.to_uhwi ();
3716      return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3717    }
3718}
3719
3720/* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3721   congruent to 1 (mod 2**N).  */
3722
3723static unsigned HOST_WIDE_INT
3724invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3725{
3726  /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3727
3728  /* The algorithm notes that the choice y = x satisfies
3729     x*y == 1 mod 2^3, since x is assumed odd.
3730     Each iteration doubles the number of bits of significance in y.  */
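  /* For example, invert_mod2n (7, 32) starts from y == 7 (7 * 7 == 49,
     which is 1 mod 8) and after four iterations returns 0xB6DB6DB7;
     indeed 7 * 0xB6DB6DB7 == 5 * 2**32 + 1.  */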
3731
3732  unsigned HOST_WIDE_INT mask;
3733  unsigned HOST_WIDE_INT y = x;
3734  int nbit = 3;
3735
3736  mask = (n == HOST_BITS_PER_WIDE_INT
3737	  ? HOST_WIDE_INT_M1U
3738	  : (HOST_WIDE_INT_1U << n) - 1);
3739
3740  while (nbit < n)
3741    {
3742      y = y * (2 - x*y) & mask;		/* Modulo 2^N */
3743      nbit *= 2;
3744    }
3745  return y;
3746}
3747
3748/* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3749   flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3750   product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3751   to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3752   become signed.
3753
3754   The result is put in TARGET if that is convenient.
3755
3756   MODE is the mode of operation.  */
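/* The adjustment relies on the identity (with all values and arithmetic
   taken modulo 2**N, N being the width of MODE):

     highpart_unsigned (a, b)
       == highpart_signed (a, b) + (a < 0 ? b : 0) + (b < 0 ? a : 0)

   The two masked terms computed below are exactly those corrections;
   they are added when UNSIGNEDP is nonzero and subtracted otherwise.  */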
3757
3758rtx
3759expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3760			     rtx op1, rtx target, int unsignedp)
3761{
3762  rtx tem;
3763  enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3764
3765  tem = expand_shift (RSHIFT_EXPR, mode, op0,
3766		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3767  tem = expand_and (mode, tem, op1, NULL_RTX);
3768  adj_operand
3769    = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3770		     adj_operand);
3771
3772  tem = expand_shift (RSHIFT_EXPR, mode, op1,
3773		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3774  tem = expand_and (mode, tem, op0, NULL_RTX);
3775  target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3776			  target);
3777
3778  return target;
3779}
3780
3781/* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3782
3783static rtx
3784extract_high_half (scalar_int_mode mode, rtx op)
3785{
3786  if (mode == word_mode)
3787    return gen_highpart (mode, op);
3788
3789  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3790
3791  op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3792		     GET_MODE_BITSIZE (mode), 0, 1);
3793  return convert_modes (mode, wider_mode, op, 0);
3794}
3795
3796/* Like expmed_mult_highpart, but only consider using a multiplication
3797   optab.  OP1 is an rtx for the constant operand.  */
3798
3799static rtx
3800expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3801			    rtx target, int unsignedp, int max_cost)
3802{
3803  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3804  optab moptab;
3805  rtx tem;
3806  int size;
3807  bool speed = optimize_insn_for_speed_p ();
3808
3809  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3810
3811  size = GET_MODE_BITSIZE (mode);
3812
3813  /* Firstly, try using a multiplication insn that only generates the needed
3814     high part of the product, and in the sign flavor of unsignedp.  */
3815  if (mul_highpart_cost (speed, mode) < max_cost)
3816    {
3817      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3818      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3819			  unsignedp, OPTAB_DIRECT);
3820      if (tem)
3821	return tem;
3822    }
3823
3824  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3825     Need to adjust the result after the multiplication.  */
3826  if (size - 1 < BITS_PER_WORD
3827      && (mul_highpart_cost (speed, mode)
3828	  + 2 * shift_cost (speed, mode, size-1)
3829	  + 4 * add_cost (speed, mode) < max_cost))
3830    {
3831      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3832      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3833			  unsignedp, OPTAB_DIRECT);
3834      if (tem)
3835	/* We used the wrong signedness.  Adjust the result.  */
3836	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3837					    tem, unsignedp);
3838    }
3839
3840  /* Try widening multiplication.  */
3841  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3842  if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3843      && mul_widen_cost (speed, wider_mode) < max_cost)
3844    {
3845      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3846			  unsignedp, OPTAB_WIDEN);
3847      if (tem)
3848	return extract_high_half (mode, tem);
3849    }
3850
3851  /* Try widening the mode and performing a non-widening multiplication.  */
3852  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3853      && size - 1 < BITS_PER_WORD
3854      && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3855	  < max_cost))
3856    {
3857      rtx_insn *insns;
3858      rtx wop0, wop1;
3859
3860      /* We need to widen the operands, for example to ensure the
3861	 constant multiplier is correctly sign or zero extended.
3862	 Use a sequence to clean up any instructions emitted by
3863	 the conversions if things don't work out.  */
3864      start_sequence ();
3865      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3866      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3867      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3868			  unsignedp, OPTAB_WIDEN);
3869      insns = get_insns ();
3870      end_sequence ();
3871
3872      if (tem)
3873	{
3874	  emit_insn (insns);
3875	  return extract_high_half (mode, tem);
3876	}
3877    }
3878
3879  /* Try widening multiplication of opposite signedness, and adjust.  */
3880  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3881  if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3882      && size - 1 < BITS_PER_WORD
3883      && (mul_widen_cost (speed, wider_mode)
3884	  + 2 * shift_cost (speed, mode, size-1)
3885	  + 4 * add_cost (speed, mode) < max_cost))
3886    {
3887      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3888			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3889      if (tem != 0)
3890	{
3891	  tem = extract_high_half (mode, tem);
3892	  /* We used the wrong signedness.  Adjust the result.  */
3893	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3894					      target, unsignedp);
3895	}
3896    }
3897
3898  return 0;
3899}
3900
3901/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3902   putting the high half of the result in TARGET if that is convenient,
3903   and return where the result is.  If the operation cannot be performed,
3904   0 is returned.
3905
3906   MODE is the mode of operation and result.
3907
3908   UNSIGNEDP nonzero means unsigned multiply.
3909
3910   MAX_COST is the total allowed cost for the expanded RTL.  */
3911
3912static rtx
3913expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3914		      rtx target, int unsignedp, int max_cost)
3915{
3916  unsigned HOST_WIDE_INT cnst1;
3917  int extra_cost;
3918  bool sign_adjust = false;
3919  enum mult_variant variant;
3920  struct algorithm alg;
3921  rtx tem;
3922  bool speed = optimize_insn_for_speed_p ();
3923
3924  /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3925  gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3926
3927  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3928
3929  /* We can't optimize modes wider than BITS_PER_WORD.
3930     ??? We might be able to perform double-word arithmetic if
3931     mode == word_mode, however all the cost calculations in
3932     synth_mult etc. assume single-word operations.  */
3933  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3934  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3935    return expmed_mult_highpart_optab (mode, op0, op1, target,
3936				       unsignedp, max_cost);
3937
3938  extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3939
3940  /* Check whether we try to multiply by a negative constant.  */
3941  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3942    {
3943      sign_adjust = true;
3944      extra_cost += add_cost (speed, mode);
3945    }
3946
3947  /* See whether shift/add multiplication is cheap enough.  */
3948  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3949			   max_cost - extra_cost))
3950    {
3951      /* See whether the specialized multiplication optabs are
3952	 cheaper than the shift/add version.  */
3953      tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3954					alg.cost.cost + extra_cost);
3955      if (tem)
3956	return tem;
3957
3958      tem = convert_to_mode (wider_mode, op0, unsignedp);
3959      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3960      tem = extract_high_half (mode, tem);
3961
3962      /* Adjust result for signedness.  */
3963      if (sign_adjust)
3964	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3965
3966      return tem;
3967    }
3968  return expmed_mult_highpart_optab (mode, op0, op1, target,
3969				     unsignedp, max_cost);
3970}
3971
3972
3973/* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3974
3975static rtx
3976expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3977{
3978  rtx result, temp, shift;
3979  rtx_code_label *label;
3980  int logd;
3981  int prec = GET_MODE_PRECISION (mode);
3982
3983  logd = floor_log2 (d);
3984  result = gen_reg_rtx (mode);
3985
3986  /* Avoid conditional branches when they're expensive.  */
3987  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3988      && optimize_insn_for_speed_p ())
3989    {
3990      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3991				      mode, 0, -1);
3992      if (signmask)
3993	{
3994	  HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3995	  signmask = force_reg (mode, signmask);
3996	  shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
3997
3998	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
3999	     which instruction sequence to use.  If logical right shifts
4000	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
4001	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
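	  /* Both sequences compute the truncated remainder OP0 % D without
	     a branch.  With signmask == (OP0 < 0 ? -1 : 0) the first one is
	     ((((OP0 ^ signmask) - signmask) & masklow) ^ signmask) - signmask
	     and the second one is
	     ((OP0 + (signmask & masklow)) & masklow) - (signmask & masklow);
	     e.g. for OP0 == -5 and D == 4 both yield -1.  */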
4002
4003	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
4004	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4005	      || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4006		  > COSTS_N_INSNS (2)))
4007	    {
4008	      temp = expand_binop (mode, xor_optab, op0, signmask,
4009				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4010	      temp = expand_binop (mode, sub_optab, temp, signmask,
4011				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4012	      temp = expand_binop (mode, and_optab, temp,
4013				   gen_int_mode (masklow, mode),
4014				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4015	      temp = expand_binop (mode, xor_optab, temp, signmask,
4016				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4017	      temp = expand_binop (mode, sub_optab, temp, signmask,
4018				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4019	    }
4020	  else
4021	    {
4022	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
4023				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
4024	      signmask = force_reg (mode, signmask);
4025
4026	      temp = expand_binop (mode, add_optab, op0, signmask,
4027				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4028	      temp = expand_binop (mode, and_optab, temp,
4029				   gen_int_mode (masklow, mode),
4030				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4031	      temp = expand_binop (mode, sub_optab, temp, signmask,
4032				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4033	    }
4034	  return temp;
4035	}
4036    }
4037
4038  /* Mask contains the mode's signbit and the significant bits of the
4039     modulus.  By including the signbit in the operation, many targets
4040     can avoid an explicit compare operation in the following comparison
4041     against zero.  */
4042  wide_int mask = wi::mask (logd, false, prec);
4043  mask = wi::set_bit (mask, prec - 1);
4044
4045  temp = expand_binop (mode, and_optab, op0,
4046		       immed_wide_int_const (mask, mode),
4047		       result, 1, OPTAB_LIB_WIDEN);
4048  if (temp != result)
4049    emit_move_insn (result, temp);
4050
4051  label = gen_label_rtx ();
4052  do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4053
4054  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4055		       0, OPTAB_LIB_WIDEN);
4056
4057  mask = wi::mask (logd, true, prec);
4058  temp = expand_binop (mode, ior_optab, temp,
4059		       immed_wide_int_const (mask, mode),
4060		       result, 1, OPTAB_LIB_WIDEN);
4061  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4062		       0, OPTAB_LIB_WIDEN);
4063  if (temp != result)
4064    emit_move_insn (result, temp);
4065  emit_label (label);
4066  return result;
4067}
4068
4069/* Expand signed division of OP0 by a power of two D in mode MODE.
4070   This routine is only called for positive values of D.  */
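/* The expansions below are all forms of the identity (using an arithmetic
   right shift)

     x / 2**logd == (x + (x < 0 ? 2**logd - 1 : 0)) >> logd

   for division truncating towards zero; e.g. -5 / 4 == (-5 + 3) >> 2 == -1.  */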
4071
4072static rtx
4073expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4074{
4075  rtx temp;
4076  rtx_code_label *label;
4077  int logd;
4078
4079  logd = floor_log2 (d);
4080
4081  if (d == 2
4082      && BRANCH_COST (optimize_insn_for_speed_p (),
4083		      false) >= 1)
4084    {
4085      temp = gen_reg_rtx (mode);
4086      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4087      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4088			   0, OPTAB_LIB_WIDEN);
4089      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4090    }
4091
4092  if (HAVE_conditional_move
4093      && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4094    {
4095      rtx temp2;
4096
4097      start_sequence ();
4098      temp2 = copy_to_mode_reg (mode, op0);
4099      temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4100			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
4101      temp = force_reg (mode, temp);
4102
4103      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
4104      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4105				     mode, temp, temp2, mode, 0);
4106      if (temp2)
4107	{
4108	  rtx_insn *seq = get_insns ();
4109	  end_sequence ();
4110	  emit_insn (seq);
4111	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4112	}
4113      end_sequence ();
4114    }
4115
4116  if (BRANCH_COST (optimize_insn_for_speed_p (),
4117		   false) >= 2)
4118    {
4119      int ushift = GET_MODE_BITSIZE (mode) - logd;
4120
4121      temp = gen_reg_rtx (mode);
4122      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4123      if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4124	  || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4125	     > COSTS_N_INSNS (1))
4126	temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
4127			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
4128      else
4129	temp = expand_shift (RSHIFT_EXPR, mode, temp,
4130			     ushift, NULL_RTX, 1);
4131      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4132			   0, OPTAB_LIB_WIDEN);
4133      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4134    }
4135
4136  label = gen_label_rtx ();
4137  temp = copy_to_mode_reg (mode, op0);
4138  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4139  expand_inc (temp, gen_int_mode (d - 1, mode));
4140  emit_label (label);
4141  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4142}
4143
4144/* Emit the code to divide OP0 by OP1, putting the result in TARGET
4145   if that is convenient, and returning where the result is.
4146   You may request either the quotient or the remainder as the result;
4147   specify REM_FLAG nonzero to get the remainder.
4148
4149   CODE is the expression code for which kind of division this is;
4150   it controls how rounding is done.  MODE is the machine mode to use.
4151   UNSIGNEDP nonzero means do unsigned division.  */
4152
4153/* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4154   and then correct it by or'ing in missing high bits
4155   if result of ANDI is nonzero.
4156   For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4157   This could optimize to a bfexts instruction.
4158   But C doesn't use these operations, so their optimizations are
4159   left for later.  */
4160/* ??? For modulo, we don't actually need the highpart of the first product,
4161   the low part will do nicely.  And for small divisors, the second multiply
4162   can also be a low-part only multiply or even be completely left out.
4163   E.g. to calculate the remainder of a division by 3 with a 32 bit
4164   multiply, multiply with 0x55555556 and extract the upper two bits;
4165   the result is exact for inputs up to 0x1fffffff.
4166   The input range can be reduced by using cross-sum rules.
4167   For odd divisors >= 3, the following table gives right shift counts
4168   so that if a number is shifted by an integer multiple of the given
4169   amount, the remainder stays the same:
4170   2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4171   14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4172   0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4173   20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4174   0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4175
4176   Cross-sum rules for even numbers can be derived by leaving as many bits
4177   to the right alone as the divisor has zeros to the right.
4178   E.g. if x is an unsigned 32 bit number:
4179   (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4180   */
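/* As a worked instance of the remainder-by-3 trick above: for x == 10,
   10 * 0x55555556 == 0x35555555C, the low 32 bits are 0x5555555C, and
   their upper two bits give 1 == 10 % 3.  */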
4181
4182rtx
4183expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4184	       rtx op0, rtx op1, rtx target, int unsignedp)
4185{
4186  machine_mode compute_mode;
4187  rtx tquotient;
4188  rtx quotient = 0, remainder = 0;
4189  rtx_insn *last;
4190  rtx_insn *insn;
4191  optab optab1, optab2;
4192  int op1_is_constant, op1_is_pow2 = 0;
4193  int max_cost, extra_cost;
4194  static HOST_WIDE_INT last_div_const = 0;
4195  bool speed = optimize_insn_for_speed_p ();
4196
4197  op1_is_constant = CONST_INT_P (op1);
4198  if (op1_is_constant)
4199    {
4200      wide_int ext_op1 = rtx_mode_t (op1, mode);
4201      op1_is_pow2 = (wi::popcount (ext_op1) == 1
4202		     || (! unsignedp
4203			 && wi::popcount (wi::neg (ext_op1)) == 1));
4204    }
4205
4206  /*
4207     This is the structure of expand_divmod:
4208
4209     First comes code to fix up the operands so we can perform the operations
4210     correctly and efficiently.
4211
4212     Second comes a switch statement with code specific for each rounding mode.
4213     For some special operands this code emits all RTL for the desired
4214     operation, for other cases, it generates only a quotient and stores it in
4215     QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4216     to indicate that it has not done anything.
4217
4218     Last comes code that finishes the operation.  If QUOTIENT is set and
4219     REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4220     QUOTIENT is not set, it is computed using trunc rounding.
4221
4222     We try to generate special code for division and remainder when OP1 is a
4223     constant.  If |OP1| = 2**n we can use shifts and some other fast
4224     operations.  For other values of OP1, we compute a carefully selected
4225     fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4226     by m.
4227
4228     In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4229     half of the product.  Different strategies for generating the product are
4230     implemented in expmed_mult_highpart.
4231
4232     If what we actually want is the remainder, we generate that by another
4233     by-constant multiplication and a subtraction.  */
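  /* For example, an unsigned 32-bit division by 7 needs a 33-bit
     multiplier (0x124924925), so one possible expansion is
       t1 = highpart (x * 0x24924925);
       quotient = (t1 + ((x - t1) >> 1)) >> 2;
     the exact form emitted depends on the target's cost model.  */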
4234
4235  /* We shouldn't be called with OP1 == const1_rtx, but some of the
4236     code below will malfunction if we are, so check here and handle
4237     the special case if so.  */
4238  if (op1 == const1_rtx)
4239    return rem_flag ? const0_rtx : op0;
4240
4241  /* When dividing by -1, we could get an overflow.
4242     negv_optab can handle overflows.  */
4243  if (! unsignedp && op1 == constm1_rtx)
4244    {
4245      if (rem_flag)
4246	return const0_rtx;
4247      return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4248			  ? negv_optab : neg_optab, op0, target, 0);
4249    }
4250
4251  if (target
4252      /* Don't use the function value register as a target
4253	 since we have to read it as well as write it,
4254	 and function-inlining gets confused by this.  */
4255      && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4256	  /* Don't clobber an operand while doing a multi-step calculation.  */
4257	  || ((rem_flag || op1_is_constant)
4258	      && (reg_mentioned_p (target, op0)
4259		  || (MEM_P (op0) && MEM_P (target))))
4260	  || reg_mentioned_p (target, op1)
4261	  || (MEM_P (op1) && MEM_P (target))))
4262    target = 0;
4263
4264  /* Get the mode in which to perform this computation.  Normally it will
4265     be MODE, but sometimes we can't do the desired operation in MODE.
4266     If so, pick a wider mode in which we can do the operation.  Convert
4267     to that mode at the start to avoid repeated conversions.
4268
4269     First see what operations we need.  These depend on the expression
4270     we are evaluating.  (We assume that divxx3 insns exist under the
4271     same conditions as modxx3 insns do, and that these insns don't normally
4272     fail.  If these assumptions are not correct, we may generate less
4273     efficient code in some cases.)
4274
4275     Then see if we find a mode in which we can open-code that operation
4276     (either a division, modulus, or shift).  Finally, check for the smallest
4277     mode for which we can do the operation with a library call.  */
4278
4279  /* We might want to refine this now that we have division-by-constant
4280     optimization.  Since expmed_mult_highpart tries so many variants, it is
4281     not straightforward to generalize this.  Maybe we should make an array
4282     of possible modes in init_expmed?  Save this for GCC 2.7.  */
4283
4284  optab1 = (op1_is_pow2
4285	    ? (unsignedp ? lshr_optab : ashr_optab)
4286	    : (unsignedp ? udiv_optab : sdiv_optab));
4287  optab2 = (op1_is_pow2 ? optab1
4288	    : (unsignedp ? udivmod_optab : sdivmod_optab));
4289
4290  FOR_EACH_MODE_FROM (compute_mode, mode)
4291    if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4292	|| optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4293      break;
4294
4295  if (compute_mode == VOIDmode)
4296    FOR_EACH_MODE_FROM (compute_mode, mode)
4297      if (optab_libfunc (optab1, compute_mode)
4298	  || optab_libfunc (optab2, compute_mode))
4299	break;
4300
4301  /* If we still couldn't find a mode, use MODE, but expand_binop will
4302     probably die.  */
4303  if (compute_mode == VOIDmode)
4304    compute_mode = mode;
4305
4306  if (target && GET_MODE (target) == compute_mode)
4307    tquotient = target;
4308  else
4309    tquotient = gen_reg_rtx (compute_mode);
4310
4311#if 0
4312  /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4313     (mode), and thereby get better code when OP1 is a constant.  Do that
4314     later.  It will require going over all usages of SIZE below.  */
4315  size = GET_MODE_BITSIZE (mode);
4316#endif
4317
4318  /* Only deduct something for a REM if the last divide done was
4319     for a different constant.  Then set the constant of the last
4320     divide.  */
4321  max_cost = (unsignedp
4322	      ? udiv_cost (speed, compute_mode)
4323	      : sdiv_cost (speed, compute_mode));
4324  if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4325		     && INTVAL (op1) == last_div_const))
4326    max_cost -= (mul_cost (speed, compute_mode)
4327		 + add_cost (speed, compute_mode));
4328
4329  last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4330
4331  /* Now convert to the best mode to use.  */
4332  if (compute_mode != mode)
4333    {
4334      op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4335      op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4336
4337      /* convert_modes may have placed op1 into a register, so we
4338	 must recompute the following.  */
4339      op1_is_constant = CONST_INT_P (op1);
4340      if (op1_is_constant)
4341	{
4342	  wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4343	  op1_is_pow2 = (wi::popcount (ext_op1) == 1
4344			 || (! unsignedp
4345			     && wi::popcount (wi::neg (ext_op1)) == 1));
4346	}
4347      else
4348	op1_is_pow2 = 0;
4349    }
4350
4351  /* If one of the operands is a volatile MEM, copy it into a register.  */
4352
4353  if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4354    op0 = force_reg (compute_mode, op0);
4355  if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4356    op1 = force_reg (compute_mode, op1);
4357
4358  /* If we need the remainder or if OP1 is constant, we need to
4359     put OP0 in a register in case it has any queued subexpressions.  */
4360  if (rem_flag || op1_is_constant)
4361    op0 = force_reg (compute_mode, op0);
4362
4363  last = get_last_insn ();
4364
4365  /* Promote floor rounding to trunc rounding for unsigned operations.  */
4366  if (unsignedp)
4367    {
4368      if (code == FLOOR_DIV_EXPR)
4369	code = TRUNC_DIV_EXPR;
4370      if (code == FLOOR_MOD_EXPR)
4371	code = TRUNC_MOD_EXPR;
4372      if (code == EXACT_DIV_EXPR && op1_is_pow2)
4373	code = TRUNC_DIV_EXPR;
4374    }
4375
4376  if (op1 != const0_rtx)
4377    switch (code)
4378      {
4379      case TRUNC_MOD_EXPR:
4380      case TRUNC_DIV_EXPR:
4381	if (op1_is_constant)
4382	  {
4383	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4384	    int size = GET_MODE_BITSIZE (int_mode);
4385	    if (unsignedp)
4386	      {
4387		unsigned HOST_WIDE_INT mh, ml;
4388		int pre_shift, post_shift;
4389		int dummy;
4390		wide_int wd = rtx_mode_t (op1, int_mode);
4391		unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4392
4393		if (wi::popcount (wd) == 1)
4394		  {
4395		    pre_shift = floor_log2 (d);
4396		    if (rem_flag)
4397		      {
4398			unsigned HOST_WIDE_INT mask
4399			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
4400			remainder
4401			  = expand_binop (int_mode, and_optab, op0,
4402					  gen_int_mode (mask, int_mode),
4403					  remainder, 1,
4404					  OPTAB_LIB_WIDEN);
4405			if (remainder)
4406			  return gen_lowpart (mode, remainder);
4407		      }
4408		    quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4409					     pre_shift, tquotient, 1);
4410		  }
4411		else if (size <= HOST_BITS_PER_WIDE_INT)
4412		  {
4413		    if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4414		      {
4415			/* Most significant bit of divisor is set; emit an scc
4416			   insn.  */
4417			quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4418							  int_mode, 1, 1);
4419		      }
4420		    else
4421		      {
4422			/* Find a suitable multiplier and right shift count
4423			   instead of multiplying with D.  */
4424
4425			mh = choose_multiplier (d, size, size,
4426						&ml, &post_shift, &dummy);
4427
4428			/* If the suggested multiplier is more than SIZE bits,
4429			   we can do better for even divisors, using an
4430			   initial right shift.  */
4431			if (mh != 0 && (d & 1) == 0)
4432			  {
4433			    pre_shift = ctz_or_zero (d);
4434			    mh = choose_multiplier (d >> pre_shift, size,
4435						    size - pre_shift,
4436						    &ml, &post_shift, &dummy);
4437			    gcc_assert (!mh);
4438			  }
4439			else
4440			  pre_shift = 0;
4441
4442			if (mh != 0)
4443			  {
4444			    rtx t1, t2, t3, t4;
4445
4446			    if (post_shift - 1 >= BITS_PER_WORD)
4447			      goto fail1;
4448
4449			    extra_cost
4450			      = (shift_cost (speed, int_mode, post_shift - 1)
4451				 + shift_cost (speed, int_mode, 1)
4452				 + 2 * add_cost (speed, int_mode));
4453			    t1 = expmed_mult_highpart
4454			      (int_mode, op0, gen_int_mode (ml, int_mode),
4455			       NULL_RTX, 1, max_cost - extra_cost);
4456			    if (t1 == 0)
4457			      goto fail1;
4458			    t2 = force_operand (gen_rtx_MINUS (int_mode,
4459							       op0, t1),
4460						NULL_RTX);
4461			    t3 = expand_shift (RSHIFT_EXPR, int_mode,
4462					       t2, 1, NULL_RTX, 1);
4463			    t4 = force_operand (gen_rtx_PLUS (int_mode,
4464							      t1, t3),
4465						NULL_RTX);
4466			    quotient = expand_shift
4467			      (RSHIFT_EXPR, int_mode, t4,
4468			       post_shift - 1, tquotient, 1);
4469			  }
4470			else
4471			  {
4472			    rtx t1, t2;
4473
4474			    if (pre_shift >= BITS_PER_WORD
4475				|| post_shift >= BITS_PER_WORD)
4476			      goto fail1;
4477
4478			    t1 = expand_shift
4479			      (RSHIFT_EXPR, int_mode, op0,
4480			       pre_shift, NULL_RTX, 1);
4481			    extra_cost
4482			      = (shift_cost (speed, int_mode, pre_shift)
4483				 + shift_cost (speed, int_mode, post_shift));
4484			    t2 = expmed_mult_highpart
4485			      (int_mode, t1,
4486			       gen_int_mode (ml, int_mode),
4487			       NULL_RTX, 1, max_cost - extra_cost);
4488			    if (t2 == 0)
4489			      goto fail1;
4490			    quotient = expand_shift
4491			      (RSHIFT_EXPR, int_mode, t2,
4492			       post_shift, tquotient, 1);
4493			  }
4494		      }
4495		  }
4496		else		/* Too wide mode to use tricky code */
4497		  break;
4498
4499		insn = get_last_insn ();
4500		if (insn != last)
4501		  set_dst_reg_note (insn, REG_EQUAL,
4502				    gen_rtx_UDIV (int_mode, op0, op1),
4503				    quotient);
4504	      }
4505	    else		/* TRUNC_DIV, signed */
4506	      {
4507		unsigned HOST_WIDE_INT ml;
4508		int lgup, post_shift;
4509		rtx mlr;
4510		HOST_WIDE_INT d = INTVAL (op1);
4511		unsigned HOST_WIDE_INT abs_d;
4512
4513		/* Not prepared to handle division/remainder by
4514		   0xffffffffffffffff8000000000000000 etc.  */
4515		if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4516		  break;
4517
4518		/* Since d might be INT_MIN, we have to cast to
4519		   unsigned HOST_WIDE_INT before negating to avoid
4520		   undefined signed overflow.  */
4521		abs_d = (d >= 0
4522			 ? (unsigned HOST_WIDE_INT) d
4523			 : - (unsigned HOST_WIDE_INT) d);
4524
4525		/* n rem d = n rem -d */
4526		if (rem_flag && d < 0)
4527		  {
4528		    d = abs_d;
4529		    op1 = gen_int_mode (abs_d, int_mode);
4530		  }
4531
4532		if (d == 1)
4533		  quotient = op0;
4534		else if (d == -1)
4535		  quotient = expand_unop (int_mode, neg_optab, op0,
4536					  tquotient, 0);
4537		else if (size <= HOST_BITS_PER_WIDE_INT
4538			 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4539		  {
4540		    /* This case is not handled correctly below.  */
4541		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
4542						int_mode, 1, 1);
4543		    if (quotient == 0)
4544		      goto fail1;
4545		  }
4546		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4547			 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4548			 && (rem_flag
4549			     ? smod_pow2_cheap (speed, int_mode)
4550			     : sdiv_pow2_cheap (speed, int_mode))
4551			 /* We assume that the cheap metric is true if the
4552			    optab has an expander for this mode.  */
4553			 && ((optab_handler ((rem_flag ? smod_optab
4554					      : sdiv_optab),
4555					     int_mode)
4556			      != CODE_FOR_nothing)
4557			     || (optab_handler (sdivmod_optab, int_mode)
4558				 != CODE_FOR_nothing)))
4559		  ;
4560		else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4561		  {
4562		    if (rem_flag)
4563		      {
4564			remainder = expand_smod_pow2 (int_mode, op0, d);
4565			if (remainder)
4566			  return gen_lowpart (mode, remainder);
4567		      }
4568
4569		    if (sdiv_pow2_cheap (speed, int_mode)
4570			&& ((optab_handler (sdiv_optab, int_mode)
4571			     != CODE_FOR_nothing)
4572			    || (optab_handler (sdivmod_optab, int_mode)
4573				!= CODE_FOR_nothing)))
4574		      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4575						int_mode, op0,
4576						gen_int_mode (abs_d,
4577							      int_mode),
4578						NULL_RTX, 0);
4579		    else
4580		      quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4581
4582		    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4583		       negate the quotient.  */
4584		    if (d < 0)
4585		      {
4586			insn = get_last_insn ();
4587			if (insn != last
4588			    && abs_d < (HOST_WIDE_INT_1U
4589					<< (HOST_BITS_PER_WIDE_INT - 1)))
4590			  set_dst_reg_note (insn, REG_EQUAL,
4591					    gen_rtx_DIV (int_mode, op0,
4592							 gen_int_mode
4593							   (abs_d,
4594							    int_mode)),
4595					    quotient);
4596
4597			quotient = expand_unop (int_mode, neg_optab,
4598						quotient, quotient, 0);
4599		      }
4600		  }
4601		else if (size <= HOST_BITS_PER_WIDE_INT)
4602		  {
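		    /* Use the multiply-high ("magic number") method:
		       choose_multiplier picks ML and POST_SHIFT so that
		       trunc (op0 / abs_d) equals the high SIZE bits of
		       op0 * ml, shifted right arithmetically by POST_SHIFT,
		       plus 1 when op0 is negative; for a negative divisor
		       the difference below is taken the other way round,
		       which negates the result.  E.g. for SIZE == 32 and
		       abs_d == 3 this works out to ml == 0x55555556 and
		       post_shift == 0, so op0 == -7 gives
		       high (-7 * 0x55555556) + 1 == -3 + 1 == -2.  */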
4603		    choose_multiplier (abs_d, size, size - 1,
4604				       &ml, &post_shift, &lgup);
4605		    if (ml < HOST_WIDE_INT_1U << (size - 1))
4606		      {
4607			rtx t1, t2, t3;
4608
4609			if (post_shift >= BITS_PER_WORD
4610			    || size - 1 >= BITS_PER_WORD)
4611			  goto fail1;
4612
4613			extra_cost = (shift_cost (speed, int_mode, post_shift)
4614				      + shift_cost (speed, int_mode, size - 1)
4615				      + add_cost (speed, int_mode));
4616			t1 = expmed_mult_highpart
4617			  (int_mode, op0, gen_int_mode (ml, int_mode),
4618			   NULL_RTX, 0, max_cost - extra_cost);
4619			if (t1 == 0)
4620			  goto fail1;
4621			t2 = expand_shift
4622			  (RSHIFT_EXPR, int_mode, t1,
4623			   post_shift, NULL_RTX, 0);
4624			t3 = expand_shift
4625			  (RSHIFT_EXPR, int_mode, op0,
4626			   size - 1, NULL_RTX, 0);
4627			if (d < 0)
4628			  quotient
4629			    = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4630					     tquotient);
4631			else
4632			  quotient
4633			    = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4634					     tquotient);
4635		      }
4636		    else
4637		      {
4638			rtx t1, t2, t3, t4;
4639
4640			if (post_shift >= BITS_PER_WORD
4641			    || size - 1 >= BITS_PER_WORD)
4642			  goto fail1;
4643
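			/* ML does not fit in SIZE - 1 bits, so the CONST_INT
			   actually used is ML - 2**SIZE (negative).  The high
			   part of op0 * (ml - 2**size) is
			   high (op0 * ml) - op0, so T2 adds OP0 back before
			   the shifts to recover high (op0 * ml).  */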
4644			ml |= HOST_WIDE_INT_M1U << (size - 1);
4645			mlr = gen_int_mode (ml, int_mode);
4646			extra_cost = (shift_cost (speed, int_mode, post_shift)
4647				      + shift_cost (speed, int_mode, size - 1)
4648				      + 2 * add_cost (speed, int_mode));
4649			t1 = expmed_mult_highpart (int_mode, op0, mlr,
4650						   NULL_RTX, 0,
4651						   max_cost - extra_cost);
4652			if (t1 == 0)
4653			  goto fail1;
4654			t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4655					    NULL_RTX);
4656			t3 = expand_shift
4657			  (RSHIFT_EXPR, int_mode, t2,
4658			   post_shift, NULL_RTX, 0);
4659			t4 = expand_shift
4660			  (RSHIFT_EXPR, int_mode, op0,
4661			   size - 1, NULL_RTX, 0);
4662			if (d < 0)
4663			  quotient
4664			    = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4665					     tquotient);
4666			else
4667			  quotient
4668			    = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4669					     tquotient);
4670		      }
4671		  }
4672		else		/* Too wide mode to use tricky code */
4673		  break;
4674
4675		insn = get_last_insn ();
4676		if (insn != last)
4677		  set_dst_reg_note (insn, REG_EQUAL,
4678				    gen_rtx_DIV (int_mode, op0, op1),
4679				    quotient);
4680	      }
4681	    break;
4682	  }
4683      fail1:
4684	delete_insns_since (last);
4685	break;
4686
4687      case FLOOR_DIV_EXPR:
4688      case FLOOR_MOD_EXPR:
4689      /* We will come here only for signed operations.  */
4690	if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4691	  {
4692	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4693	    int size = GET_MODE_BITSIZE (int_mode);
4694	    unsigned HOST_WIDE_INT mh, ml;
4695	    int pre_shift, lgup, post_shift;
4696	    HOST_WIDE_INT d = INTVAL (op1);
4697
4698	    if (d > 0)
4699	      {
4700		/* We could just as easily deal with negative constants here,
4701		   but it does not seem worth the trouble for GCC 2.6.  */
4702		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4703		  {
4704		    pre_shift = floor_log2 (d);
4705		    if (rem_flag)
4706		      {
4707			unsigned HOST_WIDE_INT mask
4708			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
4709			remainder = expand_binop
4710			  (int_mode, and_optab, op0,
4711			   gen_int_mode (mask, int_mode),
4712			   remainder, 0, OPTAB_LIB_WIDEN);
4713			if (remainder)
4714			  return gen_lowpart (mode, remainder);
4715		      }
4716		    quotient = expand_shift
4717		      (RSHIFT_EXPR, int_mode, op0,
4718		       pre_shift, tquotient, 0);
4719		  }
4720		else
4721		  {
4722		    rtx t1, t2, t3, t4;
4723
4724		    mh = choose_multiplier (d, size, size - 1,
4725					    &ml, &post_shift, &lgup);
4726		    gcc_assert (!mh);
4727
4728		    if (post_shift < BITS_PER_WORD
4729			&& size - 1 < BITS_PER_WORD)
4730		      {
4731			t1 = expand_shift
4732			  (RSHIFT_EXPR, int_mode, op0,
4733			   size - 1, NULL_RTX, 0);
4734			t2 = expand_binop (int_mode, xor_optab, op0, t1,
4735					   NULL_RTX, 0, OPTAB_WIDEN);
4736			extra_cost = (shift_cost (speed, int_mode, post_shift)
4737				      + shift_cost (speed, int_mode, size - 1)
4738				      + 2 * add_cost (speed, int_mode));
4739			t3 = expmed_mult_highpart
4740			  (int_mode, t2, gen_int_mode (ml, int_mode),
4741			   NULL_RTX, 1, max_cost - extra_cost);
4742			if (t3 != 0)
4743			  {
4744			    t4 = expand_shift
4745			      (RSHIFT_EXPR, int_mode, t3,
4746			       post_shift, NULL_RTX, 1);
4747			    quotient = expand_binop (int_mode, xor_optab,
4748						     t4, t1, tquotient, 0,
4749						     OPTAB_WIDEN);
4750			  }
4751		      }
4752		  }
4753	      }
4754	    else
4755	      {
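		/* The divisor is a negative constant; compute
		   floor (op0 / op1) without branches.  op0 | (op0 - 1) has
		   its sign bit set exactly when op0 <= 0, so NSIGN is -1
		   for op0 <= 0 and 0 for op0 > 0.  The truncating division
		   is then applied to op0 itself in the first case and to
		   op0 - 1 in the second, and adding ~NSIGN (0 or -1)
		   produces the floor quotient, using the identity
		   floor (x / d) == trunc ((x - 1) / d) - 1
		   for x > 0 and d < 0.  */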
4756		rtx nsign, t1, t2, t3, t4;
4757		t1 = force_operand (gen_rtx_PLUS (int_mode,
4758						  op0, constm1_rtx), NULL_RTX);
4759		t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4760				   0, OPTAB_WIDEN);
4761		nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4762				      size - 1, NULL_RTX, 0);
4763		t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4764				    NULL_RTX);
4765		t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4766				    NULL_RTX, 0);
4767		if (t4)
4768		  {
4769		    rtx t5;
4770		    t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4771				      NULL_RTX, 0);
4772		    quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4773					      tquotient);
4774		  }
4775	      }
4776	  }
4777
4778	if (quotient != 0)
4779	  break;
4780	delete_insns_since (last);
4781
4782	/* Try using an instruction that produces both the quotient and
4783	   remainder, using truncation.  We can easily compensate the quotient
4784	   or remainder to get floor rounding, once we have the remainder.
4785	   Notice that we compute also the final remainder value here,
4786	   and return the result right away.  */
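	/* (The compensation: when the remainder is nonzero and op0 and op1
	   have opposite signs, the truncating quotient is one too large and
	   the remainder has the wrong sign, so subtract 1 from the quotient
	   and add op1 to the remainder.  The two jumps below skip this when
	   the remainder is zero or (op0 ^ op1) >= 0.)  */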
4787	if (target == 0 || GET_MODE (target) != compute_mode)
4788	  target = gen_reg_rtx (compute_mode);
4789
4790	if (rem_flag)
4791	  {
4792	    remainder
4793	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4794	    quotient = gen_reg_rtx (compute_mode);
4795	  }
4796	else
4797	  {
4798	    quotient
4799	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4800	    remainder = gen_reg_rtx (compute_mode);
4801	  }
4802
4803	if (expand_twoval_binop (sdivmod_optab, op0, op1,
4804				 quotient, remainder, 0))
4805	  {
4806	    /* This could be computed with a branch-less sequence.
4807	       Save that for later.  */
4808	    rtx tem;
4809	    rtx_code_label *label = gen_label_rtx ();
4810	    do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4811	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4812				NULL_RTX, 0, OPTAB_WIDEN);
4813	    do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4814	    expand_dec (quotient, const1_rtx);
4815	    expand_inc (remainder, op1);
4816	    emit_label (label);
4817	    return gen_lowpart (mode, rem_flag ? remainder : quotient);
4818	  }
4819
4820	/* No luck with division elimination or divmod.  Have to do it
4821	   by conditionally adjusting op0 *and* the result.  */
4822	{
4823	  rtx_code_label *label1, *label2, *label3, *label4, *label5;
4824	  rtx adjusted_op0;
4825	  rtx tem;
4826
4827	  quotient = gen_reg_rtx (compute_mode);
4828	  adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4829	  label1 = gen_label_rtx ();
4830	  label2 = gen_label_rtx ();
4831	  label3 = gen_label_rtx ();
4832	  label4 = gen_label_rtx ();
4833	  label5 = gen_label_rtx ();
4834	  do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4835	  do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4836	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4837			      quotient, 0, OPTAB_LIB_WIDEN);
4838	  if (tem != quotient)
4839	    emit_move_insn (quotient, tem);
4840	  emit_jump_insn (targetm.gen_jump (label5));
4841	  emit_barrier ();
4842	  emit_label (label1);
4843	  expand_inc (adjusted_op0, const1_rtx);
4844	  emit_jump_insn (targetm.gen_jump (label4));
4845	  emit_barrier ();
4846	  emit_label (label2);
4847	  do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4848	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4849			      quotient, 0, OPTAB_LIB_WIDEN);
4850	  if (tem != quotient)
4851	    emit_move_insn (quotient, tem);
4852	  emit_jump_insn (targetm.gen_jump (label5));
4853	  emit_barrier ();
4854	  emit_label (label3);
4855	  expand_dec (adjusted_op0, const1_rtx);
4856	  emit_label (label4);
4857	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4858			      quotient, 0, OPTAB_LIB_WIDEN);
4859	  if (tem != quotient)
4860	    emit_move_insn (quotient, tem);
4861	  expand_dec (quotient, const1_rtx);
4862	  emit_label (label5);
4863	}
4864	break;
4865
4866      case CEIL_DIV_EXPR:
4867      case CEIL_MOD_EXPR:
4868	if (unsignedp)
4869	  {
4870	    if (op1_is_constant
4871		&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4872		&& (HWI_COMPUTABLE_MODE_P (compute_mode)
4873		    || INTVAL (op1) >= 0))
4874	      {
4875		scalar_int_mode int_mode
4876		  = as_a <scalar_int_mode> (compute_mode);
4877		rtx t1, t2, t3;
4878		unsigned HOST_WIDE_INT d = INTVAL (op1);
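		/* Here d == 2**k, so ceil (op0 / d) is simply
		   (op0 >> k) + ((op0 & (d - 1)) != 0 ? 1 : 0).  */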
4879		t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4880				   floor_log2 (d), tquotient, 1);
4881		t2 = expand_binop (int_mode, and_optab, op0,
4882				   gen_int_mode (d - 1, int_mode),
4883				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4884		t3 = gen_reg_rtx (int_mode);
4885		t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4886		if (t3 == 0)
4887		  {
4888		    rtx_code_label *lab;
4889		    lab = gen_label_rtx ();
4890		    do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4891		    expand_inc (t1, const1_rtx);
4892		    emit_label (lab);
4893		    quotient = t1;
4894		  }
4895		else
4896		  quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4897					    tquotient);
4898		break;
4899	      }
4900
4901	    /* Try using an instruction that produces both the quotient and
4902	       remainder, using truncation.  We can easily compensate the
4903	       quotient or remainder to get ceiling rounding, once we have the
4904	       remainder.  Notice that we compute also the final remainder
4905	       value here, and return the result right away.  */
4906	    if (target == 0 || GET_MODE (target) != compute_mode)
4907	      target = gen_reg_rtx (compute_mode);
4908
4909	    if (rem_flag)
4910	      {
4911		remainder = (REG_P (target)
4912			     ? target : gen_reg_rtx (compute_mode));
4913		quotient = gen_reg_rtx (compute_mode);
4914	      }
4915	    else
4916	      {
4917		quotient = (REG_P (target)
4918			    ? target : gen_reg_rtx (compute_mode));
4919		remainder = gen_reg_rtx (compute_mode);
4920	      }
4921
4922	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4923				     remainder, 1))
4924	      {
4925		/* This could be computed with a branch-less sequence.
4926		   Save that for later.  */
4927		rtx_code_label *label = gen_label_rtx ();
4928		do_cmp_and_jump (remainder, const0_rtx, EQ,
4929				 compute_mode, label);
4930		expand_inc (quotient, const1_rtx);
4931		expand_dec (remainder, op1);
4932		emit_label (label);
4933		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4934	      }
4935
4936	    /* No luck with division elimination or divmod.  Have to do it
4937	       by conditionally adjusting op0 *and* the result.  */
4938	    {
4939	      rtx_code_label *label1, *label2;
4940	      rtx adjusted_op0, tem;
4941
4942	      quotient = gen_reg_rtx (compute_mode);
4943	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4944	      label1 = gen_label_rtx ();
4945	      label2 = gen_label_rtx ();
4946	      do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4947			       compute_mode, label1);
4948	      emit_move_insn (quotient, const0_rtx);
4949	      emit_jump_insn (targetm.gen_jump (label2));
4950	      emit_barrier ();
4951	      emit_label (label1);
4952	      expand_dec (adjusted_op0, const1_rtx);
4953	      tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4954				  quotient, 1, OPTAB_LIB_WIDEN);
4955	      if (tem != quotient)
4956		emit_move_insn (quotient, tem);
4957	      expand_inc (quotient, const1_rtx);
4958	      emit_label (label2);
4959	    }
4960	  }
4961	else /* signed */
4962	  {
4963	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4964		&& INTVAL (op1) >= 0)
4965	      {
4966		/* This is extremely similar to the code for the unsigned case
4967		   above.  For 2.7 we should merge these variants, but for
4968		   2.6.1 I don't want to touch the code for unsigned since that
4969		   gets used in C.  The signed case will only be used by other
4970		   languages (Ada).  */
4971
4972		rtx t1, t2, t3;
4973		unsigned HOST_WIDE_INT d = INTVAL (op1);
4974		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4975				   floor_log2 (d), tquotient, 0);
4976		t2 = expand_binop (compute_mode, and_optab, op0,
4977				   gen_int_mode (d - 1, compute_mode),
4978				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4979		t3 = gen_reg_rtx (compute_mode);
4980		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4981				      compute_mode, 1, 1);
4982		if (t3 == 0)
4983		  {
4984		    rtx_code_label *lab;
4985		    lab = gen_label_rtx ();
4986		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4987		    expand_inc (t1, const1_rtx);
4988		    emit_label (lab);
4989		    quotient = t1;
4990		  }
4991		else
4992		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4993							  t1, t3),
4994					    tquotient);
4995		break;
4996	      }
4997
4998	    /* Try using an instruction that produces both the quotient and
4999	       remainder, using truncation.  We can easily compensate the
5000	       quotient or remainder to get ceiling rounding, once we have the
5001	       remainder.  Notice that we compute also the final remainder
5002	       value here, and return the result right away.  */
5003	    if (target == 0 || GET_MODE (target) != compute_mode)
5004	      target = gen_reg_rtx (compute_mode);
5005	    if (rem_flag)
5006	      {
5007		remainder = (REG_P (target)
5008			    ? target : gen_reg_rtx (compute_mode));
5009		quotient = gen_reg_rtx (compute_mode);
5010	      }
5011	    else
5012	      {
5013		quotient = (REG_P (target)
5014			    ? target : gen_reg_rtx (compute_mode));
5015		remainder = gen_reg_rtx (compute_mode);
5016	      }
5017
5018	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5019				     remainder, 0))
5020	      {
5021		/* This could be computed with a branch-less sequence.
5022		   Save that for later.  */
5023		rtx tem;
5024		rtx_code_label *label = gen_label_rtx ();
5025		do_cmp_and_jump (remainder, const0_rtx, EQ,
5026				 compute_mode, label);
5027		tem = expand_binop (compute_mode, xor_optab, op0, op1,
5028				    NULL_RTX, 0, OPTAB_WIDEN);
5029		do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5030		expand_inc (quotient, const1_rtx);
5031		expand_dec (remainder, op1);
5032		emit_label (label);
5033		return gen_lowpart (mode, rem_flag ? remainder : quotient);
5034	      }
5035
5036	    /* No luck with division elimination or divmod.  Have to do it
5037	       by conditionally adjusting op0 *and* the result.  */
5038	    {
5039	      rtx_code_label *label1, *label2, *label3, *label4, *label5;
5040	      rtx adjusted_op0;
5041	      rtx tem;
5042
5043	      quotient = gen_reg_rtx (compute_mode);
5044	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5045	      label1 = gen_label_rtx ();
5046	      label2 = gen_label_rtx ();
5047	      label3 = gen_label_rtx ();
5048	      label4 = gen_label_rtx ();
5049	      label5 = gen_label_rtx ();
5050	      do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5051	      do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5052			       compute_mode, label1);
5053	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5054				  quotient, 0, OPTAB_LIB_WIDEN);
5055	      if (tem != quotient)
5056		emit_move_insn (quotient, tem);
5057	      emit_jump_insn (targetm.gen_jump (label5));
5058	      emit_barrier ();
5059	      emit_label (label1);
5060	      expand_dec (adjusted_op0, const1_rtx);
5061	      emit_jump_insn (targetm.gen_jump (label4));
5062	      emit_barrier ();
5063	      emit_label (label2);
5064	      do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5065			       compute_mode, label3);
5066	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5067				  quotient, 0, OPTAB_LIB_WIDEN);
5068	      if (tem != quotient)
5069		emit_move_insn (quotient, tem);
5070	      emit_jump_insn (targetm.gen_jump (label5));
5071	      emit_barrier ();
5072	      emit_label (label3);
5073	      expand_inc (adjusted_op0, const1_rtx);
5074	      emit_label (label4);
5075	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5076				  quotient, 0, OPTAB_LIB_WIDEN);
5077	      if (tem != quotient)
5078		emit_move_insn (quotient, tem);
5079	      expand_inc (quotient, const1_rtx);
5080	      emit_label (label5);
5081	    }
5082	  }
5083	break;
5084
5085      case EXACT_DIV_EXPR:
5086	if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5087	  {
5088	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5089	    int size = GET_MODE_BITSIZE (int_mode);
5090	    HOST_WIDE_INT d = INTVAL (op1);
5091	    unsigned HOST_WIDE_INT ml;
5092	    int pre_shift;
5093	    rtx t1;
5094
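	    /* The division is known to be exact: shift out the factor of
	       two in D, then multiply by the multiplicative inverse of the
	       remaining odd part modulo 2**SIZE.  E.g. for SIZE == 32 and
	       d == 10, the inverse of 5 is 0xcccccccd, and
	       30 / 10 == (30 >> 1) * 0xcccccccd mod 2**32 == 3.  */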
5095	    pre_shift = ctz_or_zero (d);
5096	    ml = invert_mod2n (d >> pre_shift, size);
5097	    t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5098			       pre_shift, NULL_RTX, unsignedp);
5099	    quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5100				    NULL_RTX, 1);
5101
5102	    insn = get_last_insn ();
5103	    set_dst_reg_note (insn, REG_EQUAL,
5104			      gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5105					      int_mode, op0, op1),
5106			      quotient);
5107	  }
5108	break;
5109
5110      case ROUND_DIV_EXPR:
5111      case ROUND_MOD_EXPR:
5112	if (unsignedp)
5113	  {
5114	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5115	    rtx tem;
5116	    rtx_code_label *label;
5117	    label = gen_label_rtx ();
5118	    quotient = gen_reg_rtx (int_mode);
5119	    remainder = gen_reg_rtx (int_mode);
5120	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5121	      {
5122		rtx tem;
5123		quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5124					 quotient, 1, OPTAB_LIB_WIDEN);
5125		tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5126		remainder = expand_binop (int_mode, sub_optab, op0, tem,
5127					  remainder, 1, OPTAB_LIB_WIDEN);
5128	      }
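	    /* Round to nearest: the quotient must be incremented (and op1
	       subtracted from the remainder) exactly when
	       2 * remainder >= op1, i.e. when remainder > (op1 - 1) >> 1.  */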
5129	    tem = plus_constant (int_mode, op1, -1);
5130	    tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5131	    do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5132	    expand_inc (quotient, const1_rtx);
5133	    expand_dec (remainder, op1);
5134	    emit_label (label);
5135	  }
5136	else
5137	  {
5138	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5139	    int size = GET_MODE_BITSIZE (int_mode);
5140	    rtx abs_rem, abs_op1, tem, mask;
5141	    rtx_code_label *label;
5142	    label = gen_label_rtx ();
5143	    quotient = gen_reg_rtx (int_mode);
5144	    remainder = gen_reg_rtx (int_mode);
5145	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5146	      {
5147		rtx tem;
5148		quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5149					 quotient, 0, OPTAB_LIB_WIDEN);
5150		tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5151		remainder = expand_binop (int_mode, sub_optab, op0, tem,
5152					  remainder, 0, OPTAB_LIB_WIDEN);
5153	      }
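	    /* Round to nearest: adjust when 2 * |remainder| >= |op1|.
	       MASK is -1 when op0 and op1 have opposite signs and 0
	       otherwise, so (mask ^ 1) - mask is +1 or -1 and
	       (mask ^ op1) - mask is op1 or -op1, the signed adjustments
	       applied to the quotient and the remainder below.  */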
5154	    abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5155	    abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5156	    tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5157				1, NULL_RTX, 1);
5158	    do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5159	    tem = expand_binop (int_mode, xor_optab, op0, op1,
5160				NULL_RTX, 0, OPTAB_WIDEN);
5161	    mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5162				 size - 1, NULL_RTX, 0);
5163	    tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5164				NULL_RTX, 0, OPTAB_WIDEN);
5165	    tem = expand_binop (int_mode, sub_optab, tem, mask,
5166				NULL_RTX, 0, OPTAB_WIDEN);
5167	    expand_inc (quotient, tem);
5168	    tem = expand_binop (int_mode, xor_optab, mask, op1,
5169				NULL_RTX, 0, OPTAB_WIDEN);
5170	    tem = expand_binop (int_mode, sub_optab, tem, mask,
5171				NULL_RTX, 0, OPTAB_WIDEN);
5172	    expand_dec (remainder, tem);
5173	    emit_label (label);
5174	  }
5175	return gen_lowpart (mode, rem_flag ? remainder : quotient);
5176
5177      default:
5178	gcc_unreachable ();
5179      }
5180
5181  if (quotient == 0)
5182    {
5183      if (target && GET_MODE (target) != compute_mode)
5184	target = 0;
5185
5186      if (rem_flag)
5187	{
5188	  /* Try to produce the remainder without producing the quotient.
5189	     If we seem to have a divmod pattern that does not require widening,
5190	     don't try widening here.  We should really have a WIDEN argument
5191	     to expand_twoval_binop, since what we'd really like to do here is
5192	     1) try a mod insn in compute_mode
5193	     2) try a divmod insn in compute_mode
5194	     3) try a div insn in compute_mode and multiply-subtract to get
5195	        remainder
5196	     4) try the same things with widening allowed.  */
5197	  remainder
5198	    = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5199				 op0, op1, target,
5200				 unsignedp,
5201				 ((optab_handler (optab2, compute_mode)
5202				   != CODE_FOR_nothing)
5203				  ? OPTAB_DIRECT : OPTAB_WIDEN));
5204	  if (remainder == 0)
5205	    {
5206	      /* No luck there.  Can we do remainder and divide at once
5207		 without a library call?  */
5208	      remainder = gen_reg_rtx (compute_mode);
5209	      if (! expand_twoval_binop ((unsignedp
5210					  ? udivmod_optab
5211					  : sdivmod_optab),
5212					 op0, op1,
5213					 NULL_RTX, remainder, unsignedp))
5214		remainder = 0;
5215	    }
5216
5217	  if (remainder)
5218	    return gen_lowpart (mode, remainder);
5219	}
5220
5221      /* Produce the quotient.  Try a quotient insn, but not a library call.
5222	 If we have a divmod in this mode, use it in preference to widening
5223	 the div (for this test we assume it will not fail). Note that optab2
5224	 is set to the one of the two optabs that the call below will use.  */
5225      quotient
5226	= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5227			     op0, op1, rem_flag ? NULL_RTX : target,
5228			     unsignedp,
5229			     ((optab_handler (optab2, compute_mode)
5230			       != CODE_FOR_nothing)
5231			      ? OPTAB_DIRECT : OPTAB_WIDEN));
5232
5233      if (quotient == 0)
5234	{
5235	  /* No luck there.  Try a quotient-and-remainder insn,
5236	     keeping the quotient alone.  */
5237	  quotient = gen_reg_rtx (compute_mode);
5238	  if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5239				     op0, op1,
5240				     quotient, NULL_RTX, unsignedp))
5241	    {
5242	      quotient = 0;
5243	      if (! rem_flag)
5244		/* Still no luck.  If we are not computing the remainder,
5245		   use a library call for the quotient.  */
5246		quotient = sign_expand_binop (compute_mode,
5247					      udiv_optab, sdiv_optab,
5248					      op0, op1, target,
5249					      unsignedp, OPTAB_LIB_WIDEN);
5250	    }
5251	}
5252    }
5253
5254  if (rem_flag)
5255    {
5256      if (target && GET_MODE (target) != compute_mode)
5257	target = 0;
5258
5259      if (quotient == 0)
5260	{
5261	  /* No divide instruction either.  Use library for remainder.  */
5262	  remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5263					 op0, op1, target,
5264					 unsignedp, OPTAB_LIB_WIDEN);
5265	  /* No remainder function.  Try a quotient-and-remainder
5266	     function, keeping the remainder.  */
5267	  if (!remainder)
5268	    {
5269	      remainder = gen_reg_rtx (compute_mode);
5270	      if (!expand_twoval_binop_libfunc
5271		  (unsignedp ? udivmod_optab : sdivmod_optab,
5272		   op0, op1,
5273		   NULL_RTX, remainder,
5274		   unsignedp ? UMOD : MOD))
5275		remainder = NULL_RTX;
5276	    }
5277	}
5278      else
5279	{
5280	  /* We divided.  Now finish doing X - Y * (X / Y).  */
5281	  remainder = expand_mult (compute_mode, quotient, op1,
5282				   NULL_RTX, unsignedp);
5283	  remainder = expand_binop (compute_mode, sub_optab, op0,
5284				    remainder, target, unsignedp,
5285				    OPTAB_LIB_WIDEN);
5286	}
5287    }
5288
5289  return gen_lowpart (mode, rem_flag ? remainder : quotient);
5290}
5291
5292/* Return a tree node with data type TYPE, describing the value of X.
5293   Usually this is an VAR_DECL, if there is no obvious better choice.
5294   Usually this is a VAR_DECL, if there is no obvious better choice.
5295   X may be an expression; however, we only support those expressions
5296
5297tree
5298make_tree (tree type, rtx x)
5299{
5300  tree t;
5301
5302  switch (GET_CODE (x))
5303    {
5304    case CONST_INT:
5305    case CONST_WIDE_INT:
5306      t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5307      return t;
5308
5309    case CONST_DOUBLE:
5310      STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5311      if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5312	t = wide_int_to_tree (type,
5313			      wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5314						    HOST_BITS_PER_WIDE_INT * 2));
5315      else
5316	t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5317
5318      return t;
5319
5320    case CONST_VECTOR:
5321      {
5322	unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5323	unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5324	tree itype = TREE_TYPE (type);
5325
5326	/* Build a tree with vector elements.  */
5327	tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5328	unsigned int count = elts.encoded_nelts ();
5329	for (unsigned int i = 0; i < count; ++i)
5330	  {
5331	    rtx elt = CONST_VECTOR_ELT (x, i);
5332	    elts.quick_push (make_tree (itype, elt));
5333	  }
5334
5335	return elts.build ();
5336      }
5337
5338    case PLUS:
5339      return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5340			  make_tree (type, XEXP (x, 1)));
5341
5342    case MINUS:
5343      return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5344			  make_tree (type, XEXP (x, 1)));
5345
5346    case NEG:
5347      return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5348
5349    case MULT:
5350      return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5351			  make_tree (type, XEXP (x, 1)));
5352
5353    case ASHIFT:
5354      return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5355			  make_tree (type, XEXP (x, 1)));
5356
5357    case LSHIFTRT:
5358      t = unsigned_type_for (type);
5359      return fold_convert (type, build2 (RSHIFT_EXPR, t,
5360			    		 make_tree (t, XEXP (x, 0)),
5361				    	 make_tree (type, XEXP (x, 1))));
5362
5363    case ASHIFTRT:
5364      t = signed_type_for (type);
5365      return fold_convert (type, build2 (RSHIFT_EXPR, t,
5366					 make_tree (t, XEXP (x, 0)),
5367				    	 make_tree (type, XEXP (x, 1))));
5368
5369    case DIV:
5370      if (TREE_CODE (type) != REAL_TYPE)
5371	t = signed_type_for (type);
5372      else
5373	t = type;
5374
5375      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5376				    	 make_tree (t, XEXP (x, 0)),
5377				    	 make_tree (t, XEXP (x, 1))));
5378    case UDIV:
5379      t = unsigned_type_for (type);
5380      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5381				    	 make_tree (t, XEXP (x, 0)),
5382				    	 make_tree (t, XEXP (x, 1))));
5383
5384    case SIGN_EXTEND:
5385    case ZERO_EXTEND:
5386      t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5387					  GET_CODE (x) == ZERO_EXTEND);
5388      return fold_convert (type, make_tree (t, XEXP (x, 0)));
5389
5390    case CONST:
5391      return make_tree (type, XEXP (x, 0));
5392
5393    case SYMBOL_REF:
5394      t = SYMBOL_REF_DECL (x);
5395      if (t)
5396	return fold_convert (type, build_fold_addr_expr (t));
5397      /* fall through.  */
5398
5399    default:
5400      if (CONST_POLY_INT_P (x))
5401	return wide_int_to_tree (type, const_poly_int_value (x));
5402
5403      t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5404
5405      /* If TYPE is a POINTER_TYPE, we might need to convert X from
5406	 address mode to pointer mode.  */
5407      if (POINTER_TYPE_P (type))
5408	x = convert_memory_address_addr_space
5409	  (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5410
5411      /* Note that we do *not* use SET_DECL_RTL here, because we do not
5412	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5413      t->decl_with_rtl.rtl = x;
5414
5415      return t;
5416    }
5417}
5418
5419/* Compute the logical-and of OP0 and OP1, storing it in TARGET
5420   and returning TARGET.
5421
5422   If TARGET is 0, a pseudo-register or constant is returned.  */
5423
5424rtx
5425expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5426{
5427  rtx tem = 0;
5428
5429  if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5430    tem = simplify_binary_operation (AND, mode, op0, op1);
5431  if (tem == 0)
5432    tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5433
5434  if (target == 0)
5435    target = tem;
5436  else if (tem != target)
5437    emit_move_insn (target, tem);
5438  return target;
5439}
5440
5441/* Helper function for emit_store_flag: emit the cstore pattern ICODE for
   the comparison CODE of X and Y, normalize the 0/STORE_FLAG_VALUE result
   according to NORMALIZEP and convert it to TARGET_MODE.  Return the result,
   or NULL_RTX if the pattern could not be used.  */
5442rtx
5443emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5444	     machine_mode mode, machine_mode compare_mode,
5445	     int unsignedp, rtx x, rtx y, int normalizep,
5446	     machine_mode target_mode)
5447{
5448  struct expand_operand ops[4];
5449  rtx op0, comparison, subtarget;
5450  rtx_insn *last;
5451  scalar_int_mode result_mode = targetm.cstore_mode (icode);
5452  scalar_int_mode int_target_mode;
5453
5454  last = get_last_insn ();
5455  x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5456  y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5457  if (!x || !y)
5458    {
5459      delete_insns_since (last);
5460      return NULL_RTX;
5461    }
5462
5463  if (target_mode == VOIDmode)
5464    int_target_mode = result_mode;
5465  else
5466    int_target_mode = as_a <scalar_int_mode> (target_mode);
5467  if (!target)
5468    target = gen_reg_rtx (int_target_mode);
5469
5470  comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5471
5472  create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5473  create_fixed_operand (&ops[1], comparison);
5474  create_fixed_operand (&ops[2], x);
5475  create_fixed_operand (&ops[3], y);
5476  if (!maybe_expand_insn (icode, 4, ops))
5477    {
5478      delete_insns_since (last);
5479      return NULL_RTX;
5480    }
5481  subtarget = ops[0].value;
5482
5483  /* If we are converting to a wider mode, first convert to
5484     INT_TARGET_MODE, then normalize.  This produces better combining
5485     opportunities on machines that have a SIGN_EXTRACT when we are
5486     testing a single bit.  This mostly benefits the 68k.
5487
5488     If STORE_FLAG_VALUE does not have the sign bit set when
5489     interpreted in MODE, we can do this conversion as unsigned, which
5490     is usually more efficient.  */
5491  if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode))
5492    {
5493      convert_move (target, subtarget,
5494		    val_signbit_known_clear_p (result_mode,
5495					       STORE_FLAG_VALUE));
5496      op0 = target;
5497      result_mode = int_target_mode;
5498    }
5499  else
5500    op0 = subtarget;
5501
5502  /* If we want to keep subexpressions around, don't reuse our last
5503     target.  */
5504  if (optimize)
5505    subtarget = 0;
5506
5507  /* Now normalize to the proper value in MODE.  Sometimes we don't
5508     have to do anything.  */
5509  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5510    ;
5511  /* STORE_FLAG_VALUE might be the most negative number, so write
5512     the comparison this way to avoid a compile-time warning.  */
5513  else if (- normalizep == STORE_FLAG_VALUE)
5514    op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5515
5516  /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5517     it hard to use a value of just the sign bit due to ANSI integer
5518     constant typing rules.  */
5519  else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5520    op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5521			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5522			normalizep == 1);
5523  else
5524    {
5525      gcc_assert (STORE_FLAG_VALUE & 1);
5526
5527      op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5528      if (normalizep == -1)
5529	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5530    }
5531
5532  /* If we were converting to a smaller mode, do the conversion now.  */
5533  if (int_target_mode != result_mode)
5534    {
5535      convert_move (target, op0, 0);
5536      return target;
5537    }
5538  else
5539    return op0;
5540}
5541
5542
5543/* A subroutine of emit_store_flag only including "tricks" that do not
5544   need a recursive call.  These are kept separate to avoid infinite
5545   loops.  */
5546
5547static rtx
5548emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5549		   machine_mode mode, int unsignedp, int normalizep,
5550		   machine_mode target_mode)
5551{
5552  rtx subtarget;
5553  enum insn_code icode;
5554  machine_mode compare_mode;
5555  enum mode_class mclass;
5556  enum rtx_code scode;
5557
5558  if (unsignedp)
5559    code = unsigned_condition (code);
5560  scode = swap_condition (code);
5561
5562  /* If one operand is constant, make it the second one.  Only do this
5563     if the other operand is not constant as well.  */
5564
5565  if (swap_commutative_operands_p (op0, op1))
5566    {
5567      std::swap (op0, op1);
5568      code = swap_condition (code);
5569    }
5570
5571  if (mode == VOIDmode)
5572    mode = GET_MODE (op0);
5573
5574  /* For some comparisons with 1 and -1, we can convert this to
5575     comparisons with zero.  This will often produce more opportunities for
5576     store-flag insns.  */
5577
5578  switch (code)
5579    {
5580    case LT:
5581      if (op1 == const1_rtx)
5582	op1 = const0_rtx, code = LE;
5583      break;
5584    case LE:
5585      if (op1 == constm1_rtx)
5586	op1 = const0_rtx, code = LT;
5587      break;
5588    case GE:
5589      if (op1 == const1_rtx)
5590	op1 = const0_rtx, code = GT;
5591      break;
5592    case GT:
5593      if (op1 == constm1_rtx)
5594	op1 = const0_rtx, code = GE;
5595      break;
5596    case GEU:
5597      if (op1 == const1_rtx)
5598	op1 = const0_rtx, code = NE;
5599      break;
5600    case LTU:
5601      if (op1 == const1_rtx)
5602	op1 = const0_rtx, code = EQ;
5603      break;
5604    default:
5605      break;
5606    }
5607
5608  /* If we are comparing a double-word integer with zero or -1, we can
5609     convert the comparison into one involving a single word.  */
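  /* E.g. a double-word X == 0 becomes (X.low | X.high) == 0, X == -1
     becomes (X.low & X.high) == -1, and X < 0 or X >= 0 only needs to look
     at the high word.  */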
5610  scalar_int_mode int_mode;
5611  if (is_int_mode (mode, &int_mode)
5612      && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5613      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5614    {
5615      rtx tem;
5616      if ((code == EQ || code == NE)
5617	  && (op1 == const0_rtx || op1 == constm1_rtx))
5618	{
5619	  rtx op00, op01;
5620
5621	  /* Do a logical OR or AND of the two words and compare the
5622	     result.  */
5623	  op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5624	  op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5625	  tem = expand_binop (word_mode,
5626			      op1 == const0_rtx ? ior_optab : and_optab,
5627			      op00, op01, NULL_RTX, unsignedp,
5628			      OPTAB_DIRECT);
5629
5630	  if (tem != 0)
5631	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5632				   unsignedp, normalizep);
5633	}
5634      else if ((code == LT || code == GE) && op1 == const0_rtx)
5635	{
5636	  rtx op0h;
5637
5638	  /* If testing the sign bit, can just test on high word.  */
5639	  op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5640				      subreg_highpart_offset (word_mode,
5641							      int_mode));
5642	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5643				 unsignedp, normalizep);
5644	}
5645      else
5646	tem = NULL_RTX;
5647
5648      if (tem)
5649	{
5650	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5651	    return tem;
5652	  if (!target)
5653	    target = gen_reg_rtx (target_mode);
5654
5655	  convert_move (target, tem,
5656			!val_signbit_known_set_p (word_mode,
5657						  (normalizep ? normalizep
5658						   : STORE_FLAG_VALUE)));
5659	  return target;
5660	}
5661    }
5662
5663  /* If this is A < 0 or A >= 0, we can do this by taking the ones
5664     complement of A (for GE) and shifting the sign bit to the low bit.  */
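  /* E.g. in a 32-bit mode, (A < 0) as a 0/1 value is (unsigned) A >> 31 and
     (A >= 0) is (unsigned) ~A >> 31; for a 0/-1 value the final shift is
     arithmetic instead.  */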
5665  if (op1 == const0_rtx && (code == LT || code == GE)
5666      && is_int_mode (mode, &int_mode)
5667      && (normalizep || STORE_FLAG_VALUE == 1
5668	  || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5669    {
5670      scalar_int_mode int_target_mode;
5671      subtarget = target;
5672
5673      if (!target)
5674	int_target_mode = int_mode;
5675      else
5676	{
5677	  /* If the result is to be wider than OP0, it is best to convert it
5678	     first.  If it is to be narrower, it is *incorrect* to convert it
5679	     first.  */
5680	  int_target_mode = as_a <scalar_int_mode> (target_mode);
5681	  if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5682	    {
5683	      op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5684	      int_mode = int_target_mode;
5685	    }
5686	}
5687
5688      if (int_target_mode != int_mode)
5689	subtarget = 0;
5690
5691      if (code == GE)
5692	op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5693			   ((STORE_FLAG_VALUE == 1 || normalizep)
5694			    ? 0 : subtarget), 0);
5695
5696      if (STORE_FLAG_VALUE == 1 || normalizep)
5697	/* If we are supposed to produce a 0/1 value, we want to do
5698	   a logical shift from the sign bit to the low-order bit; for
5699	   a -1/0 value, we do an arithmetic shift.  */
5700	op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5701			    GET_MODE_BITSIZE (int_mode) - 1,
5702			    subtarget, normalizep != -1);
5703
5704      if (int_mode != int_target_mode)
5705	op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5706
5707      return op0;
5708    }
5709
5710  mclass = GET_MODE_CLASS (mode);
5711  FOR_EACH_MODE_FROM (compare_mode, mode)
5712    {
5713     machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5714     icode = optab_handler (cstore_optab, optab_mode);
5715     if (icode != CODE_FOR_nothing)
5716	{
5717	  do_pending_stack_adjust ();
5718	  rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5719				 unsignedp, op0, op1, normalizep, target_mode);
5720	  if (tem)
5721	    return tem;
5722
5723	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5724	    {
5725	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
5726				 unsignedp, op1, op0, normalizep, target_mode);
5727	      if (tem)
5728	        return tem;
5729	    }
5730	  break;
5731	}
5732    }
5733
5734  return 0;
5735}
5736
5737/* Subroutine of emit_store_flag that handles cases in which the operands
5738   are scalar integers.  SUBTARGET is the target to use for temporary
5739   operations and TRUEVAL is the value to store when the condition is
5740   true.  All other arguments are as for emit_store_flag.  */
5741
5742rtx
5743emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5744		     rtx op1, scalar_int_mode mode, int unsignedp,
5745		     int normalizep, rtx trueval)
5746{
5747  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5748  rtx_insn *last = get_last_insn ();
5749
5750  /* If this is an equality comparison of integers, we can try to exclusive-or
5751     (or subtract) the two operands and use a recursive call to try the
5752     comparison with zero.  Don't do any of these cases if branches are
5753     very cheap.  */
5754
5755  if ((code == EQ || code == NE) && op1 != const0_rtx)
5756    {
5757      rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5758			      OPTAB_WIDEN);
5759
5760      if (tem == 0)
5761	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5762			    OPTAB_WIDEN);
5763      if (tem != 0)
5764	tem = emit_store_flag (target, code, tem, const0_rtx,
5765			       mode, unsignedp, normalizep);
5766      if (tem != 0)
5767	return tem;
5768
5769      delete_insns_since (last);
5770    }
5771
5772  /* For integer comparisons, try the reverse comparison.  However, for
5773     small X, and if we'd have to extend anyway, implementing "X != 0"
5774     as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5775  rtx_code rcode = reverse_condition (code);
5776  if (can_compare_p (rcode, mode, ccp_store_flag)
5777      && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5778	    && code == NE
5779	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5780	    && op1 == const0_rtx))
5781    {
5782      int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5783		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5784
5785      /* Again, for the reverse comparison, use either an addition or a XOR.  */
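      /* E.g. if the reverse comparison R is available as a 0/1 value, the
	 original comparison is R - 1 as a 0/-1 value and R ^ 1 as a 0/1
	 value; whichever correction is free (zero rtx cost) is used.  */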
5786      if (want_add
5787	  && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5788		       optimize_insn_for_speed_p ()) == 0)
5789	{
5790	  rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5791				       STORE_FLAG_VALUE, target_mode);
5792	  if (tem != 0)
5793	    tem = expand_binop (target_mode, add_optab, tem,
5794				gen_int_mode (normalizep, target_mode),
5795				target, 0, OPTAB_WIDEN);
5796	  if (tem != 0)
5797	    return tem;
5798	}
5799      else if (!want_add
5800	       && rtx_cost (trueval, mode, XOR, 1,
5801			    optimize_insn_for_speed_p ()) == 0)
5802	{
5803	  rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5804				       normalizep, target_mode);
5805	  if (tem != 0)
5806	    tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5807				INTVAL (trueval) >= 0, OPTAB_WIDEN);
5808	  if (tem != 0)
5809	    return tem;
5810	}
5811
5812      delete_insns_since (last);
5813    }
5814
5815  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5816     the constant zero.  Reject all other comparisons at this point.  Only
5817     do LE and GT if branches are expensive since they are expensive on
5818     2-operand machines.  */
5819
5820  if (op1 != const0_rtx
5821      || (code != EQ && code != NE
5822	  && (BRANCH_COST (optimize_insn_for_speed_p (),
5823			   false) <= 1 || (code != LE && code != GT))))
5824    return 0;
5825
5826  /* Try to put the result of the comparison in the sign bit.  Assume we can't
5827     do the necessary operation below.  */
5828
5829  rtx tem = 0;
5830
5831  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5832     the sign bit set.  */
5833
5834  if (code == LE)
5835    {
5836      /* This is destructive, so SUBTARGET can't be OP0.  */
5837      if (rtx_equal_p (subtarget, op0))
5838	subtarget = 0;
5839
5840      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5841			  OPTAB_WIDEN);
5842      if (tem)
5843	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5844			    OPTAB_WIDEN);
5845    }
5846
5847  /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5848     number of bits in the mode of OP0, minus one.  */
5849
5850  if (code == GT)
5851    {
5852      if (rtx_equal_p (subtarget, op0))
5853	subtarget = 0;
5854
5855      tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5856				GET_MODE_BITSIZE (mode) - 1,
5857				subtarget, 0);
5858      if (tem)
5859	tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5860			    OPTAB_WIDEN);
5861    }
5862
5863  if (code == EQ || code == NE)
5864    {
5865      /* For EQ or NE, one way to do the comparison is to apply an operation
5866	 that converts the operand into a positive number if it is nonzero
5867	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5868	 for NE we negate.  This puts the result in the sign bit.  Then we
5869	 normalize with a shift, if needed.
5870
5871	 Two operations that can do the above actions are ABS and FFS, so try
5872	 them.  If that doesn't work, and MODE is smaller than a full word,
5873	 we can use zero-extension to the wider mode (an unsigned conversion)
5874	 as the operation.  */
5875
5876      /* Note that ABS doesn't yield a positive number for INT_MIN, but
5877	 that is compensated by the subsequent overflow when subtracting
5878	 one / negating.  */
5879
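      /* Concretely, for the ABS route: -abs (A) has its sign bit set
	 exactly when A != 0 (even for A == INT_MIN, where the negation
	 wraps back to INT_MIN), and abs (A) - 1 has it set exactly when
	 A == 0 (abs (INT_MIN) - 1 wraps to INT_MAX).  */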
5880      if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5881	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5882      else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5883	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5884      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5885	{
5886	  tem = convert_modes (word_mode, mode, op0, 1);
5887	  mode = word_mode;
5888	}
5889
5890      if (tem != 0)
5891	{
5892	  if (code == EQ)
5893	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5894				0, OPTAB_WIDEN);
5895	  else
5896	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5897	}
5898
5899      /* If we couldn't do it that way, for NE we can "or" the two's complement
5900	 of the value with itself.  For EQ, we take the one's complement of
5901	 that "or", which is an extra insn, so we only handle EQ if branches
5902	 are expensive.  */
5903
5904      if (tem == 0
5905	  && (code == NE
5906	      || BRANCH_COST (optimize_insn_for_speed_p (),
5907			      false) > 1))
5908	{
5909	  if (rtx_equal_p (subtarget, op0))
5910	    subtarget = 0;
5911
5912	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5913	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5914			      OPTAB_WIDEN);
5915
5916	  if (tem && code == EQ)
5917	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5918	}
5919    }
5920
5921  if (tem && normalizep)
5922    tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5923			      GET_MODE_BITSIZE (mode) - 1,
5924			      subtarget, normalizep == 1);
5925
5926  if (tem)
5927    {
5928      if (!target)
5929	;
5930      else if (GET_MODE (tem) != target_mode)
5931	{
5932	  convert_move (target, tem, 0);
5933	  tem = target;
5934	}
5935      else if (!subtarget)
5936	{
5937	  emit_move_insn (target, tem);
5938	  tem = target;
5939	}
5940    }
5941  else
5942    delete_insns_since (last);
5943
5944  return tem;
5945}
5946
5947/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5948   and storing in TARGET.  Normally return TARGET.
5949   Return 0 if that cannot be done.
5950
5951   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5952   it is VOIDmode, they cannot both be CONST_INT.
5953
5954   UNSIGNEDP is for the case where we have to widen the operands
5955   to perform the operation.  It says to use zero-extension.
5956
5957   NORMALIZEP is 1 if we should convert the result to be either zero
5958   or one.  Normalize is -1 if we should convert the result to be
5959   or one.  NORMALIZEP is -1 if we should convert the result to be
5960   "raw" out of the scc insn.  */
5961
5962rtx
5963emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5964		 machine_mode mode, int unsignedp, int normalizep)
5965{
5966  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5967  enum rtx_code rcode;
5968  rtx subtarget;
5969  rtx tem, trueval;
5970  rtx_insn *last;
5971
5972  /* If we compare constants, we shouldn't use a store-flag operation,
5973     but a constant load.  We can get there via the vanilla route that
5974     usually generates a compare-branch sequence, but will in this case
5975     fold the comparison to a constant, and thus elide the branch.  */
5976  if (CONSTANT_P (op0) && CONSTANT_P (op1))
5977    return NULL_RTX;
5978
5979  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5980			   target_mode);
5981  if (tem)
5982    return tem;
5983
5984  /* If we reached here, we can't do this with a scc insn; however, there
5985     are some comparisons that can be done in other ways.  Don't do any
5986     of these cases if branches are very cheap.  */
5987  if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5988    return 0;
5989
5990  /* See what we need to return.  We can only return a 1, -1, or the
5991     sign bit.  */
5992
5993  if (normalizep == 0)
5994    {
5995      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5996	normalizep = STORE_FLAG_VALUE;
5997
5998      else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5999	;
6000      else
6001	return 0;
6002    }
6003
6004  last = get_last_insn ();
6005
6006  /* If optimizing, use different pseudo registers for each insn, instead
6007     of reusing the same pseudo.  This leads to better CSE, but slows
6008     down the compiler, since there are more pseudos.  */
6009  subtarget = (!optimize
6010	       && (target_mode == mode)) ? target : NULL_RTX;
6011  trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6012
6013  /* For floating-point comparisons, try the reverse comparison or try
6014     changing the "orderedness" of the comparison.  */
6015  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6016    {
6017      enum rtx_code first_code;
6018      bool and_them;
6019
6020      rcode = reverse_condition_maybe_unordered (code);
6021      if (can_compare_p (rcode, mode, ccp_store_flag)
6022	  && (code == ORDERED || code == UNORDERED
6023	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6024	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6025	{
6026	  int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6027			  || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6028
6029	  /* For the reverse comparison, use either an addition or a XOR.  */
6030	  if (want_add
6031	      && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6032			   optimize_insn_for_speed_p ()) == 0)
6033	    {
6034	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6035				       STORE_FLAG_VALUE, target_mode);
6036	      if (tem)
6037		return expand_binop (target_mode, add_optab, tem,
6038				     gen_int_mode (normalizep, target_mode),
6039				     target, 0, OPTAB_WIDEN);
6040	    }
6041	  else if (!want_add
6042		   && rtx_cost (trueval, mode, XOR, 1,
6043				optimize_insn_for_speed_p ()) == 0)
6044	    {
6045	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6046				       normalizep, target_mode);
6047	      if (tem)
6048		return expand_binop (target_mode, xor_optab, tem, trueval,
6049				     target, INTVAL (trueval) >= 0,
6050				     OPTAB_WIDEN);
6051	    }
6052	}
6053
6054      delete_insns_since (last);
6055
6056      /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
6057      if (code == ORDERED || code == UNORDERED)
6058	return 0;
6059
6060      and_them = split_comparison (code, mode, &first_code, &code);
6061
6062      /* If there are no NaNs, the first comparison should always fall through.
6063	 Effectively change the comparison to the other one.  */
6064      if (!HONOR_NANS (mode))
6065	{
6066	  gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6067	  return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6068				    target_mode);
6069	}
6070
6071      if (!HAVE_conditional_move)
6072	return 0;
6073
6074      /* Do not turn a trapping comparison into a non-trapping one.  */
6075      if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6076	  && flag_trapping_math)
6077	return 0;
6078
6079      /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6080	 conditional move.  */
6081      tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6082			       normalizep, target_mode);
6083      if (tem == 0)
6084	return 0;
6085
6086      if (and_them)
6087	tem = emit_conditional_move (target, code, op0, op1, mode,
6088				     tem, const0_rtx, GET_MODE (tem), 0);
6089      else
6090	tem = emit_conditional_move (target, code, op0, op1, mode,
6091				     trueval, tem, GET_MODE (tem), 0);
6092
6093      if (tem == 0)
6094	delete_insns_since (last);
6095      return tem;
6096    }
6097
6098  /* The remaining tricks only apply to integer comparisons.  */
6099
6100  scalar_int_mode int_mode;
6101  if (is_int_mode (mode, &int_mode))
6102    return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6103				unsignedp, normalizep, trueval);
6104
6105  return 0;
6106}
6107
6108/* Like emit_store_flag, but always succeeds.  */
6109
6110rtx
6111emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6112		       machine_mode mode, int unsignedp, int normalizep)
6113{
6114  rtx tem;
6115  rtx_code_label *label;
6116  rtx trueval, falseval;
6117
6118  /* First see if emit_store_flag can do the job.  */
6119  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6120  if (tem != 0)
6121    return tem;
6122
6123  /* If one operand is constant, make it the second one.  Only do this
6124     if the other operand is not constant as well.  */
6125  if (swap_commutative_operands_p (op0, op1))
6126    {
6127      std::swap (op0, op1);
6128      code = swap_condition (code);
6129    }
6130
6131  if (mode == VOIDmode)
6132    mode = GET_MODE (op0);
6133
6134  if (!target)
6135    target = gen_reg_rtx (word_mode);
6136
6137  /* If this failed, we have to do this with set/compare/jump/set code.
6138     For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
6139  trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6140  if (code == NE
6141      && GET_MODE_CLASS (mode) == MODE_INT
6142      && REG_P (target)
6143      && op0 == target
6144      && op1 == const0_rtx)
6145    {
6146      label = gen_label_rtx ();
6147      do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6148			       NULL_RTX, NULL, label,
6149			       profile_probability::uninitialized ());
6150      emit_move_insn (target, trueval);
6151      emit_label (label);
6152      return target;
6153    }
6154
6155  if (!REG_P (target)
6156      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6157    target = gen_reg_rtx (GET_MODE (target));
6158
6159  /* Jump in the right direction if the target cannot implement CODE
6160     but can jump on its reverse condition.  */
6161  falseval = const0_rtx;
6162  if (! can_compare_p (code, mode, ccp_jump)
6163      && (! FLOAT_MODE_P (mode)
6164          || code == ORDERED || code == UNORDERED
6165          || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6166          || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6167    {
6168      enum rtx_code rcode;
6169      if (FLOAT_MODE_P (mode))
6170        rcode = reverse_condition_maybe_unordered (code);
6171      else
6172        rcode = reverse_condition (code);
6173
6174      /* Canonicalize to UNORDERED for the libcall.  */
6175      if (can_compare_p (rcode, mode, ccp_jump)
6176          || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6177	{
6178	  falseval = trueval;
6179	  trueval = const0_rtx;
6180	  code = rcode;
6181	}
6182    }
6183
6184  emit_move_insn (target, trueval);
6185  label = gen_label_rtx ();
6186  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6187			   label, profile_probability::uninitialized ());
6188
6189  emit_move_insn (target, falseval);
6190  emit_label (label);
6191
6192  return target;
6193}
6194
6195/* Perform possibly multi-word comparison and conditional jump to LABEL
6196   if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
6197   now a thin wrapper around do_compare_rtx_and_jump.  */
6198
6199static void
6200do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6201		 rtx_code_label *label)
6202{
6203  int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6204  do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6205			   NULL, label, profile_probability::uninitialized ());
6206}
6207