1/* Medium-level subroutines: convert bit-field store and extract
2   and shifts, multiplies and divides to rtl instructions.
3   Copyright (C) 1987-2017 Free Software Foundation, Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3.  If not see
19<http://www.gnu.org/licenses/>.  */
20
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "backend.h"
26#include "target.h"
27#include "rtl.h"
28#include "tree.h"
29#include "predict.h"
30#include "memmodel.h"
31#include "tm_p.h"
32#include "expmed.h"
33#include "optabs.h"
34#include "emit-rtl.h"
35#include "diagnostic-core.h"
36#include "fold-const.h"
37#include "stor-layout.h"
38#include "dojump.h"
39#include "explow.h"
40#include "expr.h"
41#include "langhooks.h"
42
43struct target_expmed default_target_expmed;
44#if SWITCHABLE_TARGET
45struct target_expmed *this_target_expmed = &default_target_expmed;
46#endif
47
48static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49				   unsigned HOST_WIDE_INT,
50				   unsigned HOST_WIDE_INT,
51				   unsigned HOST_WIDE_INT,
52				   rtx, bool);
53static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
54				     unsigned HOST_WIDE_INT,
55				     rtx, bool);
56static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
57				   unsigned HOST_WIDE_INT,
58				   unsigned HOST_WIDE_INT,
59				   unsigned HOST_WIDE_INT,
60				   rtx, bool);
61static rtx extract_fixed_bit_field (machine_mode, rtx,
62				    unsigned HOST_WIDE_INT,
63				    unsigned HOST_WIDE_INT, rtx, int, bool);
64static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
65				      unsigned HOST_WIDE_INT,
66				      unsigned HOST_WIDE_INT, rtx, int, bool);
67static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
68static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
69				    unsigned HOST_WIDE_INT, int, bool);
70static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
71static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
72static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
73
74/* Return a constant integer mask value of mode MODE with BITSIZE ones
75   followed by BITPOS zeros, or the complement of that if COMPLEMENT.
76   The mask is truncated if necessary to the width of mode MODE.  The
77   mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
78
79static inline rtx
80mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
81{
82  return immed_wide_int_const
83    (wi::shifted_mask (bitpos, bitsize, complement,
84		       GET_MODE_PRECISION (mode)), mode);
85}
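/* For example, mask_rtx (SImode, 8, 4, false) is the SImode constant
   0x00000f00 (four one bits starting at bit 8), and passing true for
   COMPLEMENT instead yields 0xfffff0ff.  */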
86
87/* Test whether a value is zero or a power of two.  */
88#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
89  (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
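/* The expression x & (x - 1) clears the lowest set bit, so the result
   is zero exactly when at most one bit of X is set: the test holds for
   0, 1, 2, 4 and 8, but not for values such as 3, 6 or 12.  */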
90
91struct init_expmed_rtl
92{
93  rtx reg;
94  rtx plus;
95  rtx neg;
96  rtx mult;
97  rtx sdiv;
98  rtx udiv;
99  rtx sdiv_32;
100  rtx smod_32;
101  rtx wide_mult;
102  rtx wide_lshr;
103  rtx wide_trunc;
104  rtx shift;
105  rtx shift_mult;
106  rtx shift_add;
107  rtx shift_sub0;
108  rtx shift_sub1;
109  rtx zext;
110  rtx trunc;
111
112  rtx pow2[MAX_BITS_PER_WORD];
113  rtx cint[MAX_BITS_PER_WORD];
114};
115
116static void
117init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
118		      machine_mode from_mode, bool speed)
119{
120  int to_size, from_size;
121  rtx which;
122
123  to_size = GET_MODE_PRECISION (to_mode);
124  from_size = GET_MODE_PRECISION (from_mode);
125
126  /* Most partial integers have a precision less than the "full"
127     integer they require for storage.  In case one doesn't, reduce
128     the bit size by one for the comparison done here, so that the
129     partial mode compares as narrower.  */
130  if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
131      && pow2p_hwi (to_size))
132    to_size --;
133  if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
134      && pow2p_hwi (from_size))
135    from_size --;
136
137  /* Assume cost of zero-extend and sign-extend is the same.  */
138  which = (to_size < from_size ? all->trunc : all->zext);
139
140  PUT_MODE (all->reg, from_mode);
141  set_convert_cost (to_mode, from_mode, speed,
142		    set_src_cost (which, to_mode, speed));
143}
144
145static void
146init_expmed_one_mode (struct init_expmed_rtl *all,
147		      machine_mode mode, int speed)
148{
149  int m, n, mode_bitsize;
150  machine_mode mode_from;
151
152  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
153
154  PUT_MODE (all->reg, mode);
155  PUT_MODE (all->plus, mode);
156  PUT_MODE (all->neg, mode);
157  PUT_MODE (all->mult, mode);
158  PUT_MODE (all->sdiv, mode);
159  PUT_MODE (all->udiv, mode);
160  PUT_MODE (all->sdiv_32, mode);
161  PUT_MODE (all->smod_32, mode);
162  PUT_MODE (all->wide_trunc, mode);
163  PUT_MODE (all->shift, mode);
164  PUT_MODE (all->shift_mult, mode);
165  PUT_MODE (all->shift_add, mode);
166  PUT_MODE (all->shift_sub0, mode);
167  PUT_MODE (all->shift_sub1, mode);
168  PUT_MODE (all->zext, mode);
169  PUT_MODE (all->trunc, mode);
170
171  set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
172  set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
173  set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
174  set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
175  set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
176
177  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
178				     <= 2 * add_cost (speed, mode)));
179  set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
180				     <= 4 * add_cost (speed, mode)));
181
182  set_shift_cost (speed, mode, 0, 0);
183  {
184    int cost = add_cost (speed, mode);
185    set_shiftadd_cost (speed, mode, 0, cost);
186    set_shiftsub0_cost (speed, mode, 0, cost);
187    set_shiftsub1_cost (speed, mode, 0, cost);
188  }
189
190  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
191  for (m = 1; m < n; m++)
192    {
193      XEXP (all->shift, 1) = all->cint[m];
194      XEXP (all->shift_mult, 1) = all->pow2[m];
195
196      set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
197      set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
198						       speed));
199      set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
200							speed));
201      set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
202							speed));
203    }
204
205  if (SCALAR_INT_MODE_P (mode))
206    {
207      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
208	   mode_from = (machine_mode)(mode_from + 1))
209	init_expmed_one_conv (all, mode, mode_from, speed);
210    }
211  if (GET_MODE_CLASS (mode) == MODE_INT)
212    {
213      machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
214      if (wider_mode != VOIDmode)
215	{
216	  PUT_MODE (all->zext, wider_mode);
217	  PUT_MODE (all->wide_mult, wider_mode);
218	  PUT_MODE (all->wide_lshr, wider_mode);
219	  XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
220
221	  set_mul_widen_cost (speed, wider_mode,
222			      set_src_cost (all->wide_mult, wider_mode, speed));
223	  set_mul_highpart_cost (speed, mode,
224				 set_src_cost (all->wide_trunc, mode, speed));
225	}
226    }
227}
228
229void
230init_expmed (void)
231{
232  struct init_expmed_rtl all;
233  machine_mode mode = QImode;
234  int m, speed;
235
236  memset (&all, 0, sizeof all);
237  for (m = 1; m < MAX_BITS_PER_WORD; m++)
238    {
239      all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
240      all.cint[m] = GEN_INT (m);
241    }
242
243  /* Avoid using hard regs in ways which may be unsupported.  */
244  all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
245  all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
246  all.neg = gen_rtx_NEG (mode, all.reg);
247  all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
248  all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
249  all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
250  all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
251  all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
252  all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
253  all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
254  all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
255  all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
256  all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
257  all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
258  all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
259  all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
260  all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
261  all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
262
263  for (speed = 0; speed < 2; speed++)
264    {
265      crtl->maybe_hot_insn_p = speed;
266      set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
267
268      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
269	   mode = (machine_mode)(mode + 1))
270	init_expmed_one_mode (&all, mode, speed);
271
272      if (MIN_MODE_PARTIAL_INT != VOIDmode)
273	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
274	     mode = (machine_mode)(mode + 1))
275	  init_expmed_one_mode (&all, mode, speed);
276
277      if (MIN_MODE_VECTOR_INT != VOIDmode)
278	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
279	     mode = (machine_mode)(mode + 1))
280	  init_expmed_one_mode (&all, mode, speed);
281    }
282
283  if (alg_hash_used_p ())
284    {
285      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
286      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
287    }
288  else
289    set_alg_hash_used_p (true);
290  default_rtl_profile ();
291
292  ggc_free (all.trunc);
293  ggc_free (all.shift_sub1);
294  ggc_free (all.shift_sub0);
295  ggc_free (all.shift_add);
296  ggc_free (all.shift_mult);
297  ggc_free (all.shift);
298  ggc_free (all.wide_trunc);
299  ggc_free (all.wide_lshr);
300  ggc_free (all.wide_mult);
301  ggc_free (all.zext);
302  ggc_free (all.smod_32);
303  ggc_free (all.sdiv_32);
304  ggc_free (all.udiv);
305  ggc_free (all.sdiv);
306  ggc_free (all.mult);
307  ggc_free (all.neg);
308  ggc_free (all.plus);
309  ggc_free (all.reg);
310}
311
312/* Return an rtx representing minus the value of X.
313   MODE is the intended mode of the result,
314   useful if X is a CONST_INT.  */
315
316rtx
317negate_rtx (machine_mode mode, rtx x)
318{
319  rtx result = simplify_unary_operation (NEG, mode, x, mode);
320
321  if (result == 0)
322    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
323
324  return result;
325}
326
327/* Whether reverse storage order is supported on the target.  */
328static int reverse_storage_order_supported = -1;
329
330/* Check whether reverse storage order is supported on the target.  */
331
332static void
333check_reverse_storage_order_support (void)
334{
335  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
336    {
337      reverse_storage_order_supported = 0;
338      sorry ("reverse scalar storage order");
339    }
340  else
341    reverse_storage_order_supported = 1;
342}
343
344/* Whether reverse FP storage order is supported on the target.  */
345static int reverse_float_storage_order_supported = -1;
346
347/* Check whether reverse FP storage order is supported on the target.  */
348
349static void
350check_reverse_float_storage_order_support (void)
351{
352  if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
353    {
354      reverse_float_storage_order_supported = 0;
355      sorry ("reverse floating-point scalar storage order");
356    }
357  else
358    reverse_float_storage_order_supported = 1;
359}
360
361/* Return an rtx representing the value of X with reverse storage order.
362   MODE is the intended mode of the result,
363   useful if X is a CONST_INT.  */
364
365rtx
366flip_storage_order (enum machine_mode mode, rtx x)
367{
368  enum machine_mode int_mode;
369  rtx result;
370
371  if (mode == QImode)
372    return x;
373
374  if (COMPLEX_MODE_P (mode))
375    {
376      rtx real = read_complex_part (x, false);
377      rtx imag = read_complex_part (x, true);
378
379      real = flip_storage_order (GET_MODE_INNER (mode), real);
380      imag = flip_storage_order (GET_MODE_INNER (mode), imag);
381
382      return gen_rtx_CONCAT (mode, real, imag);
383    }
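  /* From here on the value is a scalar, and flipping its storage order
     amounts to a byte swap: e.g. the SImode value 0x12345678 becomes
     0x78563412.  Non-integer scalars are first punned to an integer
     mode of the same size.  */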
384
385  if (__builtin_expect (reverse_storage_order_supported < 0, 0))
386    check_reverse_storage_order_support ();
387
388  if (SCALAR_INT_MODE_P (mode))
389    int_mode = mode;
390  else
391    {
392      if (FLOAT_MODE_P (mode)
393	  && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
394	check_reverse_float_storage_order_support ();
395
396      int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
397      if (int_mode == BLKmode)
398	{
399	  sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
400	  return x;
401	}
402      x = gen_lowpart (int_mode, x);
403    }
404
405  result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
406  if (result == 0)
407    result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
408
409  if (int_mode != mode)
410    result = gen_lowpart (mode, result);
411
412  return result;
413}
414
415/* Adjust bitfield memory MEM so that it points to the first unit of mode
416   MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
417   If MODE is BLKmode, return a reference to every byte in the bitfield.
418   Set *NEW_BITNUM to the bit position of the field within the new memory.  */
419
420static rtx
421narrow_bit_field_mem (rtx mem, machine_mode mode,
422		      unsigned HOST_WIDE_INT bitsize,
423		      unsigned HOST_WIDE_INT bitnum,
424		      unsigned HOST_WIDE_INT *new_bitnum)
425{
426  if (mode == BLKmode)
427    {
428      *new_bitnum = bitnum % BITS_PER_UNIT;
429      HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
430      HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
431			    / BITS_PER_UNIT);
432      return adjust_bitfield_address_size (mem, mode, offset, size);
433    }
434  else
435    {
436      unsigned int unit = GET_MODE_BITSIZE (mode);
437      *new_bitnum = bitnum % unit;
438      HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
439      return adjust_bitfield_address (mem, mode, offset);
440    }
441}
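/* For example, narrowing a BLKmode reference to a 10-bit field at bit 19
   yields a 2-byte reference at byte offset 2 with *NEW_BITNUM set to 3,
   since the field then occupies bits 3..12 of the new memory.  */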
442
443/* The caller wants to perform insertion or extraction PATTERN on a
444   bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
445   BITREGION_START and BITREGION_END are as for store_bit_field
446   and FIELDMODE is the natural mode of the field.
447
448   Search for a mode that is compatible with the memory access
449   restrictions and (where applicable) with a register insertion or
450   extraction.  Return the new memory on success, storing the adjusted
451   bit position in *NEW_BITNUM.  Return null otherwise.  */
452
453static rtx
454adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
455			      rtx op0, HOST_WIDE_INT bitsize,
456			      HOST_WIDE_INT bitnum,
457			      unsigned HOST_WIDE_INT bitregion_start,
458			      unsigned HOST_WIDE_INT bitregion_end,
459			      machine_mode fieldmode,
460			      unsigned HOST_WIDE_INT *new_bitnum)
461{
462  bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
463				bitregion_end, MEM_ALIGN (op0),
464				MEM_VOLATILE_P (op0));
465  machine_mode best_mode;
466  if (iter.next_mode (&best_mode))
467    {
468      /* We can use a memory in BEST_MODE.  See whether this is true for
469	 any wider modes.  All other things being equal, we prefer to
470	 use the widest mode possible because it tends to expose more
471	 CSE opportunities.  */
472      if (!iter.prefer_smaller_modes ())
473	{
474	  /* Limit the search to the mode required by the corresponding
475	     register insertion or extraction instruction, if any.  */
476	  machine_mode limit_mode = word_mode;
477	  extraction_insn insn;
478	  if (get_best_reg_extraction_insn (&insn, pattern,
479					    GET_MODE_BITSIZE (best_mode),
480					    fieldmode))
481	    limit_mode = insn.field_mode;
482
483	  machine_mode wider_mode;
484	  while (iter.next_mode (&wider_mode)
485		 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
486	    best_mode = wider_mode;
487	}
488      return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
489				   new_bitnum);
490    }
491  return NULL_RTX;
492}
493
494/* Return true if a bitfield of size BITSIZE at bit number BITNUM within
495   a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
496   offset is then BITNUM / BITS_PER_UNIT.  */
497
498static bool
499lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
500		     unsigned HOST_WIDE_INT bitsize,
501		     machine_mode struct_mode)
502{
503  if (BYTES_BIG_ENDIAN)
504    return (bitnum % BITS_PER_UNIT == 0
505	    && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
506		|| (bitnum + bitsize) % BITS_PER_WORD == 0));
507  else
508    return bitnum % BITS_PER_WORD == 0;
509}
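/* On a little-endian target this just checks that the field starts on
   a word boundary: with 32-bit words, a 16-bit field at bit 32 of a
   DImode value is the lowpart of its second word (subreg offset 4).
   On a big-endian target the low part sits at the high-numbered end,
   hence the test that the field ends on a word or structure boundary.  */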
510
511/* Return true if -fstrict-volatile-bitfields applies to an access of OP0
512   containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
513   Return false if the access would touch memory outside the range
514   BITREGION_START to BITREGION_END for conformance to the C++ memory
515   model.  */
516
517static bool
518strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
519			    unsigned HOST_WIDE_INT bitnum,
520			    machine_mode fieldmode,
521			    unsigned HOST_WIDE_INT bitregion_start,
522			    unsigned HOST_WIDE_INT bitregion_end)
523{
524  unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
525
526  /* -fstrict-volatile-bitfields must be enabled and we must have a
527     volatile MEM.  */
528  if (!MEM_P (op0)
529      || !MEM_VOLATILE_P (op0)
530      || flag_strict_volatile_bitfields <= 0)
531    return false;
532
533  /* Non-integral modes likely only happen with packed structures.
534     Punt.  */
535  if (!SCALAR_INT_MODE_P (fieldmode))
536    return false;
537
538  /* The bit size must not be larger than the field mode, and
539     the field mode must not be larger than a word.  */
540  if (bitsize > modesize || modesize > BITS_PER_WORD)
541    return false;
542
543  /* Check for cases of unaligned fields that must be split.  */
544  if (bitnum % modesize + bitsize > modesize)
545    return false;
546
547  /* The memory must be sufficiently aligned for a MODESIZE access.
548     This condition guarantees that the memory access will not
549     touch anything after the end of the structure.  */
550  if (MEM_ALIGN (op0) < modesize)
551    return false;
552
553  /* Check for cases where the C++ memory model applies.  */
554  if (bitregion_end != 0
555      && (bitnum - bitnum % modesize < bitregion_start
556	  || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
557    return false;
558
559  return true;
560}
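/* For instance, with -fstrict-volatile-bitfields an SImode bit-field
   that lies entirely within one aligned SImode word of a volatile
   structure passes all of the checks above, so the caller performs a
   single SImode load or store rather than a narrower access.  */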
561
562/* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
563   bit number BITNUM can be treated as a simple value of mode MODE.  */
564
565static bool
566simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
567		       unsigned HOST_WIDE_INT bitnum, machine_mode mode)
568{
569  return (MEM_P (op0)
570	  && bitnum % BITS_PER_UNIT == 0
571	  && bitsize == GET_MODE_BITSIZE (mode)
572	  && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
573	      || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
574		  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
575}
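/* For example, a 32-bit field that starts at bit 64 of a sufficiently
   aligned MEM satisfies these conditions and can be read or written as
   the SImode word at byte offset 8.  */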
576
577/* Try to use instruction INSV to store VALUE into a field of OP0.
578   BITSIZE and BITNUM are as for store_bit_field.  */
579
580static bool
581store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
582			    unsigned HOST_WIDE_INT bitsize,
583			    unsigned HOST_WIDE_INT bitnum,
584			    rtx value)
585{
586  struct expand_operand ops[4];
587  rtx value1;
588  rtx xop0 = op0;
589  rtx_insn *last = get_last_insn ();
590  bool copy_back = false;
591
592  machine_mode op_mode = insv->field_mode;
593  unsigned int unit = GET_MODE_BITSIZE (op_mode);
594  if (bitsize == 0 || bitsize > unit)
595    return false;
596
597  if (MEM_P (xop0))
598    /* Get a reference to the first byte of the field.  */
599    xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
600				 &bitnum);
601  else
602    {
603      /* Convert from counting within OP0 to counting in OP_MODE.  */
604      if (BYTES_BIG_ENDIAN)
605	bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
606
607      /* If xop0 is a register, we need it in OP_MODE
608	 to make it acceptable to the format of insv.  */
609      if (GET_CODE (xop0) == SUBREG)
610	/* We can't just change the mode, because this might clobber op0,
611	   and we will need the original value of op0 if insv fails.  */
612	xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
613      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
614	xop0 = gen_lowpart_SUBREG (op_mode, xop0);
615    }
616
617  /* If the destination is a paradoxical subreg such that we need a
618     truncate to the inner mode, perform the insertion on a temporary and
619     truncate the result to the original destination.  Note that we can't
620     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
621     X) 0)) is (reg:N X).  */
622  if (GET_CODE (xop0) == SUBREG
623      && REG_P (SUBREG_REG (xop0))
624      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
625					 op_mode))
626    {
627      rtx tem = gen_reg_rtx (op_mode);
628      emit_move_insn (tem, xop0);
629      xop0 = tem;
630      copy_back = true;
631    }
632
633  /* There is a similar overflow check at the start of store_bit_field_1,
634     but it only handles the case where the field lies completely
635     outside the register.  Here the field may lie only partially in
636     the register, so we need to adjust BITSIZE for that partial
637     overflow.  Without this fix, pr48335-2.c would break on
638     big-endian targets that have a bit-insert instruction, such as
639     arm and aarch64.  */
640  if (bitsize + bitnum > unit && bitnum < unit)
641    {
642      warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
643	       "destination object, data truncated into %wu-bit",
644	       bitsize, unit - bitnum);
645      bitsize = unit - bitnum;
646    }
647
648  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
649     "backwards" from the size of the unit we are inserting into.
650     Otherwise, we count bits from the most significant on a
651     BYTES/BITS_BIG_ENDIAN machine.  */
652
653  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
654    bitnum = unit - bitsize - bitnum;
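  /* For instance, with a 32-bit unit an 8-bit field that started at
     bit 0 is renumbered to start at bit 24.  */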
655
656  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
657  value1 = value;
658  if (GET_MODE (value) != op_mode)
659    {
660      if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
661	{
662	  rtx tmp;
663	  /* Optimization: Don't bother really extending VALUE
664	     if it has all the bits we will actually use.  However,
665	     if we must narrow it, be sure we do it correctly.  */
666
667	  if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
668	    {
669	      tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
670	      if (! tmp)
671		tmp = simplify_gen_subreg (op_mode,
672					   force_reg (GET_MODE (value),
673						      value1),
674					   GET_MODE (value), 0);
675	    }
676	  else
677	    {
678	      tmp = gen_lowpart_if_possible (op_mode, value1);
679	      if (! tmp)
680		tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
681						       value1));
682	    }
683	  value1 = tmp;
684	}
685      else if (CONST_INT_P (value))
686	value1 = gen_int_mode (INTVAL (value), op_mode);
687      else
688	/* Parse phase is supposed to make VALUE's data type
689	   match that of the component reference, which is a type
690	   at least as wide as the field; so VALUE should have
691	   a mode that corresponds to that type.  */
692	gcc_assert (CONSTANT_P (value));
693    }
694
695  create_fixed_operand (&ops[0], xop0);
696  create_integer_operand (&ops[1], bitsize);
697  create_integer_operand (&ops[2], bitnum);
698  create_input_operand (&ops[3], value1, op_mode);
699  if (maybe_expand_insn (insv->icode, 4, ops))
700    {
701      if (copy_back)
702	convert_move (op0, xop0, true);
703      return true;
704    }
705  delete_insns_since (last);
706  return false;
707}
708
709/* A subroutine of store_bit_field, with the same arguments.  Return true
710   if the operation could be implemented.
711
712   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
713   no other way of implementing the operation.  If FALLBACK_P is false,
714   return false instead.  */
715
716static bool
717store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
718		   unsigned HOST_WIDE_INT bitnum,
719		   unsigned HOST_WIDE_INT bitregion_start,
720		   unsigned HOST_WIDE_INT bitregion_end,
721		   machine_mode fieldmode,
722		   rtx value, bool reverse, bool fallback_p)
723{
724  rtx op0 = str_rtx;
725  rtx orig_value;
726
727  while (GET_CODE (op0) == SUBREG)
728    {
729      /* The following line once was done only if WORDS_BIG_ENDIAN,
730	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
731	 meaningful at a much higher level; when structures are copied
732	 between memory and regs, the higher-numbered regs
733	 always get higher addresses.  */
734      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
735      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
736      int byte_offset = 0;
737
738      /* Paradoxical subregs need special handling on big-endian machines.  */
739      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
740	{
741	  int difference = inner_mode_size - outer_mode_size;
742
743	  if (WORDS_BIG_ENDIAN)
744	    byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
745	  if (BYTES_BIG_ENDIAN)
746	    byte_offset += difference % UNITS_PER_WORD;
747	}
748      else
749	byte_offset = SUBREG_BYTE (op0);
750
751      bitnum += byte_offset * BITS_PER_UNIT;
752      op0 = SUBREG_REG (op0);
753    }
754
755  /* No action is needed if the target is a register and if the field
756     lies completely outside that register.  This can occur if the source
757     code contains an out-of-bounds access to a small array.  */
758  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
759    return true;
760
761  /* Use vec_set patterns for inserting parts of vectors whenever
762     available.  */
763  if (VECTOR_MODE_P (GET_MODE (op0))
764      && !MEM_P (op0)
765      && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
766      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
767      && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
768      && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
769    {
770      struct expand_operand ops[3];
771      machine_mode outermode = GET_MODE (op0);
772      machine_mode innermode = GET_MODE_INNER (outermode);
773      enum insn_code icode = optab_handler (vec_set_optab, outermode);
774      int pos = bitnum / GET_MODE_BITSIZE (innermode);
775
776      create_fixed_operand (&ops[0], op0);
777      create_input_operand (&ops[1], value, innermode);
778      create_integer_operand (&ops[2], pos);
779      if (maybe_expand_insn (icode, 3, ops))
780	return true;
781    }
782
783  /* If the target is a register, overwriting the entire object, or storing
784     a full-word or multi-word field can be done with just a SUBREG.  */
785  if (!MEM_P (op0)
786      && bitsize == GET_MODE_BITSIZE (fieldmode)
787      && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
788	  || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
789    {
790      /* Use the subreg machinery either to narrow OP0 to the required
791	 words or to cope with mode punning between equal-sized modes.
792	 In the latter case, use subreg on the rhs side, not lhs.  */
793      rtx sub;
794
795      if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
796	{
797	  sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
798	  if (sub)
799	    {
800	      if (reverse)
801		sub = flip_storage_order (GET_MODE (op0), sub);
802	      emit_move_insn (op0, sub);
803	      return true;
804	    }
805	}
806      else
807	{
808	  sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
809				     bitnum / BITS_PER_UNIT);
810	  if (sub)
811	    {
812	      if (reverse)
813		value = flip_storage_order (fieldmode, value);
814	      emit_move_insn (sub, value);
815	      return true;
816	    }
817	}
818    }
819
820  /* If the target is memory, storing any naturally aligned field can be
821     done with a simple store.  For targets that support fast unaligned
822     memory, any naturally sized, unit aligned field can be done directly.  */
823  if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
824    {
825      op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
826      if (reverse)
827	value = flip_storage_order (fieldmode, value);
828      emit_move_insn (op0, value);
829      return true;
830    }
831
832  /* Make sure we are playing with integral modes.  Pun with subregs
833     if we aren't.  This must come after the entire register case above,
834     since that case is valid for any mode.  The following cases are only
835     valid for integral modes.  */
836  {
837    machine_mode imode = int_mode_for_mode (GET_MODE (op0));
838    if (imode != GET_MODE (op0))
839      {
840	if (MEM_P (op0))
841	  op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
842	else
843	  {
844	    gcc_assert (imode != BLKmode);
845	    op0 = gen_lowpart (imode, op0);
846	  }
847      }
848  }
849
850  /* Storing an lsb-aligned field in a register
851     can be done with a movstrict instruction.  */
852
853  if (!MEM_P (op0)
854      && !reverse
855      && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
856      && bitsize == GET_MODE_BITSIZE (fieldmode)
857      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
858    {
859      struct expand_operand ops[2];
860      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
861      rtx arg0 = op0;
862      unsigned HOST_WIDE_INT subreg_off;
863
864      if (GET_CODE (arg0) == SUBREG)
865	{
866	  /* Else we've got some float mode source being extracted into
867	     a different float mode destination -- this combination of
868	     subregs results in Severe Tire Damage.  */
869	  gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
870		      || GET_MODE_CLASS (fieldmode) == MODE_INT
871		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
872	  arg0 = SUBREG_REG (arg0);
873	}
874
875      subreg_off = bitnum / BITS_PER_UNIT;
876      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
877	{
878	  arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
879
880	  create_fixed_operand (&ops[0], arg0);
881	  /* Shrink the source operand to FIELDMODE.  */
882	  create_convert_operand_to (&ops[1], value, fieldmode, false);
883	  if (maybe_expand_insn (icode, 2, ops))
884	    return true;
885	}
886    }
887
888  /* Handle fields bigger than a word.  */
889
890  if (bitsize > BITS_PER_WORD)
891    {
892      /* Here we transfer the words of the field
893	 in the order least significant first.
894	 This is because the most significant word is the one which may
895	 be less than full.
896	 However, only do that if the value is not BLKmode.  */
897
898      const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
899      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
900      unsigned int i;
901      rtx_insn *last;
902
903      /* This is the mode we must force value to, so that there will be enough
904	 subwords to extract.  Note that fieldmode will often (always?) be
905	 VOIDmode, because that is what store_field uses to indicate that this
906	 is a bit field, but passing VOIDmode to operand_subword_force
907	 is not allowed.  */
908      fieldmode = GET_MODE (value);
909      if (fieldmode == VOIDmode)
910	fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
911
912      last = get_last_insn ();
913      for (i = 0; i < nwords; i++)
914	{
915	  /* If I is 0, use the low-order word in both field and target;
916	     if I is 1, use the next to lowest word; and so on.  */
917	  unsigned int wordnum = (backwards
918				  ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
919				  - i - 1
920				  : i);
921	  unsigned int bit_offset = (backwards ^ reverse
922				     ? MAX ((int) bitsize - ((int) i + 1)
923					    * BITS_PER_WORD,
924					    0)
925				     : (int) i * BITS_PER_WORD);
926	  rtx value_word = operand_subword_force (value, wordnum, fieldmode);
927	  unsigned HOST_WIDE_INT new_bitsize =
928	    MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
929
930	  /* If the remaining chunk doesn't have full wordsize we have
931	     to make sure that for big-endian machines the higher order
932	     bits are used.  */
933	  if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
934	    value_word = simplify_expand_binop (word_mode, lshr_optab,
935						value_word,
936						GEN_INT (BITS_PER_WORD
937							 - new_bitsize),
938						NULL_RTX, true,
939						OPTAB_LIB_WIDEN);
940
941	  if (!store_bit_field_1 (op0, new_bitsize,
942				  bitnum + bit_offset,
943				  bitregion_start, bitregion_end,
944				  word_mode,
945				  value_word, reverse, fallback_p))
946	    {
947	      delete_insns_since (last);
948	      return false;
949	    }
950	}
951      return true;
952    }
953
954  /* If VALUE has a floating-point or complex mode, access it as an
955     integer of the corresponding size.  This can occur on a machine
956     with 64 bit registers that uses SFmode for float.  It can also
957     occur for unaligned float or complex fields.  */
958  orig_value = value;
959  if (GET_MODE (value) != VOIDmode
960      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
961      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
962    {
963      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
964      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
965    }
966
967  /* If OP0 is a multi-word register, narrow it to the affected word.
968     If the region spans two words, defer to store_split_bit_field.
969     Don't do this if op0 is a single hard register wider than word
970     such as a float or vector register.  */
971  if (!MEM_P (op0)
972      && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD
973      && (!REG_P (op0)
974	  || !HARD_REGISTER_P (op0)
975	  || HARD_REGNO_NREGS (REGNO (op0), GET_MODE (op0)) != 1))
976    {
977      if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
978	{
979	  if (!fallback_p)
980	    return false;
981
982	  store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
983				 bitregion_end, value, reverse);
984	  return true;
985	}
986      op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
987				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
988      gcc_assert (op0);
989      bitnum %= BITS_PER_WORD;
990    }
991
992  /* From here on we can assume that the field to be stored in fits
993     within a word.  If the destination is a register, it too fits
994     in a word.  */
995
996  extraction_insn insv;
997  if (!MEM_P (op0)
998      && !reverse
999      && get_best_reg_extraction_insn (&insv, EP_insv,
1000				       GET_MODE_BITSIZE (GET_MODE (op0)),
1001				       fieldmode)
1002      && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1003    return true;
1004
1005  /* If OP0 is a memory, try copying it to a register and seeing if a
1006     cheap register alternative is available.  */
1007  if (MEM_P (op0) && !reverse)
1008    {
1009      if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1010					fieldmode)
1011	  && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1012	return true;
1013
1014      rtx_insn *last = get_last_insn ();
1015
1016      /* Try loading part of OP0 into a register, inserting the bitfield
1017	 into that, and then copying the result back to OP0.  */
1018      unsigned HOST_WIDE_INT bitpos;
1019      rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1020					       bitregion_start, bitregion_end,
1021					       fieldmode, &bitpos);
1022      if (xop0)
1023	{
1024	  rtx tempreg = copy_to_reg (xop0);
1025	  if (store_bit_field_1 (tempreg, bitsize, bitpos,
1026				 bitregion_start, bitregion_end,
1027				 fieldmode, orig_value, reverse, false))
1028	    {
1029	      emit_move_insn (xop0, tempreg);
1030	      return true;
1031	    }
1032	  delete_insns_since (last);
1033	}
1034    }
1035
1036  if (!fallback_p)
1037    return false;
1038
1039  store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1040			 bitregion_end, value, reverse);
1041  return true;
1042}
1043
1044/* Generate code to store value from rtx VALUE
1045   into a bit-field within structure STR_RTX
1046   containing BITSIZE bits starting at bit BITNUM.
1047
1048   BITREGION_START is the bitpos of the first bitfield in this region.
1049   BITREGION_END is the bitpos of the ending bitfield in this region.
1050   These two fields are 0 if the C++ memory model does not apply,
1051   or we are not interested in keeping track of bitfield regions.
1052
1053   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1054
1055   If REVERSE is true, the store is to be done in reverse order.  */
1056
1057void
1058store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1059		 unsigned HOST_WIDE_INT bitnum,
1060		 unsigned HOST_WIDE_INT bitregion_start,
1061		 unsigned HOST_WIDE_INT bitregion_end,
1062		 machine_mode fieldmode,
1063		 rtx value, bool reverse)
1064{
1065  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1066  if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1067				  bitregion_start, bitregion_end))
1068    {
1069      /* Storing of a full word can be done with a simple store.
1070	 We know here that the field can be accessed with one single
1071	 instruction.  For targets that support unaligned memory,
1072	 an unaligned access may be necessary.  */
1073      if (bitsize == GET_MODE_BITSIZE (fieldmode))
1074	{
1075	  str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1076					     bitnum / BITS_PER_UNIT);
1077	  if (reverse)
1078	    value = flip_storage_order (fieldmode, value);
1079	  gcc_assert (bitnum % BITS_PER_UNIT == 0);
1080	  emit_move_insn (str_rtx, value);
1081	}
1082      else
1083	{
1084	  rtx temp;
1085
1086	  str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1087					  &bitnum);
1088	  gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1089	  temp = copy_to_reg (str_rtx);
1090	  if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1091				  fieldmode, value, reverse, true))
1092	    gcc_unreachable ();
1093
1094	  emit_move_insn (str_rtx, temp);
1095	}
1096
1097      return;
1098    }
1099
1100  /* Under the C++0x memory model, we must not touch bits outside the
1101     bit region.  Adjust the address to start at the beginning of the
1102     bit region.  */
1103  if (MEM_P (str_rtx) && bitregion_start > 0)
1104    {
1105      machine_mode bestmode;
1106      HOST_WIDE_INT offset, size;
1107
1108      gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1109
1110      offset = bitregion_start / BITS_PER_UNIT;
1111      bitnum -= bitregion_start;
1112      size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1113      bitregion_end -= bitregion_start;
1114      bitregion_start = 0;
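      /* For example, if the bit region started at bit 32 and the field
	 at bit 40, OFFSET is now 4 bytes and BITNUM is 8, so the access
	 is expressed relative to the start of the region.  */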
1115      bestmode = get_best_mode (bitsize, bitnum,
1116				bitregion_start, bitregion_end,
1117				MEM_ALIGN (str_rtx), VOIDmode,
1118				MEM_VOLATILE_P (str_rtx));
1119      str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1120    }
1121
1122  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1123			  bitregion_start, bitregion_end,
1124			  fieldmode, value, reverse, true))
1125    gcc_unreachable ();
1126}
1127
1128/* Use shifts and boolean operations to store VALUE into a bit field of
1129   width BITSIZE in OP0, starting at bit BITNUM.
1130
1131   If REVERSE is true, the store is to be done in reverse order.  */
1132
1133static void
1134store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1135		       unsigned HOST_WIDE_INT bitnum,
1136		       unsigned HOST_WIDE_INT bitregion_start,
1137		       unsigned HOST_WIDE_INT bitregion_end,
1138		       rtx value, bool reverse)
1139{
1140  /* There is a case not handled here:
1141     a structure with a known alignment of just a halfword
1142     and a field split across two aligned halfwords within the structure.
1143     Or likewise a structure with a known alignment of just a byte
1144     and a field split across two bytes.
1145     Such cases are not supposed to be able to occur.  */
1146
1147  if (MEM_P (op0))
1148    {
1149      machine_mode mode = GET_MODE (op0);
1150      if (GET_MODE_BITSIZE (mode) == 0
1151	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1152	mode = word_mode;
1153      mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1154			    MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1155
1156      if (mode == VOIDmode)
1157	{
1158	  /* The only way this should occur is if the field spans word
1159	     boundaries.  */
1160	  store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1161				 bitregion_end, value, reverse);
1162	  return;
1163	}
1164
1165      op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1166    }
1167
1168  store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1169}
1170
1171/* Helper function for store_fixed_bit_field; it always stores
1172   the bit field using the mode of OP0.  */
1173
1174static void
1175store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1176			 unsigned HOST_WIDE_INT bitnum,
1177			 rtx value, bool reverse)
1178{
1179  machine_mode mode;
1180  rtx temp;
1181  int all_zero = 0;
1182  int all_one = 0;
1183
1184  mode = GET_MODE (op0);
1185  gcc_assert (SCALAR_INT_MODE_P (mode));
1186
1187  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1188     for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1189
1190  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1191    /* BITNUM is the distance between our msb
1192       and that of the containing datum.
1193       Convert it to the distance from the lsb.  */
1194    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1195
1196  /* Now BITNUM is always the distance between our lsb
1197     and that of OP0.  */
1198
1199  /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1200     we must first convert its mode to MODE.  */
1201
1202  if (CONST_INT_P (value))
1203    {
1204      unsigned HOST_WIDE_INT v = UINTVAL (value);
1205
1206      if (bitsize < HOST_BITS_PER_WIDE_INT)
1207	v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1208
1209      if (v == 0)
1210	all_zero = 1;
1211      else if ((bitsize < HOST_BITS_PER_WIDE_INT
1212		&& v == (HOST_WIDE_INT_1U << bitsize) - 1)
1213	       || (bitsize == HOST_BITS_PER_WIDE_INT
1214		   && v == HOST_WIDE_INT_M1U))
1215	all_one = 1;
1216
1217      value = lshift_value (mode, v, bitnum);
1218    }
1219  else
1220    {
1221      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1222		      && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1223
1224      if (GET_MODE (value) != mode)
1225	value = convert_to_mode (mode, value, 1);
1226
1227      if (must_and)
1228	value = expand_binop (mode, and_optab, value,
1229			      mask_rtx (mode, 0, bitsize, 0),
1230			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
1231      if (bitnum > 0)
1232	value = expand_shift (LSHIFT_EXPR, mode, value,
1233			      bitnum, NULL_RTX, 1);
1234    }
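  /* VALUE now holds the field contents shifted into position; the code
     below computes (OP0 & ~MASK) | VALUE, where MASK has BITSIZE one
     bits starting at BITNUM, skipping whichever step is unnecessary.  */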
1235
1236  if (reverse)
1237    value = flip_storage_order (mode, value);
1238
1239  /* Now clear the chosen bits in OP0,
1240     except that if VALUE is -1 we need not bother.  */
1241  /* We keep the intermediates in registers to allow CSE to combine
1242     consecutive bitfield assignments.  */
1243
1244  temp = force_reg (mode, op0);
1245
1246  if (! all_one)
1247    {
1248      rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1249      if (reverse)
1250	mask = flip_storage_order (mode, mask);
1251      temp = expand_binop (mode, and_optab, temp, mask,
1252			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1253      temp = force_reg (mode, temp);
1254    }
1255
1256  /* Now logical-or VALUE into OP0, unless it is zero.  */
1257
1258  if (! all_zero)
1259    {
1260      temp = expand_binop (mode, ior_optab, temp, value,
1261			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1262      temp = force_reg (mode, temp);
1263    }
1264
1265  if (op0 != temp)
1266    {
1267      op0 = copy_rtx (op0);
1268      emit_move_insn (op0, temp);
1269    }
1270}
1271
1272/* Store a bit field that is split across multiple accessible memory objects.
1273
1274   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1275   BITSIZE is the field width; BITPOS the position of its first bit
1276   (within the word).
1277   VALUE is the value to store.
1278
1279   If REVERSE is true, the store is to be done in reverse order.
1280
1281   This does not yet handle fields wider than BITS_PER_WORD.  */
1282
1283static void
1284store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1285		       unsigned HOST_WIDE_INT bitpos,
1286		       unsigned HOST_WIDE_INT bitregion_start,
1287		       unsigned HOST_WIDE_INT bitregion_end,
1288		       rtx value, bool reverse)
1289{
1290  unsigned int unit, total_bits, bitsdone = 0;
1291
1292  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1293     much at a time.  */
1294  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1295    unit = BITS_PER_WORD;
1296  else
1297    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1298
1299  /* If OP0 is a memory with a mode, then UNIT must not be larger than
1300     OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1301     again, and we will mutually recurse forever.  */
1302  if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1303    unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1304
1305  /* If VALUE is a constant other than a CONST_INT, get it into a register in
1306     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1307     that VALUE might be a floating-point constant.  */
1308  if (CONSTANT_P (value) && !CONST_INT_P (value))
1309    {
1310      rtx word = gen_lowpart_common (word_mode, value);
1311
1312      if (word && (value != word))
1313	value = word;
1314      else
1315	value = gen_lowpart_common (word_mode,
1316				    force_reg (GET_MODE (value) != VOIDmode
1317					       ? GET_MODE (value)
1318					       : word_mode, value));
1319    }
1320
1321  total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1322
1323  while (bitsdone < bitsize)
1324    {
1325      unsigned HOST_WIDE_INT thissize;
1326      unsigned HOST_WIDE_INT thispos;
1327      unsigned HOST_WIDE_INT offset;
1328      rtx part, word;
1329
1330      offset = (bitpos + bitsdone) / unit;
1331      thispos = (bitpos + bitsdone) % unit;
1332
1333      /* When the region of bytes we can touch is restricted, decrease
1334	 UNIT close to the end of the region as needed.  If op0 is a REG
1335	 or SUBREG of REG, don't do this, as there can't be data races
1336	 on a register and we can expand shorter code in some cases.  */
1337      if (bitregion_end
1338	  && unit > BITS_PER_UNIT
1339	  && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1340	  && !REG_P (op0)
1341	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1342	{
1343	  unit = unit / 2;
1344	  continue;
1345	}
1346
1347      /* THISSIZE must not overrun a word boundary.  Otherwise,
1348	 store_fixed_bit_field will call us again, and we will mutually
1349	 recurse forever.  */
1350      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1351      thissize = MIN (thissize, unit - thispos);
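      /* For example, an 8-bit field at bit position 28 with a 32-bit
	 UNIT is stored as two 4-bit pieces: bits 28..31 of the first
	 unit and bits 0..3 of the next.  */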
1352
1353      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1354	{
1355	  /* Fetch successively less significant portions.  */
1356	  if (CONST_INT_P (value))
1357	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1358			     >> (bitsize - bitsdone - thissize))
1359			    & ((HOST_WIDE_INT_1 << thissize) - 1));
1360          /* Likewise, but the source is little-endian.  */
1361          else if (reverse)
1362	    part = extract_fixed_bit_field (word_mode, value, thissize,
1363					    bitsize - bitsdone - thissize,
1364					    NULL_RTX, 1, false);
1365	  else
1366	    {
1367	      int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1368	      /* The args are chosen so that the last part includes the
1369		 lsb.  Give extract_bit_field the value it needs (with
1370		 endianness compensation) to fetch the piece we want.  */
1371	      part = extract_fixed_bit_field (word_mode, value, thissize,
1372					      total_bits - bitsize + bitsdone,
1373					      NULL_RTX, 1, false);
1374	    }
1375	}
1376      else
1377	{
1378	  /* Fetch successively more significant portions.  */
1379	  if (CONST_INT_P (value))
1380	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1381			     >> bitsdone)
1382			    & ((HOST_WIDE_INT_1 << thissize) - 1));
1383	  /* Likewise, but the source is big-endian.  */
1384          else if (reverse)
1385	    part = extract_fixed_bit_field (word_mode, value, thissize,
1386					    total_bits - bitsdone - thissize,
1387					    NULL_RTX, 1, false);
1388	  else
1389	    part = extract_fixed_bit_field (word_mode, value, thissize,
1390					    bitsdone, NULL_RTX, 1, false);
1391	}
1392
1393      /* If OP0 is a register, then handle OFFSET here.  */
1394      if (SUBREG_P (op0) || REG_P (op0))
1395	{
1396	  machine_mode op0_mode = GET_MODE (op0);
1397	  if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1398	    word = offset ? const0_rtx : op0;
1399	  else
1400	    word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1401					  GET_MODE (op0));
1402	  offset &= BITS_PER_WORD / unit - 1;
1403	}
1404      else
1405	word = op0;
1406
1407      /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1408	 it is just an out-of-bounds access.  Ignore it.  */
1409      if (word != const0_rtx)
1410	store_fixed_bit_field (word, thissize, offset * unit + thispos,
1411			       bitregion_start, bitregion_end, part,
1412			       reverse);
1413      bitsdone += thissize;
1414    }
1415}
1416
1417/* A subroutine of extract_bit_field_1 that converts return value X
1418   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1419   to extract_bit_field.  */
1420
1421static rtx
1422convert_extracted_bit_field (rtx x, machine_mode mode,
1423			     machine_mode tmode, bool unsignedp)
1424{
1425  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1426    return x;
1427
1428  /* If TMODE is not a scalar integer mode, first convert to the
1429     integer mode of that size and then access the result in TMODE
1430     via a SUBREG.  */
1431  if (!SCALAR_INT_MODE_P (tmode))
1432    {
1433      machine_mode smode;
1434
1435      smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1436      x = convert_to_mode (smode, x, unsignedp);
1437      x = force_reg (smode, x);
1438      return gen_lowpart (tmode, x);
1439    }
1440
1441  return convert_to_mode (tmode, x, unsignedp);
1442}
1443
1444/* Try to use an ext(z)v pattern to extract a field from OP0.
1445   Return the extracted value on success, otherwise return null.
1446   EXT_MODE is the mode of the extraction and the other arguments
1447   are as for extract_bit_field.  */
1448
1449static rtx
1450extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1451			      unsigned HOST_WIDE_INT bitsize,
1452			      unsigned HOST_WIDE_INT bitnum,
1453			      int unsignedp, rtx target,
1454			      machine_mode mode, machine_mode tmode)
1455{
1456  struct expand_operand ops[4];
1457  rtx spec_target = target;
1458  rtx spec_target_subreg = 0;
1459  machine_mode ext_mode = extv->field_mode;
1460  unsigned unit = GET_MODE_BITSIZE (ext_mode);
1461
1462  if (bitsize == 0 || unit < bitsize)
1463    return NULL_RTX;
1464
1465  if (MEM_P (op0))
1466    /* Get a reference to the first byte of the field.  */
1467    op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1468				&bitnum);
1469  else
1470    {
1471      /* Convert from counting within OP0 to counting in EXT_MODE.  */
1472      if (BYTES_BIG_ENDIAN)
1473	bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1474
1475      /* If op0 is a register, we need it in EXT_MODE to make it
1476	 acceptable to the format of ext(z)v.  */
1477      if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1478	return NULL_RTX;
1479      if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1480	op0 = gen_lowpart_SUBREG (ext_mode, op0);
1481    }
1482
1483  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1484     "backwards" from the size of the unit we are extracting from.
1485     Otherwise, we count bits from the most significant on a
1486     BYTES/BITS_BIG_ENDIAN machine.  */
1487
1488  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1489    bitnum = unit - bitsize - bitnum;
1490
1491  if (target == 0)
1492    target = spec_target = gen_reg_rtx (tmode);
1493
1494  if (GET_MODE (target) != ext_mode)
1495    {
1496      /* Don't use LHS paradoxical subreg if explicit truncation is needed
1497	 between the mode of the extraction (word_mode) and the target
1498	 mode.  Instead, create a temporary and use convert_move to set
1499	 the target.  */
1500      if (REG_P (target)
1501	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1502	{
1503	  target = gen_lowpart (ext_mode, target);
1504	  if (GET_MODE_PRECISION (ext_mode)
1505	      > GET_MODE_PRECISION (GET_MODE (spec_target)))
1506	    spec_target_subreg = target;
1507	}
1508      else
1509	target = gen_reg_rtx (ext_mode);
1510    }
1511
1512  create_output_operand (&ops[0], target, ext_mode);
1513  create_fixed_operand (&ops[1], op0);
1514  create_integer_operand (&ops[2], bitsize);
1515  create_integer_operand (&ops[3], bitnum);
1516  if (maybe_expand_insn (extv->icode, 4, ops))
1517    {
1518      target = ops[0].value;
1519      if (target == spec_target)
1520	return target;
1521      if (target == spec_target_subreg)
1522	return spec_target;
1523      return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1524    }
1525  return NULL_RTX;
1526}
1527
1528/* A subroutine of extract_bit_field, with the same arguments.
1529   If FALLBACK_P is true, fall back to extract_fixed_bit_field
1530   if we can find no other means of implementing the operation.
1531   If FALLBACK_P is false, return NULL instead.  */
1532
1533static rtx
1534extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1535		     unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1536		     machine_mode mode, machine_mode tmode,
1537		     bool reverse, bool fallback_p)
1538{
1539  rtx op0 = str_rtx;
1540  machine_mode int_mode;
1541  machine_mode mode1;
1542
1543  if (tmode == VOIDmode)
1544    tmode = mode;
1545
1546  while (GET_CODE (op0) == SUBREG)
1547    {
1548      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1549      op0 = SUBREG_REG (op0);
1550    }
1551
1552  /* If we have an out-of-bounds access to a register, just return an
1553     uninitialized register of the required mode.  This can occur if the
1554     source code contains an out-of-bounds access to a small array.  */
1555  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1556    return gen_reg_rtx (tmode);
1557
1558  if (REG_P (op0)
1559      && mode == GET_MODE (op0)
1560      && bitnum == 0
1561      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1562    {
1563      if (reverse)
1564	op0 = flip_storage_order (mode, op0);
1565      /* We're trying to extract a full register from itself.  */
1566      return op0;
1567    }
1568
1569  /* See if we can get a better vector mode before extracting.  */
1570  if (VECTOR_MODE_P (GET_MODE (op0))
1571      && !MEM_P (op0)
1572      && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1573    {
1574      machine_mode new_mode;
1575
1576      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1577	new_mode = MIN_MODE_VECTOR_FLOAT;
1578      else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1579	new_mode = MIN_MODE_VECTOR_FRACT;
1580      else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1581	new_mode = MIN_MODE_VECTOR_UFRACT;
1582      else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1583	new_mode = MIN_MODE_VECTOR_ACCUM;
1584      else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1585	new_mode = MIN_MODE_VECTOR_UACCUM;
1586      else
1587	new_mode = MIN_MODE_VECTOR_INT;
1588
1589      for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1590	if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1591	    && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1592	    && targetm.vector_mode_supported_p (new_mode))
1593	  break;
1594      if (new_mode != VOIDmode)
1595	op0 = gen_lowpart (new_mode, op0);
1596    }
1597
1598  /* Use vec_extract patterns for extracting parts of vectors whenever
1599     available.  */
1600  if (VECTOR_MODE_P (GET_MODE (op0))
1601      && !MEM_P (op0)
1602      && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1603      && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1604	  == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1605    {
1606      struct expand_operand ops[3];
1607      machine_mode outermode = GET_MODE (op0);
1608      machine_mode innermode = GET_MODE_INNER (outermode);
1609      enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1610      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1611
1612      create_output_operand (&ops[0], target, innermode);
1613      create_input_operand (&ops[1], op0, outermode);
1614      create_integer_operand (&ops[2], pos);
1615      if (maybe_expand_insn (icode, 3, ops))
1616	{
1617	  target = ops[0].value;
1618	  if (GET_MODE (target) != mode)
1619	    return gen_lowpart (tmode, target);
1620	  return target;
1621	}
1622    }
1623
1624  /* Make sure we are playing with integral modes.  Pun with subregs
1625     if we aren't.  */
1626  {
1627    machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1628    if (imode != GET_MODE (op0))
1629      {
1630	if (MEM_P (op0))
1631	  op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1632	else if (imode != BLKmode)
1633	  {
1634	    op0 = gen_lowpart (imode, op0);
1635
1636	    /* If we got a SUBREG, force it into a register since we
1637	       aren't going to be able to do another SUBREG on it.  */
1638	    if (GET_CODE (op0) == SUBREG)
1639	      op0 = force_reg (imode, op0);
1640	  }
1641	else
1642	  {
1643	    HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1644	    rtx mem = assign_stack_temp (GET_MODE (op0), size);
1645	    emit_move_insn (mem, op0);
1646	    op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1647	  }
1648      }
1649  }
1650
1651  /* ??? We currently assume TARGET is at least as big as BITSIZE.
1652     If that's wrong, the solution is to test for it and set TARGET to 0
1653     if needed.  */
1654
1655  /* Get the mode of the field to use for atomic access or subreg
1656     conversion.  */
1657  mode1 = mode;
1658  if (SCALAR_INT_MODE_P (tmode))
1659    {
1660      machine_mode try_mode = mode_for_size (bitsize,
1661						  GET_MODE_CLASS (tmode), 0);
1662      if (try_mode != BLKmode)
1663	mode1 = try_mode;
1664    }
1665  gcc_assert (mode1 != BLKmode);
1666
1667  /* Extraction of a full MODE1 value can be done with a subreg as long
1668     as the least significant bit of the value is the least significant
1669     bit of either OP0 or a word of OP0.  */
1670  if (!MEM_P (op0)
1671      && !reverse
1672      && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1673      && bitsize == GET_MODE_BITSIZE (mode1)
1674      && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1675    {
1676      rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1677				     bitnum / BITS_PER_UNIT);
1678      if (sub)
1679	return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1680    }
1681
1682  /* Extraction of a full MODE1 value can be done with a load as long as
1683     the field is on a byte boundary and is sufficiently aligned.  */
1684  if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1685    {
1686      op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1687      if (reverse)
1688	op0 = flip_storage_order (mode1, op0);
1689      return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1690    }
1691
1692  /* Handle fields bigger than a word.  */
1693
1694  if (bitsize > BITS_PER_WORD)
1695    {
1696      /* Here we transfer the words of the field
1697	 in the order least significant first.
1698	 This is because the most significant word is the one which may
1699	 be less than full.  */
1700
1701      const bool backwards = WORDS_BIG_ENDIAN;
1702      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1703      unsigned int i;
1704      rtx_insn *last;
1705
1706      if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1707	target = gen_reg_rtx (mode);
1708
1709      /* In case we're about to clobber a base register or something
1710	 (see gcc.c-torture/execute/20040625-1.c).   */
1711      if (reg_mentioned_p (target, str_rtx))
1712	target = gen_reg_rtx (mode);
1713
1714      /* Indicate for flow that the entire target reg is being set.  */
1715      emit_clobber (target);
1716
1717      last = get_last_insn ();
1718      for (i = 0; i < nwords; i++)
1719	{
1720	  /* If I is 0, use the low-order word in both field and target;
1721	     if I is 1, use the next to lowest word; and so on.  */
1722	  /* Word number in TARGET to use.  */
1723	  unsigned int wordnum
1724	    = (backwards
1725	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1726	       : i);
1727	  /* Offset from start of field in OP0.  */
1728	  unsigned int bit_offset = (backwards ^ reverse
1729				     ? MAX ((int) bitsize - ((int) i + 1)
1730					    * BITS_PER_WORD,
1731					    0)
1732				     : (int) i * BITS_PER_WORD);
1733	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1734	  rtx result_part
1735	    = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1736					     bitsize - i * BITS_PER_WORD),
1737				   bitnum + bit_offset, 1, target_part,
1738				   mode, word_mode, reverse, fallback_p);
1739
1740	  gcc_assert (target_part);
1741	  if (!result_part)
1742	    {
1743	      delete_insns_since (last);
1744	      return NULL;
1745	    }
1746
1747	  if (result_part != target_part)
1748	    emit_move_insn (target_part, result_part);
1749	}
1750
1751      if (unsignedp)
1752	{
1753	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
1754	     need to be zero'd out.  */
1755	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1756	    {
1757	      unsigned int i, total_words;
1758
1759	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1760	      for (i = nwords; i < total_words; i++)
1761		emit_move_insn
1762		  (operand_subword (target,
1763				    backwards ? total_words - i - 1 : i,
1764				    1, VOIDmode),
1765		   const0_rtx);
1766	    }
1767	  return target;
1768	}
1769
1770      /* Signed bit field: sign-extend with two arithmetic shifts.  */
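      /* For instance (illustrative only): extracting a 13-bit signed field
	 into a 32-bit mode shifts left by 32 - 13 = 19, so the field's
	 sign bit reaches the mode's sign bit, then arithmetic-shifts
	 right by 19 to replicate it, roughly (x << 19) >> 19 with a
	 signed right shift.  */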
1771      target = expand_shift (LSHIFT_EXPR, mode, target,
1772			     GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1773      return expand_shift (RSHIFT_EXPR, mode, target,
1774			   GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1775    }
1776
1777  /* If OP0 is a multi-word register, narrow it to the affected word.
1778     If the region spans two words, defer to extract_split_bit_field.  */
1779  if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1780    {
1781      if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1782	{
1783	  if (!fallback_p)
1784	    return NULL_RTX;
1785	  target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1786					    reverse);
1787	  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1788	}
1789      op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1790				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1791      bitnum %= BITS_PER_WORD;
1792    }
1793
1794  /* From here on we know the desired field is smaller than a word.
1795     If OP0 is a register, it too fits within a word.  */
1796  enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1797  extraction_insn extv;
1798  if (!MEM_P (op0)
1799      && !reverse
1800      /* ??? We could limit the structure size to the part of OP0 that
1801	 contains the field, with appropriate checks for endianness
1802	 and TRULY_NOOP_TRUNCATION.  */
1803      && get_best_reg_extraction_insn (&extv, pattern,
1804				       GET_MODE_BITSIZE (GET_MODE (op0)),
1805				       tmode))
1806    {
1807      rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1808						 unsignedp, target, mode,
1809						 tmode);
1810      if (result)
1811	return result;
1812    }
1813
1814  /* If OP0 is a memory, try copying it to a register and seeing if a
1815     cheap register alternative is available.  */
1816  if (MEM_P (op0) && !reverse)
1817    {
1818      if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1819					tmode))
1820	{
1821	  rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1822						     bitnum, unsignedp,
1823						     target, mode,
1824						     tmode);
1825	  if (result)
1826	    return result;
1827	}
1828
1829      rtx_insn *last = get_last_insn ();
1830
1831      /* Try loading part of OP0 into a register and extracting the
1832	 bitfield from that.  */
1833      unsigned HOST_WIDE_INT bitpos;
1834      rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1835					       0, 0, tmode, &bitpos);
1836      if (xop0)
1837	{
1838	  xop0 = copy_to_reg (xop0);
1839	  rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1840					    unsignedp, target,
1841					    mode, tmode, reverse, false);
1842	  if (result)
1843	    return result;
1844	  delete_insns_since (last);
1845	}
1846    }
1847
1848  if (!fallback_p)
1849    return NULL;
1850
1851  /* Find a correspondingly-sized integer field, so we can apply
1852     shifts and masks to it.  */
1853  int_mode = int_mode_for_mode (tmode);
1854  if (int_mode == BLKmode)
1855    int_mode = int_mode_for_mode (mode);
1856  /* Should probably push op0 out to memory and then do a load.  */
1857  gcc_assert (int_mode != BLKmode);
1858
1859  target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1860				    unsignedp, reverse);
1861
1862  /* Complex values must be reversed piecewise, so we need to undo the global
1863     reversal, convert to the complex mode and reverse again.  */
1864  if (reverse && COMPLEX_MODE_P (tmode))
1865    {
1866      target = flip_storage_order (int_mode, target);
1867      target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1868      target = flip_storage_order (tmode, target);
1869    }
1870  else
1871    target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1872
1873  return target;
1874}
1875
1876/* Generate code to extract a bit-field from STR_RTX
1877   containing BITSIZE bits, starting at BITNUM,
1878   and put it in TARGET if possible (if TARGET is nonzero).
1879   Regardless of TARGET, we return the rtx for where the value is placed.
1880
1881   STR_RTX is the structure containing the byte (a REG or MEM).
1882   UNSIGNEDP is nonzero if this is an unsigned bit field.
1883   MODE is the natural mode of the field value once extracted.
1884   TMODE is the mode the caller would like the value to have;
1885   but the value may be returned with type MODE instead.
1886
1887   If REVERSE is true, the extraction is to be done in reverse order.
1888
1889   If a TARGET is specified and we can store in it at no extra cost,
1890   we do so, and return TARGET.
1891   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1892   if they are equally easy.  */
1893
1894rtx
1895extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1896		   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1897		   machine_mode mode, machine_mode tmode, bool reverse)
1898{
1899  machine_mode mode1;
1900
1901  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1902  if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1903    mode1 = GET_MODE (str_rtx);
1904  else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1905    mode1 = GET_MODE (target);
1906  else
1907    mode1 = tmode;
1908
1909  if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1910    {
1911      /* Extraction of a full MODE1 value can be done with a simple load.
1912	 We know here that the field can be accessed with one single
1913	 instruction.  For targets that support unaligned memory,
1914	 an unaligned access may be necessary.  */
1915      if (bitsize == GET_MODE_BITSIZE (mode1))
1916	{
1917	  rtx result = adjust_bitfield_address (str_rtx, mode1,
1918						bitnum / BITS_PER_UNIT);
1919	  if (reverse)
1920	    result = flip_storage_order (mode1, result);
1921	  gcc_assert (bitnum % BITS_PER_UNIT == 0);
1922	  return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1923	}
1924
1925      str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1926				      &bitnum);
1927      gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1928      str_rtx = copy_to_reg (str_rtx);
1929    }
1930
1931  return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1932			      target, mode, tmode, reverse, true);
1933}
1934
1935/* Use shifts and boolean operations to extract a field of BITSIZE bits
1936   from bit BITNUM of OP0.
1937
1938   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1939   If REVERSE is true, the extraction is to be done in reverse order.
1940
1941   If TARGET is nonzero, attempts to store the value there
1942   and return TARGET, but this is not guaranteed.
1943   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1944
1945static rtx
1946extract_fixed_bit_field (machine_mode tmode, rtx op0,
1947			 unsigned HOST_WIDE_INT bitsize,
1948			 unsigned HOST_WIDE_INT bitnum, rtx target,
1949			 int unsignedp, bool reverse)
1950{
1951  if (MEM_P (op0))
1952    {
1953      machine_mode mode
1954	= get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1955			 MEM_VOLATILE_P (op0));
1956
1957      if (mode == VOIDmode)
1958	/* The only way this should occur is if the field spans word
1959	   boundaries.  */
1960	return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1961					reverse);
1962
1963      op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1964    }
1965
1966  return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1967				    target, unsignedp, reverse);
1968}
1969
1970/* Helper function for extract_fixed_bit_field, extracts
1971   the bit field always using the MODE of OP0.  */
1972
1973static rtx
1974extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1975			   unsigned HOST_WIDE_INT bitsize,
1976			   unsigned HOST_WIDE_INT bitnum, rtx target,
1977			   int unsignedp, bool reverse)
1978{
1979  machine_mode mode = GET_MODE (op0);
1980  gcc_assert (SCALAR_INT_MODE_P (mode));
1981
1982  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1983     for invalid input, such as extract equivalent of f5 from
1984     gcc.dg/pr48335-2.c.  */
1985
1986  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1987    /* BITNUM is the distance between our msb and that of OP0.
1988       Convert it to the distance from the lsb.  */
1989    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1990
1991  /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1992     We have reduced the big-endian case to the little-endian case.  */
1993  if (reverse)
1994    op0 = flip_storage_order (mode, op0);
1995
1996  if (unsignedp)
1997    {
1998      if (bitnum)
1999	{
2000	  /* If the field does not already start at the lsb,
2001	     shift it so it does.  */
2002	  /* Maybe propagate the target for the shift.  */
2003	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2004	  if (tmode != mode)
2005	    subtarget = 0;
2006	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2007	}
2008      /* Convert the value to the desired mode.  */
2009      if (mode != tmode)
2010	op0 = convert_to_mode (tmode, op0, 1);
2011
2012      /* Unless the msb of the field used to be the msb when we shifted,
2013	 mask out the upper bits.  */
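      /* E.g. (illustrative only) an unsigned 5-bit field at bit 3 of a
	 32-bit value becomes (x >> 3) & 0x1f; the mask is omitted when
	 the field already ends at the mode's msb.  */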
2014
2015      if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2016	return expand_binop (GET_MODE (op0), and_optab, op0,
2017			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2018			     target, 1, OPTAB_LIB_WIDEN);
2019      return op0;
2020    }
2021
2022  /* To extract a signed bit-field, first shift its msb to the msb of the word,
2023     then arithmetic-shift its lsb to the lsb of the word.  */
2024  op0 = force_reg (mode, op0);
2025
2026  /* Find the narrowest integer mode that contains the field.  */
2027
2028  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2029       mode = GET_MODE_WIDER_MODE (mode))
2030    if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2031      {
2032	op0 = convert_to_mode (mode, op0, 0);
2033	break;
2034      }
2035
2036  if (mode != tmode)
2037    target = 0;
2038
2039  if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2040    {
2041      int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2042      /* Maybe propagate the target for the shift.  */
2043      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2044      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2045    }
2046
2047  return expand_shift (RSHIFT_EXPR, mode, op0,
2048		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2049}
2050
2051/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2052   VALUE << BITPOS.  */
2053
2054static rtx
2055lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2056	      int bitpos)
2057{
2058  return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2059}
2060
2061/* Extract a bit field that is split across two words
2062   and return an RTX for the result.
2063
2064   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2065   BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2066   UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2067
2068   If REVERSE is true, the extraction is to be done in reverse order.  */
2069
2070static rtx
2071extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2072			 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2073			 bool reverse)
2074{
2075  unsigned int unit;
2076  unsigned int bitsdone = 0;
2077  rtx result = NULL_RTX;
2078  int first = 1;
2079
2080  /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2081     much at a time.  */
2082  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2083    unit = BITS_PER_WORD;
2084  else
2085    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2086
2087  while (bitsdone < bitsize)
2088    {
2089      unsigned HOST_WIDE_INT thissize;
2090      rtx part, word;
2091      unsigned HOST_WIDE_INT thispos;
2092      unsigned HOST_WIDE_INT offset;
2093
2094      offset = (bitpos + bitsdone) / unit;
2095      thispos = (bitpos + bitsdone) % unit;
2096
2097      /* THISSIZE must not overrun a word boundary.  Otherwise,
2098	 extract_fixed_bit_field will call us again, and we will mutually
2099	 recurse forever.  */
2100      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2101      thissize = MIN (thissize, unit - thispos);
2102
2103      /* If OP0 is a register, then handle OFFSET here.  */
2104      if (SUBREG_P (op0) || REG_P (op0))
2105	{
2106	  word = operand_subword_force (op0, offset, GET_MODE (op0));
2107	  offset = 0;
2108	}
2109      else
2110	word = op0;
2111
2112      /* Extract the parts in bit-counting order,
2113	 whose meaning is determined by BYTES_PER_UNIT.
2114	 OFFSET is in UNITs, and UNIT is in bits.  */
2115      part = extract_fixed_bit_field (word_mode, word, thissize,
2116				      offset * unit + thispos, 0, 1, reverse);
2117      bitsdone += thissize;
2118
2119      /* Shift this part into place for the result.  */
2120      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2121	{
2122	  if (bitsize != bitsdone)
2123	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2124				 bitsize - bitsdone, 0, 1);
2125	}
2126      else
2127	{
2128	  if (bitsdone != thissize)
2129	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2130				 bitsdone - thissize, 0, 1);
2131	}
2132
2133      if (first)
2134	result = part;
2135      else
2136	/* Combine the parts with bitwise or.  This works
2137	   because we extracted each part as an unsigned bit field.  */
2138	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2139			       OPTAB_LIB_WIDEN);
2140
2141      first = 0;
2142    }
2143
2144  /* Unsigned bit field: we are done.  */
2145  if (unsignedp)
2146    return result;
2147  /* Signed bit field: sign-extend with two arithmetic shifts.  */
2148  result = expand_shift (LSHIFT_EXPR, word_mode, result,
2149			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2150  return expand_shift (RSHIFT_EXPR, word_mode, result,
2151		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
2152}
2153
2154/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2155   the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2156   MODE, fill the upper bits with zeros.  Fail if the layout of either
2157   mode is unknown (as for CC modes) or if the extraction would involve
2158   unprofitable mode punning.  Return the value on success, otherwise
2159   return null.
2160
2161   This is different from gen_lowpart* in these respects:
2162
2163     - the returned value must always be considered an rvalue
2164
2165     - when MODE is wider than SRC_MODE, the extraction involves
2166       a zero extension
2167
2168     - when MODE is smaller than SRC_MODE, the extraction involves
2169       a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2170
2171   In other words, this routine performs a computation, whereas the
2172   gen_lowpart* routines are conceptually lvalue or rvalue subreg
2173   operations.  */
2174
2175rtx
2176extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2177{
2178  machine_mode int_mode, src_int_mode;
2179
2180  if (mode == src_mode)
2181    return src;
2182
2183  if (CONSTANT_P (src))
2184    {
2185      /* simplify_gen_subreg can't be used here, as if simplify_subreg
2186	 fails, it will happily create (subreg (symbol_ref)) or similar
2187	 invalid SUBREGs.  */
2188      unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2189      rtx ret = simplify_subreg (mode, src, src_mode, byte);
2190      if (ret)
2191	return ret;
2192
2193      if (GET_MODE (src) == VOIDmode
2194	  || !validate_subreg (mode, src_mode, src, byte))
2195	return NULL_RTX;
2196
2197      src = force_reg (GET_MODE (src), src);
2198      return gen_rtx_SUBREG (mode, src, byte);
2199    }
2200
2201  if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2202    return NULL_RTX;
2203
2204  if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2205      && MODES_TIEABLE_P (mode, src_mode))
2206    {
2207      rtx x = gen_lowpart_common (mode, src);
2208      if (x)
2209        return x;
2210    }
2211
2212  src_int_mode = int_mode_for_mode (src_mode);
2213  int_mode = int_mode_for_mode (mode);
2214  if (src_int_mode == BLKmode || int_mode == BLKmode)
2215    return NULL_RTX;
2216
2217  if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2218    return NULL_RTX;
2219  if (!MODES_TIEABLE_P (int_mode, mode))
2220    return NULL_RTX;
2221
2222  src = gen_lowpart (src_int_mode, src);
2223  src = convert_modes (int_mode, src_int_mode, src, true);
2224  src = gen_lowpart (mode, src);
2225  return src;
2226}
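
/* Illustrative use of the routine above (not from the original sources):
   a caller wanting the low SImode bits of a DImode value as an rvalue
   might write

     rtx lo = extract_low_bits (SImode, DImode, src);
     if (lo == NULL_RTX)
       ... fall back to a full bit-field extraction ...

   since extract_low_bits can fail for CC modes or for mode punning it
   considers unprofitable.  */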
2227
2228/* Add INC into TARGET.  */
2229
2230void
2231expand_inc (rtx target, rtx inc)
2232{
2233  rtx value = expand_binop (GET_MODE (target), add_optab,
2234			    target, inc,
2235			    target, 0, OPTAB_LIB_WIDEN);
2236  if (value != target)
2237    emit_move_insn (target, value);
2238}
2239
2240/* Subtract DEC from TARGET.  */
2241
2242void
2243expand_dec (rtx target, rtx dec)
2244{
2245  rtx value = expand_binop (GET_MODE (target), sub_optab,
2246			    target, dec,
2247			    target, 0, OPTAB_LIB_WIDEN);
2248  if (value != target)
2249    emit_move_insn (target, value);
2250}
2251
2252/* Output a shift instruction for expression code CODE,
2253   with SHIFTED being the rtx for the value to shift,
2254   and AMOUNT the rtx for the amount to shift by.
2255   Store the result in the rtx TARGET, if that is convenient.
2256   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2257   Return the rtx for where the value is.
2258   If that cannot be done, abort the compilation unless MAY_FAIL is true,
2259   in which case 0 is returned.  */
2260
2261static rtx
2262expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2263		rtx amount, rtx target, int unsignedp, bool may_fail = false)
2264{
2265  rtx op1, temp = 0;
2266  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2267  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2268  optab lshift_optab = ashl_optab;
2269  optab rshift_arith_optab = ashr_optab;
2270  optab rshift_uns_optab = lshr_optab;
2271  optab lrotate_optab = rotl_optab;
2272  optab rrotate_optab = rotr_optab;
2273  machine_mode op1_mode;
2274  machine_mode scalar_mode = mode;
2275  int attempt;
2276  bool speed = optimize_insn_for_speed_p ();
2277
2278  if (VECTOR_MODE_P (mode))
2279    scalar_mode = GET_MODE_INNER (mode);
2280  op1 = amount;
2281  op1_mode = GET_MODE (op1);
2282
2283  /* Determine whether the shift/rotate amount is a vector or a scalar.  If the
2284     shift amount is a vector, use the vector/vector shift patterns.  */
2285  if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2286    {
2287      lshift_optab = vashl_optab;
2288      rshift_arith_optab = vashr_optab;
2289      rshift_uns_optab = vlshr_optab;
2290      lrotate_optab = vrotl_optab;
2291      rrotate_optab = vrotr_optab;
2292    }
2293
2294  /* We previously detected shift counts computed by NEGATE_EXPR
2295     and shifted in the other direction, but that does not work
2296     on all machines.  */
2297
2298  if (SHIFT_COUNT_TRUNCATED)
2299    {
2300      if (CONST_INT_P (op1)
2301	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2302	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2303	op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2304		       % GET_MODE_BITSIZE (scalar_mode));
2305      else if (GET_CODE (op1) == SUBREG
2306	       && subreg_lowpart_p (op1)
2307	       && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2308	       && SCALAR_INT_MODE_P (GET_MODE (op1)))
2309	op1 = SUBREG_REG (op1);
2310    }
2311
2312  /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2313     prefer left rotation, if op1 is from bitsize / 2 + 1 to
2314     bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2315     amount instead.  */
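  /* E.g. (illustrative only) in a 32-bit mode a rotate left by 25 becomes
     a rotate right by 32 - 25 = 7, so the constant amount stays at or
     below half the mode width.  */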
2316  if (rotate
2317      && CONST_INT_P (op1)
2318      && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2319		   GET_MODE_BITSIZE (scalar_mode) - 1))
2320    {
2321      op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2322      left = !left;
2323      code = left ? LROTATE_EXPR : RROTATE_EXPR;
2324    }
2325
2326  /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2327     Note that this is not the case for bigger values.  For instance a rotation
2328     of 0x01020304 by 16 bits gives 0x03040102 which is different from
2329     0x04030201 (bswapsi).  */
2330  if (rotate
2331      && CONST_INT_P (op1)
2332      && INTVAL (op1) == BITS_PER_UNIT
2333      && GET_MODE_SIZE (scalar_mode) == 2
2334      && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2335    return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2336
2337  if (op1 == const0_rtx)
2338    return shifted;
2339
2340  /* Check whether it's cheaper to implement a left shift by a constant
2341     bit count by a sequence of additions.  */
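  /* Illustrative only: each pass of the loop below doubles the value, so a
     left shift by 3 becomes roughly

       x = x + x;  x = x + x;  x = x + x;

     which is used when three adds are cheaper than one shift by 3.  */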
2342  if (code == LSHIFT_EXPR
2343      && CONST_INT_P (op1)
2344      && INTVAL (op1) > 0
2345      && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2346      && INTVAL (op1) < MAX_BITS_PER_WORD
2347      && (shift_cost (speed, mode, INTVAL (op1))
2348	  > INTVAL (op1) * add_cost (speed, mode))
2349      && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2350    {
2351      int i;
2352      for (i = 0; i < INTVAL (op1); i++)
2353	{
2354	  temp = force_reg (mode, shifted);
2355	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2356				  unsignedp, OPTAB_LIB_WIDEN);
2357	}
2358      return shifted;
2359    }
2360
2361  for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2362    {
2363      enum optab_methods methods;
2364
2365      if (attempt == 0)
2366	methods = OPTAB_DIRECT;
2367      else if (attempt == 1)
2368	methods = OPTAB_WIDEN;
2369      else
2370	methods = OPTAB_LIB_WIDEN;
2371
2372      if (rotate)
2373	{
2374	  /* Widening does not work for rotation.  */
2375	  if (methods == OPTAB_WIDEN)
2376	    continue;
2377	  else if (methods == OPTAB_LIB_WIDEN)
2378	    {
2379	      /* If we have been unable to open-code this by a rotation,
2380		 do it as the IOR of two shifts.  I.e., to rotate A
2381		 by N bits, compute
2382		 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2383		 where C is the bitsize of A.
2384
2385		 It is theoretically possible that the target machine might
2386		 not be able to perform either shift and hence we would
2387		 be making two libcalls rather than just the one for the
2388		 shift (similarly if IOR could not be done).  We will allow
2389		 this extremely unlikely lossage to avoid complicating the
2390		 code below.  */
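	      /* Illustrative only: for a 32-bit mode this computes the
		 equivalent of

		   (a << n) | ((unsigned) a >> ((-n) & 31));

		 and the AND with C - 1 keeps the right-shift count in
		 range even when the runtime amount is zero.  */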
2391
2392	      rtx subtarget = target == shifted ? 0 : target;
2393	      rtx new_amount, other_amount;
2394	      rtx temp1;
2395
2396	      new_amount = op1;
2397	      if (op1 == const0_rtx)
2398		return shifted;
2399	      else if (CONST_INT_P (op1))
2400		other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2401					- INTVAL (op1));
2402	      else
2403		{
2404		  other_amount
2405		    = simplify_gen_unary (NEG, GET_MODE (op1),
2406					  op1, GET_MODE (op1));
2407		  HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2408		  other_amount
2409		    = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2410					   gen_int_mode (mask, GET_MODE (op1)));
2411		}
2412
2413	      shifted = force_reg (mode, shifted);
2414
2415	      temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2416				     mode, shifted, new_amount, 0, 1);
2417	      temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2418				      mode, shifted, other_amount,
2419				      subtarget, 1);
2420	      return expand_binop (mode, ior_optab, temp, temp1, target,
2421				   unsignedp, methods);
2422	    }
2423
2424	  temp = expand_binop (mode,
2425			       left ? lrotate_optab : rrotate_optab,
2426			       shifted, op1, target, unsignedp, methods);
2427	}
2428      else if (unsignedp)
2429	temp = expand_binop (mode,
2430			     left ? lshift_optab : rshift_uns_optab,
2431			     shifted, op1, target, unsignedp, methods);
2432
2433      /* Do arithmetic shifts.
2434	 Also, if we are going to widen the operand, we can just as well
2435	 use an arithmetic right-shift instead of a logical one.  */
2436      if (temp == 0 && ! rotate
2437	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2438	{
2439	  enum optab_methods methods1 = methods;
2440
2441	  /* If trying to widen a log shift to an arithmetic shift,
2442	     don't accept an arithmetic shift of the same size.  */
2443	  if (unsignedp)
2444	    methods1 = OPTAB_MUST_WIDEN;
2445
2446	  /* Arithmetic shift */
2447
2448	  temp = expand_binop (mode,
2449			       left ? lshift_optab : rshift_arith_optab,
2450			       shifted, op1, target, unsignedp, methods1);
2451	}
2452
2453      /* We used to try extzv here for logical right shifts, but that was
2454	 only useful for one machine, the VAX, and caused poor code
2455	 generation there for lshrdi3, so the code was deleted and a
2456	 define_expand for lshrsi3 was added to vax.md.  */
2457    }
2458
2459  gcc_assert (temp != NULL_RTX || may_fail);
2460  return temp;
2461}
2462
2463/* Output a shift instruction for expression code CODE,
2464   with SHIFTED being the rtx for the value to shift,
2465   and AMOUNT the amount to shift by.
2466   Store the result in the rtx TARGET, if that is convenient.
2467   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2468   Return the rtx for where the value is.  */
2469
2470rtx
2471expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2472	      int amount, rtx target, int unsignedp)
2473{
2474  return expand_shift_1 (code, mode,
2475			 shifted, GEN_INT (amount), target, unsignedp);
2476}
2477
2478/* Likewise, but return 0 if that cannot be done.  */
2479
2480static rtx
2481maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2482		    int amount, rtx target, int unsignedp)
2483{
2484  return expand_shift_1 (code, mode,
2485			 shifted, GEN_INT (amount), target, unsignedp, true);
2486}
2487
2488/* Output a shift instruction for expression code CODE,
2489   with SHIFTED being the rtx for the value to shift,
2490   and AMOUNT the tree for the amount to shift by.
2491   Store the result in the rtx TARGET, if that is convenient.
2492   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2493   Return the rtx for where the value is.  */
2494
2495rtx
2496expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2497		       tree amount, rtx target, int unsignedp)
2498{
2499  return expand_shift_1 (code, mode,
2500			 shifted, expand_normal (amount), target, unsignedp);
2501}
2502
2503
2504static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2505			const struct mult_cost *, machine_mode mode);
2506static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2507			      const struct algorithm *, enum mult_variant);
2508static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2509static rtx extract_high_half (machine_mode, rtx);
2510static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2511static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2512				       int, int);
2513/* Compute and return the best algorithm for multiplying by T.
2514   The algorithm must cost less than COST_LIMIT.
2515   If retval.cost >= COST_LIMIT, no algorithm was found and all
2516   other fields of the returned struct are undefined.
2517   MODE is the machine mode of the multiplication.  */
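
/* Worked example (illustrative only, not from the original sources):
   for T == 10, synth_mult can discover 10 = ((1 << 2) + 1) << 1, i.e.

     t1 = (x << 2) + x;		(alg_add_t2_m, log 2)
     t2 = t1 << 1;		(alg_shift, log 1)

   provided that sequence costs less than COST_LIMIT.  */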
2518
2519static void
2520synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2521	    const struct mult_cost *cost_limit, machine_mode mode)
2522{
2523  int m;
2524  struct algorithm *alg_in, *best_alg;
2525  struct mult_cost best_cost;
2526  struct mult_cost new_limit;
2527  int op_cost, op_latency;
2528  unsigned HOST_WIDE_INT orig_t = t;
2529  unsigned HOST_WIDE_INT q;
2530  int maxm, hash_index;
2531  bool cache_hit = false;
2532  enum alg_code cache_alg = alg_zero;
2533  bool speed = optimize_insn_for_speed_p ();
2534  machine_mode imode;
2535  struct alg_hash_entry *entry_ptr;
2536
2537  /* Indicate that no algorithm is yet found.  If no algorithm
2538     is found, this value will be returned and indicate failure.  */
2539  alg_out->cost.cost = cost_limit->cost + 1;
2540  alg_out->cost.latency = cost_limit->latency + 1;
2541
2542  if (cost_limit->cost < 0
2543      || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2544    return;
2545
2546  /* Be prepared for vector modes.  */
2547  imode = GET_MODE_INNER (mode);
2548
2549  maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2550
2551  /* Restrict the bits of "t" to the multiplication's mode.  */
2552  t &= GET_MODE_MASK (imode);
2553
2554  /* t == 1 can be done in zero cost.  */
2555  if (t == 1)
2556    {
2557      alg_out->ops = 1;
2558      alg_out->cost.cost = 0;
2559      alg_out->cost.latency = 0;
2560      alg_out->op[0] = alg_m;
2561      return;
2562    }
2563
2564  /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2565     fail now.  */
2566  if (t == 0)
2567    {
2568      if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2569	return;
2570      else
2571	{
2572	  alg_out->ops = 1;
2573	  alg_out->cost.cost = zero_cost (speed);
2574	  alg_out->cost.latency = zero_cost (speed);
2575	  alg_out->op[0] = alg_zero;
2576	  return;
2577	}
2578    }
2579
2580  /* We'll be needing a couple extra algorithm structures now.  */
2581
2582  alg_in = XALLOCA (struct algorithm);
2583  best_alg = XALLOCA (struct algorithm);
2584  best_cost = *cost_limit;
2585
2586  /* Compute the hash index.  */
2587  hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2588
2589  /* See if we already know what to do for T.  */
2590  entry_ptr = alg_hash_entry_ptr (hash_index);
2591  if (entry_ptr->t == t
2592      && entry_ptr->mode == mode
2593      && entry_ptr->speed == speed
2594      && entry_ptr->alg != alg_unknown)
2595    {
2596      cache_alg = entry_ptr->alg;
2597
2598      if (cache_alg == alg_impossible)
2599	{
2600	  /* The cache tells us that it's impossible to synthesize
2601	     multiplication by T within entry_ptr->cost.  */
2602	  if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2603	    /* COST_LIMIT is at least as restrictive as the one
2604	       recorded in the hash table, in which case we have no
2605	       hope of synthesizing a multiplication.  Just
2606	       return.  */
2607	    return;
2608
2609	  /* If we get here, COST_LIMIT is less restrictive than the
2610	     one recorded in the hash table, so we may be able to
2611	     synthesize a multiplication.  Proceed as if we didn't
2612	     have the cache entry.  */
2613	}
2614      else
2615	{
2616	  if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2617	    /* The cached algorithm shows that this multiplication
2618	       requires more cost than COST_LIMIT.  Just return.  This
2619	       way, we don't clobber this cache entry with
2620	       alg_impossible but retain useful information.  */
2621	    return;
2622
2623	  cache_hit = true;
2624
2625	  switch (cache_alg)
2626	    {
2627	    case alg_shift:
2628	      goto do_alg_shift;
2629
2630	    case alg_add_t_m2:
2631	    case alg_sub_t_m2:
2632	      goto do_alg_addsub_t_m2;
2633
2634	    case alg_add_factor:
2635	    case alg_sub_factor:
2636	      goto do_alg_addsub_factor;
2637
2638	    case alg_add_t2_m:
2639	      goto do_alg_add_t2_m;
2640
2641	    case alg_sub_t2_m:
2642	      goto do_alg_sub_t2_m;
2643
2644	    default:
2645	      gcc_unreachable ();
2646	    }
2647	}
2648    }
2649
2650  /* If we have a group of zero bits at the low-order part of T, try
2651     multiplying by the remaining bits and then doing a shift.  */
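  /* E.g. (illustrative only) T == 12 has two low zero bits, so the
     candidate sequence is a multiplication by 3 followed by a shift:
     x * 12 == (x * 3) << 2.  */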
2652
2653  if ((t & 1) == 0)
2654    {
2655    do_alg_shift:
2656      m = ctz_or_zero (t); /* m = number of low zero bits */
2657      if (m < maxm)
2658	{
2659	  q = t >> m;
2660	  /* The function expand_shift will choose between a shift and
2661	     a sequence of additions, so the observed cost is given as
2662	     MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2663	  op_cost = m * add_cost (speed, mode);
2664	  if (shift_cost (speed, mode, m) < op_cost)
2665	    op_cost = shift_cost (speed, mode, m);
2666	  new_limit.cost = best_cost.cost - op_cost;
2667	  new_limit.latency = best_cost.latency - op_cost;
2668	  synth_mult (alg_in, q, &new_limit, mode);
2669
2670	  alg_in->cost.cost += op_cost;
2671	  alg_in->cost.latency += op_cost;
2672	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2673	    {
2674	      best_cost = alg_in->cost;
2675	      std::swap (alg_in, best_alg);
2676	      best_alg->log[best_alg->ops] = m;
2677	      best_alg->op[best_alg->ops] = alg_shift;
2678	    }
2679
2680	  /* See if treating ORIG_T as a signed number yields a better
2681	     sequence.  Try this sequence only for a negative ORIG_T
2682	     as it would be useless for a non-negative ORIG_T.  */
2683	  if ((HOST_WIDE_INT) orig_t < 0)
2684	    {
2685	      /* Shift ORIG_T as follows because a right shift of a
2686		 negative-valued signed type is implementation
2687		 defined.  */
2688	      q = ~(~orig_t >> m);
2689	      /* The function expand_shift will choose between a shift
2690		 and a sequence of additions, so the observed cost is
2691		 given as MIN (m * add_cost(speed, mode),
2692		 shift_cost(speed, mode, m)).  */
2693	      op_cost = m * add_cost (speed, mode);
2694	      if (shift_cost (speed, mode, m) < op_cost)
2695		op_cost = shift_cost (speed, mode, m);
2696	      new_limit.cost = best_cost.cost - op_cost;
2697	      new_limit.latency = best_cost.latency - op_cost;
2698	      synth_mult (alg_in, q, &new_limit, mode);
2699
2700	      alg_in->cost.cost += op_cost;
2701	      alg_in->cost.latency += op_cost;
2702	      if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2703		{
2704		  best_cost = alg_in->cost;
2705		  std::swap (alg_in, best_alg);
2706		  best_alg->log[best_alg->ops] = m;
2707		  best_alg->op[best_alg->ops] = alg_shift;
2708		}
2709	    }
2710	}
2711      if (cache_hit)
2712	goto done;
2713    }
2714
2715  /* If we have an odd number, add or subtract one.  */
2716  if ((t & 1) != 0)
2717    {
2718      unsigned HOST_WIDE_INT w;
2719
2720    do_alg_addsub_t_m2:
2721      for (w = 1; (w & t) != 0; w <<= 1)
2722	;
2723      /* If T was -1, then W will be zero after the loop.  This is another
2724	 case where T ends with ...111.  Handling it as (T + 1) followed by
2725	 a subtraction produces slightly better code and selects an
2726	 algorithm much faster than treating it like the ...0111 case
2727	 below.  */
2728      if (w == 0
2729	  || (w > 2
2730	      /* Reject the case where t is 3.
2731		 Thus we prefer addition in that case.  */
2732	      && t != 3))
2733	{
2734	  /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2735
2736	  op_cost = add_cost (speed, mode);
2737	  new_limit.cost = best_cost.cost - op_cost;
2738	  new_limit.latency = best_cost.latency - op_cost;
2739	  synth_mult (alg_in, t + 1, &new_limit, mode);
2740
2741	  alg_in->cost.cost += op_cost;
2742	  alg_in->cost.latency += op_cost;
2743	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2744	    {
2745	      best_cost = alg_in->cost;
2746	      std::swap (alg_in, best_alg);
2747	      best_alg->log[best_alg->ops] = 0;
2748	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2749	    }
2750	}
2751      else
2752	{
2753	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2754
2755	  op_cost = add_cost (speed, mode);
2756	  new_limit.cost = best_cost.cost - op_cost;
2757	  new_limit.latency = best_cost.latency - op_cost;
2758	  synth_mult (alg_in, t - 1, &new_limit, mode);
2759
2760	  alg_in->cost.cost += op_cost;
2761	  alg_in->cost.latency += op_cost;
2762	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2763	    {
2764	      best_cost = alg_in->cost;
2765	      std::swap (alg_in, best_alg);
2766	      best_alg->log[best_alg->ops] = 0;
2767	      best_alg->op[best_alg->ops] = alg_add_t_m2;
2768	    }
2769	}
2770
2771      /* We may be able to calculate a * -7, a * -15, a * -31, etc
2772	 quickly with a - a * n for some appropriate constant n.  */
2773      m = exact_log2 (-orig_t + 1);
2774      if (m >= 0 && m < maxm)
2775	{
2776	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2777	  /* If the target has a cheap shift-and-subtract insn use
2778	     that in preference to a shift insn followed by a sub insn.
2779	     Assume that the shift-and-sub is "atomic" with a latency
2780	     equal to its cost, otherwise assume that on superscalar
2781	     hardware the shift may be executed concurrently with the
2782	     earlier steps in the algorithm.  */
2783	  if (shiftsub1_cost (speed, mode, m) <= op_cost)
2784	    {
2785	      op_cost = shiftsub1_cost (speed, mode, m);
2786	      op_latency = op_cost;
2787	    }
2788	  else
2789	    op_latency = add_cost (speed, mode);
2790
2791	  new_limit.cost = best_cost.cost - op_cost;
2792	  new_limit.latency = best_cost.latency - op_latency;
2793	  synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2794		      &new_limit, mode);
2795
2796	  alg_in->cost.cost += op_cost;
2797	  alg_in->cost.latency += op_latency;
2798	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2799	    {
2800	      best_cost = alg_in->cost;
2801	      std::swap (alg_in, best_alg);
2802	      best_alg->log[best_alg->ops] = m;
2803	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2804	    }
2805	}
2806
2807      if (cache_hit)
2808	goto done;
2809    }
2810
2811  /* Look for factors of t of the form
2812     t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2813     If we find such a factor, we can multiply by t using an algorithm that
2814     multiplies by q, shift the result by m and add/subtract it to itself.
2815
2816     We search for large factors first and loop down, even if large factors
2817     are less probable than small; if we find a large factor we will find a
2818     good sequence quickly, and therefore be able to prune (by decreasing
2819     COST_LIMIT) the search.  */
2820
2821 do_alg_addsub_factor:
2822  for (m = floor_log2 (t - 1); m >= 2; m--)
2823    {
2824      unsigned HOST_WIDE_INT d;
2825
2826      d = (HOST_WIDE_INT_1U << m) + 1;
2827      if (t % d == 0 && t > d && m < maxm
2828	  && (!cache_hit || cache_alg == alg_add_factor))
2829	{
2830	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2831	  if (shiftadd_cost (speed, mode, m) <= op_cost)
2832	    op_cost = shiftadd_cost (speed, mode, m);
2833
2834	  op_latency = op_cost;
2835
2836
2837	  new_limit.cost = best_cost.cost - op_cost;
2838	  new_limit.latency = best_cost.latency - op_latency;
2839	  synth_mult (alg_in, t / d, &new_limit, mode);
2840
2841	  alg_in->cost.cost += op_cost;
2842	  alg_in->cost.latency += op_latency;
2843	  if (alg_in->cost.latency < op_cost)
2844	    alg_in->cost.latency = op_cost;
2845	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2846	    {
2847	      best_cost = alg_in->cost;
2848	      std::swap (alg_in, best_alg);
2849	      best_alg->log[best_alg->ops] = m;
2850	      best_alg->op[best_alg->ops] = alg_add_factor;
2851	    }
2852	  /* Other factors will have been taken care of in the recursion.  */
2853	  break;
2854	}
2855
2856      d = (HOST_WIDE_INT_1U << m) - 1;
2857      if (t % d == 0 && t > d && m < maxm
2858	  && (!cache_hit || cache_alg == alg_sub_factor))
2859	{
2860	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2861	  if (shiftsub0_cost (speed, mode, m) <= op_cost)
2862	    op_cost = shiftsub0_cost (speed, mode, m);
2863
2864	  op_latency = op_cost;
2865
2866	  new_limit.cost = best_cost.cost - op_cost;
2867	  new_limit.latency = best_cost.latency - op_latency;
2868	  synth_mult (alg_in, t / d, &new_limit, mode);
2869
2870	  alg_in->cost.cost += op_cost;
2871	  alg_in->cost.latency += op_latency;
2872	  if (alg_in->cost.latency < op_cost)
2873	    alg_in->cost.latency = op_cost;
2874	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2875	    {
2876	      best_cost = alg_in->cost;
2877	      std::swap (alg_in, best_alg);
2878	      best_alg->log[best_alg->ops] = m;
2879	      best_alg->op[best_alg->ops] = alg_sub_factor;
2880	    }
2881	  break;
2882	}
2883    }
2884  if (cache_hit)
2885    goto done;
2886
2887  /* Try shift-and-add (load effective address) instructions,
2888     i.e. do a*3, a*5, a*9.  */
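  /* E.g. (illustrative only) a * 9 can become a single (a << 3) + a when
     the target provides such an instruction.  */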
2889  if ((t & 1) != 0)
2890    {
2891    do_alg_add_t2_m:
2892      q = t - 1;
2893      m = ctz_hwi (q);
2894      if (q && m < maxm)
2895	{
2896	  op_cost = shiftadd_cost (speed, mode, m);
2897	  new_limit.cost = best_cost.cost - op_cost;
2898	  new_limit.latency = best_cost.latency - op_cost;
2899	  synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2900
2901	  alg_in->cost.cost += op_cost;
2902	  alg_in->cost.latency += op_cost;
2903	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2904	    {
2905	      best_cost = alg_in->cost;
2906	      std::swap (alg_in, best_alg);
2907	      best_alg->log[best_alg->ops] = m;
2908	      best_alg->op[best_alg->ops] = alg_add_t2_m;
2909	    }
2910	}
2911      if (cache_hit)
2912	goto done;
2913
2914    do_alg_sub_t2_m:
2915      q = t + 1;
2916      m = ctz_hwi (q);
2917      if (q && m < maxm)
2918	{
2919	  op_cost = shiftsub0_cost (speed, mode, m);
2920	  new_limit.cost = best_cost.cost - op_cost;
2921	  new_limit.latency = best_cost.latency - op_cost;
2922	  synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2923
2924	  alg_in->cost.cost += op_cost;
2925	  alg_in->cost.latency += op_cost;
2926	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2927	    {
2928	      best_cost = alg_in->cost;
2929	      std::swap (alg_in, best_alg);
2930	      best_alg->log[best_alg->ops] = m;
2931	      best_alg->op[best_alg->ops] = alg_sub_t2_m;
2932	    }
2933	}
2934      if (cache_hit)
2935	goto done;
2936    }
2937
2938 done:
2939  /* If best_cost has not decreased, we have not found any algorithm.  */
2940  if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2941    {
2942      /* We failed to find an algorithm.  Record alg_impossible for
2943	 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2944	 we are asked to find an algorithm for T within the same or
2945	 lower COST_LIMIT, we can immediately return to the
2946	 caller.  */
2947      entry_ptr->t = t;
2948      entry_ptr->mode = mode;
2949      entry_ptr->speed = speed;
2950      entry_ptr->alg = alg_impossible;
2951      entry_ptr->cost = *cost_limit;
2952      return;
2953    }
2954
2955  /* Cache the result.  */
2956  if (!cache_hit)
2957    {
2958      entry_ptr->t = t;
2959      entry_ptr->mode = mode;
2960      entry_ptr->speed = speed;
2961      entry_ptr->alg = best_alg->op[best_alg->ops];
2962      entry_ptr->cost.cost = best_cost.cost;
2963      entry_ptr->cost.latency = best_cost.latency;
2964    }
2965
2966  /* If the sequence is too long for `struct algorithm'
2967     to record, make this search fail.  */
2968  if (best_alg->ops == MAX_BITS_PER_WORD)
2969    return;
2970
2971  /* Copy the algorithm from temporary space to the space at alg_out.
2972     We avoid using structure assignment because the majority of
2973     best_alg is normally undefined, and this is a critical function.  */
2974  alg_out->ops = best_alg->ops + 1;
2975  alg_out->cost = best_cost;
2976  memcpy (alg_out->op, best_alg->op,
2977	  alg_out->ops * sizeof *alg_out->op);
2978  memcpy (alg_out->log, best_alg->log,
2979	  alg_out->ops * sizeof *alg_out->log);
2980}
2981
2982/* Find the cheapest way of multiplying a value of mode MODE by VAL.
2983   Try three variations:
2984
2985       - a shift/add sequence based on VAL itself
2986       - a shift/add sequence based on -VAL, followed by a negation
2987       - a shift/add sequence based on VAL - 1, followed by an addition.
2988
2989   Return true if the cheapest of these cost less than MULT_COST,
2990   describing the algorithm in *ALG and final fixup in *VARIANT.  */
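
/* Worked example (illustrative only): for VAL == 7 the three candidates
   are a sequence for 7 itself (e.g. (x << 3) - x), a sequence for -7
   followed by a negation, and a sequence for 6 followed by an addition
   of the operand; whichever is cheapest under MULT_COST is chosen.  */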
2991
2992bool
2993choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2994		     struct algorithm *alg, enum mult_variant *variant,
2995		     int mult_cost)
2996{
2997  struct algorithm alg2;
2998  struct mult_cost limit;
2999  int op_cost;
3000  bool speed = optimize_insn_for_speed_p ();
3001
3002  /* Fail quickly for impossible bounds.  */
3003  if (mult_cost < 0)
3004    return false;
3005
3006  /* Ensure that mult_cost provides a reasonable upper bound.
3007     Any constant multiplication can be performed with less
3008     than 2 * bits additions.  */
3009  op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3010  if (mult_cost > op_cost)
3011    mult_cost = op_cost;
3012
3013  *variant = basic_variant;
3014  limit.cost = mult_cost;
3015  limit.latency = mult_cost;
3016  synth_mult (alg, val, &limit, mode);
3017
3018  /* This works only if the inverted value actually fits in an
3019     `unsigned int'.  */
3020  if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3021    {
3022      op_cost = neg_cost (speed, mode);
3023      if (MULT_COST_LESS (&alg->cost, mult_cost))
3024	{
3025	  limit.cost = alg->cost.cost - op_cost;
3026	  limit.latency = alg->cost.latency - op_cost;
3027	}
3028      else
3029	{
3030	  limit.cost = mult_cost - op_cost;
3031	  limit.latency = mult_cost - op_cost;
3032	}
3033
3034      synth_mult (&alg2, -val, &limit, mode);
3035      alg2.cost.cost += op_cost;
3036      alg2.cost.latency += op_cost;
3037      if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3038	*alg = alg2, *variant = negate_variant;
3039    }
3040
3041  /* This proves very useful for division-by-constant.  */
3042  op_cost = add_cost (speed, mode);
3043  if (MULT_COST_LESS (&alg->cost, mult_cost))
3044    {
3045      limit.cost = alg->cost.cost - op_cost;
3046      limit.latency = alg->cost.latency - op_cost;
3047    }
3048  else
3049    {
3050      limit.cost = mult_cost - op_cost;
3051      limit.latency = mult_cost - op_cost;
3052    }
3053
3054  synth_mult (&alg2, val - 1, &limit, mode);
3055  alg2.cost.cost += op_cost;
3056  alg2.cost.latency += op_cost;
3057  if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3058    *alg = alg2, *variant = add_variant;
3059
3060  return MULT_COST_LESS (&alg->cost, mult_cost);
3061}
3062
3063/* A subroutine of expand_mult, used for constant multiplications.
3064   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3065   convenient.  Use the shift/add sequence described by ALG and apply
3066   the final fixup specified by VARIANT.  */
3067
3068static rtx
3069expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3070		   rtx target, const struct algorithm *alg,
3071		   enum mult_variant variant)
3072{
3073  unsigned HOST_WIDE_INT val_so_far;
3074  rtx_insn *insn;
3075  rtx accum, tem;
3076  int opno;
3077  machine_mode nmode;
3078
3079  /* Avoid referencing memory over and over and invalid sharing
3080     on SUBREGs.  */
3081  op0 = force_reg (mode, op0);
3082
3083  /* ACCUM starts out either as OP0 or as a zero, depending on
3084     the first operation.  */
3085
3086  if (alg->op[0] == alg_zero)
3087    {
3088      accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3089      val_so_far = 0;
3090    }
3091  else if (alg->op[0] == alg_m)
3092    {
3093      accum = copy_to_mode_reg (mode, op0);
3094      val_so_far = 1;
3095    }
3096  else
3097    gcc_unreachable ();
3098
3099  for (opno = 1; opno < alg->ops; opno++)
3100    {
3101      int log = alg->log[opno];
3102      rtx shift_subtarget = optimize ? 0 : accum;
3103      rtx add_target
3104	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
3105	   && !optimize)
3106	  ? target : 0;
3107      rtx accum_target = optimize ? 0 : accum;
3108      rtx accum_inner;
3109
3110      switch (alg->op[opno])
3111	{
3112	case alg_shift:
3113	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3114	  /* REG_EQUAL note will be attached to the following insn.  */
3115	  emit_move_insn (accum, tem);
3116	  val_so_far <<= log;
3117	  break;
3118
3119	case alg_add_t_m2:
3120	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3121	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3122				 add_target ? add_target : accum_target);
3123	  val_so_far += HOST_WIDE_INT_1U << log;
3124	  break;
3125
3126	case alg_sub_t_m2:
3127	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3128	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3129				 add_target ? add_target : accum_target);
3130	  val_so_far -= HOST_WIDE_INT_1U << log;
3131	  break;
3132
3133	case alg_add_t2_m:
3134	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3135				log, shift_subtarget, 0);
3136	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3137				 add_target ? add_target : accum_target);
3138	  val_so_far = (val_so_far << log) + 1;
3139	  break;
3140
3141	case alg_sub_t2_m:
3142	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3143				log, shift_subtarget, 0);
3144	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3145				 add_target ? add_target : accum_target);
3146	  val_so_far = (val_so_far << log) - 1;
3147	  break;
3148
3149	case alg_add_factor:
3150	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3151	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3152				 add_target ? add_target : accum_target);
3153	  val_so_far += val_so_far << log;
3154	  break;
3155
3156	case alg_sub_factor:
3157	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3158	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3159				 (add_target
3160				  ? add_target : (optimize ? 0 : tem)));
3161	  val_so_far = (val_so_far << log) - val_so_far;
3162	  break;
3163
3164	default:
3165	  gcc_unreachable ();
3166	}
3167
3168      if (SCALAR_INT_MODE_P (mode))
3169	{
3170	  /* Write a REG_EQUAL note on the last insn so that we can cse
3171	     multiplication sequences.  Note that if ACCUM is a SUBREG,
3172	     we've set the inner register and must properly indicate that.  */
3173          tem = op0, nmode = mode;
3174          accum_inner = accum;
3175          if (GET_CODE (accum) == SUBREG)
3176	    {
3177	      accum_inner = SUBREG_REG (accum);
3178	      nmode = GET_MODE (accum_inner);
3179	      tem = gen_lowpart (nmode, op0);
3180	    }
3181
3182	  /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3183	     In that case, only the low bits of accum would be guaranteed to
3184	     be equal to the content of the REG_EQUAL note; the upper bits
3185	     can be anything.  */
3186	  if (!paradoxical_subreg_p (tem))
3187	    {
3188	      insn = get_last_insn ();
3189	      set_dst_reg_note (insn, REG_EQUAL,
3190				gen_rtx_MULT (nmode, tem,
3191					      gen_int_mode (val_so_far,
3192							    nmode)),
3193				accum_inner);
3194	    }
3195	}
3196    }
3197
3198  if (variant == negate_variant)
3199    {
3200      val_so_far = -val_so_far;
3201      accum = expand_unop (mode, neg_optab, accum, target, 0);
3202    }
3203  else if (variant == add_variant)
3204    {
3205      val_so_far = val_so_far + 1;
3206      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3207    }
3208
3209  /* Compare only the bits of val and val_so_far that are significant
3210     in the result mode, to avoid sign-/zero-extension confusion.  */
3211  nmode = GET_MODE_INNER (mode);
3212  val &= GET_MODE_MASK (nmode);
3213  val_so_far &= GET_MODE_MASK (nmode);
3214  gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3215
3216  return accum;
3217}
3218
3219/* Perform a multiplication and return an rtx for the result.
3220   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3221   TARGET is a suggestion for where to store the result (an rtx).
3222
3223   We check specially for a constant integer as OP1.
3224   If you want this check for OP0 as well, then before calling
3225   you should swap the two operands if OP0 would be constant.  */
3226
3227rtx
3228expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3229	     int unsignedp)
3230{
3231  enum mult_variant variant;
3232  struct algorithm algorithm;
3233  rtx scalar_op1;
3234  int max_cost;
3235  bool speed = optimize_insn_for_speed_p ();
3236  bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3237
3238  if (CONSTANT_P (op0))
3239    std::swap (op0, op1);
3240
3241  /* For vectors, there are several simplifications that can be made if
3242     all elements of the vector constant are identical.  */
3243  scalar_op1 = unwrap_const_vec_duplicate (op1);
3244
3245  if (INTEGRAL_MODE_P (mode))
3246    {
3247      rtx fake_reg;
3248      HOST_WIDE_INT coeff;
3249      bool is_neg;
3250      int mode_bitsize;
3251
3252      if (op1 == CONST0_RTX (mode))
3253	return op1;
3254      if (op1 == CONST1_RTX (mode))
3255	return op0;
3256      if (op1 == CONSTM1_RTX (mode))
3257	return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3258			    op0, target, 0);
3259
3260      if (do_trapv)
3261	goto skip_synth;
3262
3263      /* If mode is an integer vector mode, check whether the backend
3264	 supports vector lshift (by scalar or vector) at all.  If not, we
3265	 can't use a synthesized multiply.  */
3266      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3267	  && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3268	  && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3269	goto skip_synth;
3270
3271      /* These are the operations that are potentially turned into
3272	 a sequence of shifts and additions.  */
3273      mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3274
3275      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3276	 less than or equal in size to `unsigned int' this doesn't matter.
3277	 If the mode is larger than `unsigned int', then synth_mult works
3278	 only if the constant value exactly fits in an `unsigned int' without
3279	 any truncation.  This means that multiplying by negative values does
3280	 not work; results are off by 2^32 on a 32 bit machine.  */
3281      if (CONST_INT_P (scalar_op1))
3282	{
3283	  coeff = INTVAL (scalar_op1);
3284	  is_neg = coeff < 0;
3285	}
3286#if TARGET_SUPPORTS_WIDE_INT
3287      else if (CONST_WIDE_INT_P (scalar_op1))
3288#else
3289      else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3290#endif
3291	{
3292	  int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3293	  /* Perfect power of 2 (other than 1, which is handled above).  */
3294	  if (shift > 0)
3295	    return expand_shift (LSHIFT_EXPR, mode, op0,
3296				 shift, target, unsignedp);
3297	  else
3298	    goto skip_synth;
3299	}
3300      else
3301	goto skip_synth;
3302
3303      /* We used to test optimize here, on the grounds that it's better to
3304	 produce a smaller program when -O is not used.  But this causes
3305	 such a terrible slowdown sometimes that it seems better to always
3306	 use synth_mult.  */
3307
3308      /* Special case powers of two.  */
3309      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3310	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3311	return expand_shift (LSHIFT_EXPR, mode, op0,
3312			     floor_log2 (coeff), target, unsignedp);
3313
3314      fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3315
3316      /* Attempt to handle multiplication of DImode values by negative
3317	 coefficients, by performing the multiplication by a positive
3318	 multiplier and then negating the result.  */
3319      if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3320	{
3321	  /* It's safe to use -coeff even for INT_MIN, as the
3322	     result is interpreted as an unsigned coefficient.
3323	     Exclude cost of op0 from max_cost to match the cost
3324	     calculation of the synth_mult.  */
3325	  coeff = -(unsigned HOST_WIDE_INT) coeff;
3326	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3327				    mode, speed)
3328		      - neg_cost (speed, mode));
3329	  if (max_cost <= 0)
3330	    goto skip_synth;
3331
3332	  /* Special case powers of two.  */
3333	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3334	    {
3335	      rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3336				       floor_log2 (coeff), target, unsignedp);
3337	      return expand_unop (mode, neg_optab, temp, target, 0);
3338	    }
3339
3340	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3341				   max_cost))
3342	    {
3343	      rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3344					    &algorithm, variant);
3345	      return expand_unop (mode, neg_optab, temp, target, 0);
3346	    }
3347	  goto skip_synth;
3348	}
3349
3350      /* Exclude cost of op0 from max_cost to match the cost
3351	 calculation of the synth_mult.  */
3352      max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3353      if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3354	return expand_mult_const (mode, op0, coeff, target,
3355				  &algorithm, variant);
3356    }
3357 skip_synth:
3358
3359  /* Expand x*2.0 as x+x.  */
3360  if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3361      && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3362    {
3363      op0 = force_reg (GET_MODE (op0), op0);
3364      return expand_binop (mode, add_optab, op0, op0,
3365			   target, unsignedp, OPTAB_LIB_WIDEN);
3366    }
3367
3368  /* This used to use umul_optab if unsigned, but for non-widening multiply
3369     there is no difference between signed and unsigned.  */
3370  op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3371		      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3372  gcc_assert (op0);
3373  return op0;
3374}
3375
3376/* Return a cost estimate for multiplying a register by the given
3377   COEFFicient in the given MODE and SPEED.  */
3378
3379int
3380mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3381{
3382  int max_cost;
3383  struct algorithm algorithm;
3384  enum mult_variant variant;
3385
3386  rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3387  max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3388			   mode, speed);
3389  if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3390    return algorithm.cost.cost;
3391  else
3392    return max_cost;
3393}
3394
3395/* Perform a widening multiplication and return an rtx for the result.
3396   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3397   TARGET is a suggestion for where to store the result (an rtx).
3398   THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3399   or smul_widen_optab.
3400
3401   We check specially for a constant integer as OP1, comparing the
3402   cost of a widening multiply against the cost of a sequence of shifts
3403   and adds.  */
3404
3405rtx
3406expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3407		      int unsignedp, optab this_optab)
3408{
3409  bool speed = optimize_insn_for_speed_p ();
3410  rtx cop1;
3411
3412  if (CONST_INT_P (op1)
3413      && GET_MODE (op0) != VOIDmode
3414      && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3415				this_optab == umul_widen_optab))
3416      && CONST_INT_P (cop1)
3417      && (INTVAL (cop1) >= 0
3418	  || HWI_COMPUTABLE_MODE_P (mode)))
3419    {
3420      HOST_WIDE_INT coeff = INTVAL (cop1);
3421      int max_cost;
3422      enum mult_variant variant;
3423      struct algorithm algorithm;
3424
3425      if (coeff == 0)
3426	return CONST0_RTX (mode);
3427
3428      /* Special case powers of two.  */
3429      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3430	{
3431	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3432	  return expand_shift (LSHIFT_EXPR, mode, op0,
3433			       floor_log2 (coeff), target, unsignedp);
3434	}
3435
3436      /* Exclude cost of op0 from max_cost to match the cost
3437	 calculation of the synth_mult.  */
3438      max_cost = mul_widen_cost (speed, mode);
3439      if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3440			       max_cost))
3441	{
3442	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3443	  return expand_mult_const (mode, op0, coeff, target,
3444				    &algorithm, variant);
3445	}
3446    }
3447  return expand_binop (mode, this_optab, op0, op1, target,
3448		       unsignedp, OPTAB_LIB_WIDEN);
3449}
3450
3451/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3452   replace division by D, and put the least significant N bits of the result
3453   in *MULTIPLIER_PTR and return the most significant bit.
3454
3455   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3456   needed precision is in PRECISION (should be <= N).
3457
3458   PRECISION should be as small as possible so this function can choose the
3459   multiplier more freely.
3460
3461   The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3462   is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3463
3464   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3465   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3466
3467unsigned HOST_WIDE_INT
3468choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3469		   unsigned HOST_WIDE_INT *multiplier_ptr,
3470		   int *post_shift_ptr, int *lgup_ptr)
3471{
3472  int lgup, post_shift;
3473  int pow, pow2;
3474
3475  /* lgup = ceil(log2(divisor)); */
3476  lgup = ceil_log2 (d);
3477
3478  gcc_assert (lgup <= n);
3479
3480  pow = n + lgup;
3481  pow2 = n + lgup - precision;
3482
3483  /* mlow = 2^(N + lgup)/d */
3484  wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3485  wide_int mlow = wi::udiv_trunc (val, d);
3486
3487  /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3488  val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3489  wide_int mhigh = wi::udiv_trunc (val, d);
3490
3491  /* If precision == N, then mlow, mhigh exceed 2^N
3492     (but they do not exceed 2^(N+1)).  */
3493
3494  /* Reduce to lowest terms.  */
3495  for (post_shift = lgup; post_shift > 0; post_shift--)
3496    {
3497      unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3498						       HOST_BITS_PER_WIDE_INT);
3499      unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3500						       HOST_BITS_PER_WIDE_INT);
3501      if (ml_lo >= mh_lo)
3502	break;
3503
3504      mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3505      mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3506    }
3507
3508  *post_shift_ptr = post_shift;
3509  *lgup_ptr = lgup;
3510  if (n < HOST_BITS_PER_WIDE_INT)
3511    {
3512      unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3513      *multiplier_ptr = mhigh.to_uhwi () & mask;
3514      return mhigh.to_uhwi () >= mask;
3515    }
3516  else
3517    {
3518      *multiplier_ptr = mhigh.to_uhwi ();
3519      return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3520    }
3521}
3522
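/* Worked example, assuming 32-bit operands (N == PRECISION == 32):
   for D == 5 the function chooses the multiplier 0xCCCCCCCD
   (== ceil (2^34 / 5)) with *POST_SHIFT_PTR == 2 and returns 0, so an
   unsigned 32-bit x / 5 can be computed as the high half of
   x * 0xCCCCCCCD shifted right by 2.  For D == 7 the minimal multiplier
   needs 33 bits, so the function returns 1 and callers such as
   expand_divmod fall back to the add-and-shift fixup sequence.  The
   never-compiled sketch below merely restates the D == 5 identity in C;
   the function name is illustrative only.  */
#if 0
static void
divide_by_5_multiplier_example (void)
{
  unsigned int x = 123456789;		/* any 32-bit value works */
  unsigned HOST_WIDE_INT m = 0xCCCCCCCD;	/* ceil (2^34 / 5) */
  unsigned int q = (unsigned int) ((x * m) >> 34);
  gcc_assert (q == x / 5);
}
#endif
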
3523/* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3524   congruent to 1 (mod 2**N).  */
3525
3526static unsigned HOST_WIDE_INT
3527invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3528{
3529  /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3530
3531  /* The algorithm notes that the choice y = x satisfies
3532     x*y == 1 mod 2^3, since x is assumed odd.
3533     Each iteration doubles the number of bits of significance in y.  */
3534
3535  unsigned HOST_WIDE_INT mask;
3536  unsigned HOST_WIDE_INT y = x;
3537  int nbit = 3;
3538
3539  mask = (n == HOST_BITS_PER_WIDE_INT
3540	  ? HOST_WIDE_INT_M1U
3541	  : (HOST_WIDE_INT_1U << n) - 1);
3542
3543  while (nbit < n)
3544    {
3545      y = y * (2 - x*y) & mask;		/* Modulo 2^N */
3546      nbit *= 2;
3547    }
3548  return y;
3549}
3550
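/* Worked example (N == 8, so arithmetic is modulo 256): for X == 3 the
   loop starts from y == 3 (3*3 == 9 == 1 mod 8) and doubles the precision
   each step: y = 3 * (2 - 9) mod 256 == 235 (3*235 == 705 == 1 mod 64),
   then y = 235 * (2 - 3*235) mod 256 == 171, and indeed
   3 * 171 == 513 == 1 (mod 256).  */
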
3551/* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3552   with the wrong signedness.  ADJ_OPERAND is already the high half of the
3553   product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3554   to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3555   become signed.
3556
3557   The result is put in TARGET if that is convenient.
3558
3559   MODE is the mode of operation.  */
3560
3561rtx
3562expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3563			     rtx op1, rtx target, int unsignedp)
3564{
3565  rtx tem;
3566  enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3567
3568  tem = expand_shift (RSHIFT_EXPR, mode, op0,
3569		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3570  tem = expand_and (mode, tem, op1, NULL_RTX);
3571  adj_operand
3572    = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3573		     adj_operand);
3574
3575  tem = expand_shift (RSHIFT_EXPR, mode, op1,
3576		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3577  tem = expand_and (mode, tem, op0, NULL_RTX);
3578  target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3579			  target);
3580
3581  return target;
3582}
3583
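/* The adjustment above relies on the identity (for N-bit operands a and b,
   with all arithmetic taken modulo 2^N)

	high_u (a, b) == high_s (a, b) + (a < 0 ? b : 0) + (b < 0 ? a : 0)

   where high_u/high_s denote the upper N bits of the unsigned resp. signed
   2N-bit product: reinterpreting a negative signed operand as unsigned adds
   2^N to it, which contributes the other operand to the high half.  Going
   from unsigned to signed the same correction is subtracted, which is what
   ADJ_CODE selects.  */
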
3584/* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3585
3586static rtx
3587extract_high_half (machine_mode mode, rtx op)
3588{
3589  machine_mode wider_mode;
3590
3591  if (mode == word_mode)
3592    return gen_highpart (mode, op);
3593
3594  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3595
3596  wider_mode = GET_MODE_WIDER_MODE (mode);
3597  op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3598		     GET_MODE_BITSIZE (mode), 0, 1);
3599  return convert_modes (mode, wider_mode, op, 0);
3600}
3601
3602/* Like expmed_mult_highpart, but only consider using a multiplication
3603   optab.  OP1 is an rtx for the constant operand.  */
3604
3605static rtx
3606expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3607			    rtx target, int unsignedp, int max_cost)
3608{
3609  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3610  machine_mode wider_mode;
3611  optab moptab;
3612  rtx tem;
3613  int size;
3614  bool speed = optimize_insn_for_speed_p ();
3615
3616  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3617
3618  wider_mode = GET_MODE_WIDER_MODE (mode);
3619  size = GET_MODE_BITSIZE (mode);
3620
3621  /* Firstly, try using a multiplication insn that only generates the needed
3622     high part of the product, and in the sign flavor of unsignedp.  */
3623  if (mul_highpart_cost (speed, mode) < max_cost)
3624    {
3625      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3626      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3627			  unsignedp, OPTAB_DIRECT);
3628      if (tem)
3629	return tem;
3630    }
3631
3632  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3633     Need to adjust the result after the multiplication.  */
3634  if (size - 1 < BITS_PER_WORD
3635      && (mul_highpart_cost (speed, mode)
3636	  + 2 * shift_cost (speed, mode, size-1)
3637	  + 4 * add_cost (speed, mode) < max_cost))
3638    {
3639      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3640      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3641			  unsignedp, OPTAB_DIRECT);
3642      if (tem)
3643	/* We used the wrong signedness.  Adjust the result.  */
3644	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3645					    tem, unsignedp);
3646    }
3647
3648  /* Try widening multiplication.  */
3649  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3650  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3651      && mul_widen_cost (speed, wider_mode) < max_cost)
3652    {
3653      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3654			  unsignedp, OPTAB_WIDEN);
3655      if (tem)
3656	return extract_high_half (mode, tem);
3657    }
3658
3659  /* Try widening the mode and performing a non-widening multiplication.  */
3660  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3661      && size - 1 < BITS_PER_WORD
3662      && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3663	  < max_cost))
3664    {
3665      rtx_insn *insns;
3666      rtx wop0, wop1;
3667
3668      /* We need to widen the operands, for example to ensure the
3669	 constant multiplier is correctly sign or zero extended.
3670	 Use a sequence to clean up any instructions emitted by
3671	 the conversions if things don't work out.  */
3672      start_sequence ();
3673      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3674      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3675      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3676			  unsignedp, OPTAB_WIDEN);
3677      insns = get_insns ();
3678      end_sequence ();
3679
3680      if (tem)
3681	{
3682	  emit_insn (insns);
3683	  return extract_high_half (mode, tem);
3684	}
3685    }
3686
3687  /* Try widening multiplication of opposite signedness, and adjust.  */
3688  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3689  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3690      && size - 1 < BITS_PER_WORD
3691      && (mul_widen_cost (speed, wider_mode)
3692	  + 2 * shift_cost (speed, mode, size-1)
3693	  + 4 * add_cost (speed, mode) < max_cost))
3694    {
3695      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3696			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3697      if (tem != 0)
3698	{
3699	  tem = extract_high_half (mode, tem);
3700	  /* We used the wrong signedness.  Adjust the result.  */
3701	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3702					      target, unsignedp);
3703	}
3704    }
3705
3706  return 0;
3707}
3708
3709/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3710   putting the high half of the result in TARGET if that is convenient,
3711   and return where the result is.  If the operation cannot be performed,
3712   0 is returned.
3713
3714   MODE is the mode of operation and result.
3715
3716   UNSIGNEDP nonzero means unsigned multiply.
3717
3718   MAX_COST is the total allowed cost for the expanded RTL.  */
3719
3720static rtx
3721expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3722		      rtx target, int unsignedp, int max_cost)
3723{
3724  machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3725  unsigned HOST_WIDE_INT cnst1;
3726  int extra_cost;
3727  bool sign_adjust = false;
3728  enum mult_variant variant;
3729  struct algorithm alg;
3730  rtx tem;
3731  bool speed = optimize_insn_for_speed_p ();
3732
3733  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3734  /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3735  gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3736
3737  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3738
3739  /* We can't optimize modes wider than BITS_PER_WORD.
3740     ??? We might be able to perform double-word arithmetic if
3741     mode == word_mode, however all the cost calculations in
3742     synth_mult etc. assume single-word operations.  */
3743  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3744    return expmed_mult_highpart_optab (mode, op0, op1, target,
3745				       unsignedp, max_cost);
3746
3747  extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3748
3749  /* Check whether we try to multiply by a negative constant.  */
3750  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3751    {
3752      sign_adjust = true;
3753      extra_cost += add_cost (speed, mode);
3754    }
3755
3756  /* See whether shift/add multiplication is cheap enough.  */
3757  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3758			   max_cost - extra_cost))
3759    {
3760      /* See whether the specialized multiplication optabs are
3761	 cheaper than the shift/add version.  */
3762      tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3763					alg.cost.cost + extra_cost);
3764      if (tem)
3765	return tem;
3766
3767      tem = convert_to_mode (wider_mode, op0, unsignedp);
3768      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3769      tem = extract_high_half (mode, tem);
3770
3771      /* Adjust result for signedness.  */
3772      if (sign_adjust)
3773	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3774
3775      return tem;
3776    }
3777  return expmed_mult_highpart_optab (mode, op0, op1, target,
3778				     unsignedp, max_cost);
3779}
3780
3781
3782/* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3783
3784static rtx
3785expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3786{
3787  rtx result, temp, shift;
3788  rtx_code_label *label;
3789  int logd;
3790  int prec = GET_MODE_PRECISION (mode);
3791
3792  logd = floor_log2 (d);
3793  result = gen_reg_rtx (mode);
3794
3795  /* Avoid conditional branches when they're expensive.  */
3796  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3797      && optimize_insn_for_speed_p ())
3798    {
3799      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3800				      mode, 0, -1);
3801      if (signmask)
3802	{
3803	  HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3804	  signmask = force_reg (mode, signmask);
3805	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3806
3807	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
3808	     which instruction sequence to use.  If logical right shifts
3809	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3810	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
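
	  /* Both sequences compute the truncating (sign-following) remainder
	     branch-free.  E.g. with d == 4 (masklow == 3) and op0 == -5, the
	     add/and/sub form yields ((-5 + 3) & 3) - 3 == 2 - 3 == -1, while
	     the xor/sub/and/xor/sub form reaches the same value by negating,
	     taking the remainder of the absolute value, and negating back.  */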
3811
3812	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
3813	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3814	      || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3815		  > COSTS_N_INSNS (2)))
3816	    {
3817	      temp = expand_binop (mode, xor_optab, op0, signmask,
3818				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3819	      temp = expand_binop (mode, sub_optab, temp, signmask,
3820				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3821	      temp = expand_binop (mode, and_optab, temp,
3822				   gen_int_mode (masklow, mode),
3823				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3824	      temp = expand_binop (mode, xor_optab, temp, signmask,
3825				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3826	      temp = expand_binop (mode, sub_optab, temp, signmask,
3827				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3828	    }
3829	  else
3830	    {
3831	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
3832				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
3833	      signmask = force_reg (mode, signmask);
3834
3835	      temp = expand_binop (mode, add_optab, op0, signmask,
3836				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3837	      temp = expand_binop (mode, and_optab, temp,
3838				   gen_int_mode (masklow, mode),
3839				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3840	      temp = expand_binop (mode, sub_optab, temp, signmask,
3841				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3842	    }
3843	  return temp;
3844	}
3845    }
3846
3847  /* Mask contains the mode's signbit and the significant bits of the
3848     modulus.  By including the signbit in the operation, many targets
3849     can avoid an explicit compare operation in the following comparison
3850     against zero.  */
3851  wide_int mask = wi::mask (logd, false, prec);
3852  mask = wi::set_bit (mask, prec - 1);
3853
3854  temp = expand_binop (mode, and_optab, op0,
3855		       immed_wide_int_const (mask, mode),
3856		       result, 1, OPTAB_LIB_WIDEN);
3857  if (temp != result)
3858    emit_move_insn (result, temp);
3859
3860  label = gen_label_rtx ();
3861  do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3862
3863  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3864		       0, OPTAB_LIB_WIDEN);
3865
3866  mask = wi::mask (logd, true, prec);
3867  temp = expand_binop (mode, ior_optab, temp,
3868		       immed_wide_int_const (mask, mode),
3869		       result, 1, OPTAB_LIB_WIDEN);
3870  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3871		       0, OPTAB_LIB_WIDEN);
3872  if (temp != result)
3873    emit_move_insn (result, temp);
3874  emit_label (label);
3875  return result;
3876}
3877
3878/* Expand signed division of OP0 by a power of two D in mode MODE.
3879   This routine is only called for positive values of D.  */
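
/* Every strategy below implements the same identity: for d == 2^k,
   truncating signed division is

	op0 / d == (op0 + (op0 < 0 ? d - 1 : 0)) >> k	(arithmetic shift)

   e.g. -7 / 4 == (-7 + 3) >> 2 == -4 >> 2 == -1.  The strategies differ
   only in how the conditional addend is materialized: a store-flag insn,
   a conditional move, sign-mask arithmetic, or an explicit branch.  */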
3880
3881static rtx
3882expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3883{
3884  rtx temp;
3885  rtx_code_label *label;
3886  int logd;
3887
3888  logd = floor_log2 (d);
3889
3890  if (d == 2
3891      && BRANCH_COST (optimize_insn_for_speed_p (),
3892		      false) >= 1)
3893    {
3894      temp = gen_reg_rtx (mode);
3895      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3896      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3897			   0, OPTAB_LIB_WIDEN);
3898      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3899    }
3900
3901  if (HAVE_conditional_move
3902      && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3903    {
3904      rtx temp2;
3905
3906      start_sequence ();
3907      temp2 = copy_to_mode_reg (mode, op0);
3908      temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3909			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
3910      temp = force_reg (mode, temp);
3911
3912      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3913      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3914				     mode, temp, temp2, mode, 0);
3915      if (temp2)
3916	{
3917	  rtx_insn *seq = get_insns ();
3918	  end_sequence ();
3919	  emit_insn (seq);
3920	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3921	}
3922      end_sequence ();
3923    }
3924
3925  if (BRANCH_COST (optimize_insn_for_speed_p (),
3926		   false) >= 2)
3927    {
3928      int ushift = GET_MODE_BITSIZE (mode) - logd;
3929
3930      temp = gen_reg_rtx (mode);
3931      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3932      if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3933	  || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3934	     > COSTS_N_INSNS (1))
3935	temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3936			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
3937      else
3938	temp = expand_shift (RSHIFT_EXPR, mode, temp,
3939			     ushift, NULL_RTX, 1);
3940      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3941			   0, OPTAB_LIB_WIDEN);
3942      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3943    }
3944
3945  label = gen_label_rtx ();
3946  temp = copy_to_mode_reg (mode, op0);
3947  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3948  expand_inc (temp, gen_int_mode (d - 1, mode));
3949  emit_label (label);
3950  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3951}
3952
3953/* Emit the code to divide OP0 by OP1, putting the result in TARGET
3954   if that is convenient, and returning where the result is.
3955   You may request either the quotient or the remainder as the result;
3956   specify REM_FLAG nonzero to get the remainder.
3957
3958   CODE is the expression code for which kind of division this is;
3959   it controls how rounding is done.  MODE is the machine mode to use.
3960   UNSIGNEDP nonzero means do unsigned division.  */
3961
3962/* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3963   and then correct it by or'ing in missing high bits
3964   if result of ANDI is nonzero.
3965   For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3966   This could optimize to a bfexts instruction.
3967   But C doesn't use these operations, so their optimizations are
3968   left for later.  */
3969/* ??? For modulo, we don't actually need the highpart of the first product,
3970   the low part will do nicely.  And for small divisors, the second multiply
3971   can also be a low-part only multiply or even be completely left out.
3972   E.g. to calculate the remainder of a division by 3 with a 32 bit
3973   multiply, multiply with 0x55555556 and extract the upper two bits;
3974   the result is exact for inputs up to 0x1fffffff.
3975   The input range can be reduced by using cross-sum rules.
3976   For odd divisors >= 3, the following table gives right shift counts
3977   so that if a number is shifted by an integer multiple of the given
3978   amount, the remainder stays the same:
3979   2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3980   14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3981   0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3982   20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3983   0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3984
3985   Cross-sum rules for even numbers can be derived by leaving as many bits
3986   to the right alone as the divisor has zeros to the right.
3987   E.g. if x is an unsigned 32 bit number:
3988   (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3989   */
3990
3991rtx
3992expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3993	       rtx op0, rtx op1, rtx target, int unsignedp)
3994{
3995  machine_mode compute_mode;
3996  rtx tquotient;
3997  rtx quotient = 0, remainder = 0;
3998  rtx_insn *last;
3999  int size;
4000  rtx_insn *insn;
4001  optab optab1, optab2;
4002  int op1_is_constant, op1_is_pow2 = 0;
4003  int max_cost, extra_cost;
4004  static HOST_WIDE_INT last_div_const = 0;
4005  bool speed = optimize_insn_for_speed_p ();
4006
4007  op1_is_constant = CONST_INT_P (op1);
4008  if (op1_is_constant)
4009    {
4010      wide_int ext_op1 = rtx_mode_t (op1, mode);
4011      op1_is_pow2 = (wi::popcount (ext_op1) == 1
4012		     || (! unsignedp
4013			 && wi::popcount (wi::neg (ext_op1)) == 1));
4014    }
4015
4016  /*
4017     This is the structure of expand_divmod:
4018
4019     First comes code to fix up the operands so we can perform the operations
4020     correctly and efficiently.
4021
4022     Second comes a switch statement with code specific for each rounding mode.
4023     For some special operands this code emits all RTL for the desired
4024     operation, for other cases, it generates only a quotient and stores it in
4025     QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4026     to indicate that it has not done anything.
4027
4028     Last comes code that finishes the operation.  If QUOTIENT is set and
4029     REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4030     QUOTIENT is not set, it is computed using trunc rounding.
4031
4032     We try to generate special code for division and remainder when OP1 is a
4033     constant.  If |OP1| = 2**n we can use shifts and some other fast
4034     operations.  For other values of OP1, we compute a carefully selected
4035     fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4036     by m.
4037
4038     In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4039     half of the product.  Different strategies for generating the product are
4040     implemented in expmed_mult_highpart.
4041
4042     If what we actually want is the remainder, we generate that by another
4043     by-constant multiplication and a subtraction.  */
4044
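  /* As a concrete illustration of the by-constant path, assuming 32-bit
     operations: for an unsigned division by 7 the multiplier chosen by
     choose_multiplier needs 33 bits, so the TRUNC_DIV code below emits the
     fixup sequence

	 t1 = highpart (x * 0x24924925);
	 q  = (t1 + ((x - t1) >> 1)) >> 2;

     e.g. for x == 100 this gives t1 == 14 and q == 14.  */
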
4045  /* We shouldn't be called with OP1 == const1_rtx, but some of the
4046     code below will malfunction if we are, so check here and handle
4047     the special case if so.  */
4048  if (op1 == const1_rtx)
4049    return rem_flag ? const0_rtx : op0;
4050
4051  /* When dividing by -1, we could get an overflow.
4052     negv_optab can handle overflows.  */
4053  if (! unsignedp && op1 == constm1_rtx)
4054    {
4055      if (rem_flag)
4056	return const0_rtx;
4057      return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4058			  ? negv_optab : neg_optab, op0, target, 0);
4059    }
4060
4061  if (target
4062      /* Don't use the function value register as a target
4063	 since we have to read it as well as write it,
4064	 and function-inlining gets confused by this.  */
4065      && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4066	  /* Don't clobber an operand while doing a multi-step calculation.  */
4067	  || ((rem_flag || op1_is_constant)
4068	      && (reg_mentioned_p (target, op0)
4069		  || (MEM_P (op0) && MEM_P (target))))
4070	  || reg_mentioned_p (target, op1)
4071	  || (MEM_P (op1) && MEM_P (target))))
4072    target = 0;
4073
4074  /* Get the mode in which to perform this computation.  Normally it will
4075     be MODE, but sometimes we can't do the desired operation in MODE.
4076     If so, pick a wider mode in which we can do the operation.  Convert
4077     to that mode at the start to avoid repeated conversions.
4078
4079     First see what operations we need.  These depend on the expression
4080     we are evaluating.  (We assume that divxx3 insns exist under the
4081     same conditions as modxx3 insns and that these insns don't normally
4082     fail.  If these assumptions are not correct, we may generate less
4083     efficient code in some cases.)
4084
4085     Then see if we find a mode in which we can open-code that operation
4086     (either a division, modulus, or shift).  Finally, check for the smallest
4087     mode for which we can do the operation with a library call.  */
4088
4089  /* We might want to refine this now that we have division-by-constant
4090     optimization.  Since expmed_mult_highpart tries so many variants, it is
4091     not straightforward to generalize this.  Maybe we should make an array
4092     of possible modes in init_expmed?  Save this for GCC 2.7.  */
4093
4094  optab1 = (op1_is_pow2
4095	    ? (unsignedp ? lshr_optab : ashr_optab)
4096	    : (unsignedp ? udiv_optab : sdiv_optab));
4097  optab2 = (op1_is_pow2 ? optab1
4098	    : (unsignedp ? udivmod_optab : sdivmod_optab));
4099
4100  for (compute_mode = mode; compute_mode != VOIDmode;
4101       compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4102    if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4103	|| optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4104      break;
4105
4106  if (compute_mode == VOIDmode)
4107    for (compute_mode = mode; compute_mode != VOIDmode;
4108	 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4109      if (optab_libfunc (optab1, compute_mode)
4110	  || optab_libfunc (optab2, compute_mode))
4111	break;
4112
4113  /* If we still couldn't find a mode, use MODE, but expand_binop will
4114     probably die.  */
4115  if (compute_mode == VOIDmode)
4116    compute_mode = mode;
4117
4118  if (target && GET_MODE (target) == compute_mode)
4119    tquotient = target;
4120  else
4121    tquotient = gen_reg_rtx (compute_mode);
4122
4123  size = GET_MODE_BITSIZE (compute_mode);
4124#if 0
4125  /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4126     (mode), and thereby get better code when OP1 is a constant.  Do that
4127     later.  It will require going over all usages of SIZE below.  */
4128  size = GET_MODE_BITSIZE (mode);
4129#endif
4130
4131  /* Only deduct something for a REM if the last divide done was
4132     for a different constant.   Then set the constant of the last
4133     divide.  */
4134  max_cost = (unsignedp
4135	      ? udiv_cost (speed, compute_mode)
4136	      : sdiv_cost (speed, compute_mode));
4137  if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4138		     && INTVAL (op1) == last_div_const))
4139    max_cost -= (mul_cost (speed, compute_mode)
4140		 + add_cost (speed, compute_mode));
4141
4142  last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4143
4144  /* Now convert to the best mode to use.  */
4145  if (compute_mode != mode)
4146    {
4147      op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4148      op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4149
4150      /* convert_modes may have placed op1 into a register, so we
4151	 must recompute the following.  */
4152      op1_is_constant = CONST_INT_P (op1);
4153      if (op1_is_constant)
4154	{
4155	  wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4156	  op1_is_pow2 = (wi::popcount (ext_op1) == 1
4157			 || (! unsignedp
4158			     && wi::popcount (wi::neg (ext_op1)) == 1));
4159	}
4160      else
4161	op1_is_pow2 = 0;
4162    }
4163
4164  /* If one of the operands is a volatile MEM, copy it into a register.  */
4165
4166  if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4167    op0 = force_reg (compute_mode, op0);
4168  if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4169    op1 = force_reg (compute_mode, op1);
4170
4171  /* If we need the remainder or if OP1 is constant, we need to
4172     put OP0 in a register in case it has any queued subexpressions.  */
4173  if (rem_flag || op1_is_constant)
4174    op0 = force_reg (compute_mode, op0);
4175
4176  last = get_last_insn ();
4177
4178  /* Promote floor rounding to trunc rounding for unsigned operations.  */
4179  if (unsignedp)
4180    {
4181      if (code == FLOOR_DIV_EXPR)
4182	code = TRUNC_DIV_EXPR;
4183      if (code == FLOOR_MOD_EXPR)
4184	code = TRUNC_MOD_EXPR;
4185      if (code == EXACT_DIV_EXPR && op1_is_pow2)
4186	code = TRUNC_DIV_EXPR;
4187    }
4188
4189  if (op1 != const0_rtx)
4190    switch (code)
4191      {
4192      case TRUNC_MOD_EXPR:
4193      case TRUNC_DIV_EXPR:
4194	if (op1_is_constant)
4195	  {
4196	    if (unsignedp)
4197	      {
4198		unsigned HOST_WIDE_INT mh, ml;
4199		int pre_shift, post_shift;
4200		int dummy;
4201		wide_int wd = rtx_mode_t (op1, compute_mode);
4202		unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4203
4204		if (wi::popcount (wd) == 1)
4205		  {
4206		    pre_shift = floor_log2 (d);
4207		    if (rem_flag)
4208		      {
4209			unsigned HOST_WIDE_INT mask
4210			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
4211			remainder
4212			  = expand_binop (compute_mode, and_optab, op0,
4213					  gen_int_mode (mask, compute_mode),
4214					  remainder, 1,
4215					  OPTAB_LIB_WIDEN);
4216			if (remainder)
4217			  return gen_lowpart (mode, remainder);
4218		      }
4219		    quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4220					     pre_shift, tquotient, 1);
4221		  }
4222		else if (size <= HOST_BITS_PER_WIDE_INT)
4223		  {
4224		    if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4225		      {
4226			/* Most significant bit of divisor is set; emit an scc
4227			   insn.  */
4228			quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4229							  compute_mode, 1, 1);
4230		      }
4231		    else
4232		      {
4233			/* Find a suitable multiplier and right shift count
4234			   instead of multiplying with D.  */
4235
4236			mh = choose_multiplier (d, size, size,
4237						&ml, &post_shift, &dummy);
4238
4239			/* If the suggested multiplier is more than SIZE bits,
4240			   we can do better for even divisors, using an
4241			   initial right shift.  */
4242			if (mh != 0 && (d & 1) == 0)
4243			  {
4244			    pre_shift = ctz_or_zero (d);
4245			    mh = choose_multiplier (d >> pre_shift, size,
4246						    size - pre_shift,
4247						    &ml, &post_shift, &dummy);
4248			    gcc_assert (!mh);
4249			  }
4250			else
4251			  pre_shift = 0;
4252
4253			if (mh != 0)
4254			  {
4255			    rtx t1, t2, t3, t4;
4256
4257			    if (post_shift - 1 >= BITS_PER_WORD)
4258			      goto fail1;
4259
4260			    extra_cost
4261			      = (shift_cost (speed, compute_mode, post_shift - 1)
4262				 + shift_cost (speed, compute_mode, 1)
4263				 + 2 * add_cost (speed, compute_mode));
4264			    t1 = expmed_mult_highpart
4265			      (compute_mode, op0,
4266			       gen_int_mode (ml, compute_mode),
4267			       NULL_RTX, 1, max_cost - extra_cost);
4268			    if (t1 == 0)
4269			      goto fail1;
4270			    t2 = force_operand (gen_rtx_MINUS (compute_mode,
4271							       op0, t1),
4272						NULL_RTX);
4273			    t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4274					       t2, 1, NULL_RTX, 1);
4275			    t4 = force_operand (gen_rtx_PLUS (compute_mode,
4276							      t1, t3),
4277						NULL_RTX);
4278			    quotient = expand_shift
4279			      (RSHIFT_EXPR, compute_mode, t4,
4280			       post_shift - 1, tquotient, 1);
4281			  }
4282			else
4283			  {
4284			    rtx t1, t2;
4285
4286			    if (pre_shift >= BITS_PER_WORD
4287				|| post_shift >= BITS_PER_WORD)
4288			      goto fail1;
4289
4290			    t1 = expand_shift
4291			      (RSHIFT_EXPR, compute_mode, op0,
4292			       pre_shift, NULL_RTX, 1);
4293			    extra_cost
4294			      = (shift_cost (speed, compute_mode, pre_shift)
4295				 + shift_cost (speed, compute_mode, post_shift));
4296			    t2 = expmed_mult_highpart
4297			      (compute_mode, t1,
4298			       gen_int_mode (ml, compute_mode),
4299			       NULL_RTX, 1, max_cost - extra_cost);
4300			    if (t2 == 0)
4301			      goto fail1;
4302			    quotient = expand_shift
4303			      (RSHIFT_EXPR, compute_mode, t2,
4304			       post_shift, tquotient, 1);
4305			  }
4306		      }
4307		  }
4308		else		/* Too wide mode to use tricky code */
4309		  break;
4310
4311		insn = get_last_insn ();
4312		if (insn != last)
4313		  set_dst_reg_note (insn, REG_EQUAL,
4314				    gen_rtx_UDIV (compute_mode, op0, op1),
4315				    quotient);
4316	      }
4317	    else		/* TRUNC_DIV, signed */
4318	      {
4319		unsigned HOST_WIDE_INT ml;
4320		int lgup, post_shift;
4321		rtx mlr;
4322		HOST_WIDE_INT d = INTVAL (op1);
4323		unsigned HOST_WIDE_INT abs_d;
4324
4325		/* Not prepared to handle division/remainder by
4326		   0xffffffffffffffff8000000000000000 etc.  */
4327		if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4328		  break;
4329
4330		/* Since d might be INT_MIN, we have to cast to
4331		   unsigned HOST_WIDE_INT before negating to avoid
4332		   undefined signed overflow.  */
4333		abs_d = (d >= 0
4334			 ? (unsigned HOST_WIDE_INT) d
4335			 : - (unsigned HOST_WIDE_INT) d);
4336
4337		/* n rem d = n rem -d */
4338		if (rem_flag && d < 0)
4339		  {
4340		    d = abs_d;
4341		    op1 = gen_int_mode (abs_d, compute_mode);
4342		  }
4343
4344		if (d == 1)
4345		  quotient = op0;
4346		else if (d == -1)
4347		  quotient = expand_unop (compute_mode, neg_optab, op0,
4348					  tquotient, 0);
4349		else if (size <= HOST_BITS_PER_WIDE_INT
4350			 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4351		  {
4352		    /* This case is not handled correctly below.  */
4353		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
4354						compute_mode, 1, 1);
4355		    if (quotient == 0)
4356		      goto fail1;
4357		  }
4358		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4359			 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4360			 && (rem_flag
4361			     ? smod_pow2_cheap (speed, compute_mode)
4362			     : sdiv_pow2_cheap (speed, compute_mode))
4363			 /* We assume that the cheap metric is true if the
4364			    optab has an expander for this mode.  */
4365			 && ((optab_handler ((rem_flag ? smod_optab
4366					      : sdiv_optab),
4367					     compute_mode)
4368			      != CODE_FOR_nothing)
4369			     || (optab_handler (sdivmod_optab,
4370						compute_mode)
4371				 != CODE_FOR_nothing)))
4372		  ;
4373		else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4374		  {
4375		    if (rem_flag)
4376		      {
4377			remainder = expand_smod_pow2 (compute_mode, op0, d);
4378			if (remainder)
4379			  return gen_lowpart (mode, remainder);
4380		      }
4381
4382		    if (sdiv_pow2_cheap (speed, compute_mode)
4383			&& ((optab_handler (sdiv_optab, compute_mode)
4384			     != CODE_FOR_nothing)
4385			    || (optab_handler (sdivmod_optab, compute_mode)
4386				!= CODE_FOR_nothing)))
4387		      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4388						compute_mode, op0,
4389						gen_int_mode (abs_d,
4390							      compute_mode),
4391						NULL_RTX, 0);
4392		    else
4393		      quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4394
4395		    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4396		       negate the quotient.  */
4397		    if (d < 0)
4398		      {
4399			insn = get_last_insn ();
4400			if (insn != last
4401			    && abs_d < (HOST_WIDE_INT_1U
4402					<< (HOST_BITS_PER_WIDE_INT - 1)))
4403			  set_dst_reg_note (insn, REG_EQUAL,
4404					    gen_rtx_DIV (compute_mode, op0,
4405							 gen_int_mode
4406							   (abs_d,
4407							    compute_mode)),
4408					    quotient);
4409
4410			quotient = expand_unop (compute_mode, neg_optab,
4411						quotient, quotient, 0);
4412		      }
4413		  }
4414		else if (size <= HOST_BITS_PER_WIDE_INT)
4415		  {
4416		    choose_multiplier (abs_d, size, size - 1,
4417				       &ml, &post_shift, &lgup);
4418		    if (ml < HOST_WIDE_INT_1U << (size - 1))
4419		      {
4420			rtx t1, t2, t3;
4421
4422			if (post_shift >= BITS_PER_WORD
4423			    || size - 1 >= BITS_PER_WORD)
4424			  goto fail1;
4425
4426			extra_cost = (shift_cost (speed, compute_mode, post_shift)
4427				      + shift_cost (speed, compute_mode, size - 1)
4428				      + add_cost (speed, compute_mode));
4429			t1 = expmed_mult_highpart
4430			  (compute_mode, op0, gen_int_mode (ml, compute_mode),
4431			   NULL_RTX, 0, max_cost - extra_cost);
4432			if (t1 == 0)
4433			  goto fail1;
4434			t2 = expand_shift
4435			  (RSHIFT_EXPR, compute_mode, t1,
4436			   post_shift, NULL_RTX, 0);
4437			t3 = expand_shift
4438			  (RSHIFT_EXPR, compute_mode, op0,
4439			   size - 1, NULL_RTX, 0);
4440			if (d < 0)
4441			  quotient
4442			    = force_operand (gen_rtx_MINUS (compute_mode,
4443							    t3, t2),
4444					     tquotient);
4445			else
4446			  quotient
4447			    = force_operand (gen_rtx_MINUS (compute_mode,
4448							    t2, t3),
4449					     tquotient);
4450		      }
4451		    else
4452		      {
4453			rtx t1, t2, t3, t4;
4454
4455			if (post_shift >= BITS_PER_WORD
4456			    || size - 1 >= BITS_PER_WORD)
4457			  goto fail1;
4458
4459			ml |= HOST_WIDE_INT_M1U << (size - 1);
4460			mlr = gen_int_mode (ml, compute_mode);
4461			extra_cost = (shift_cost (speed, compute_mode, post_shift)
4462				      + shift_cost (speed, compute_mode, size - 1)
4463				      + 2 * add_cost (speed, compute_mode));
4464			t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4465						   NULL_RTX, 0,
4466						   max_cost - extra_cost);
4467			if (t1 == 0)
4468			  goto fail1;
4469			t2 = force_operand (gen_rtx_PLUS (compute_mode,
4470							  t1, op0),
4471					    NULL_RTX);
4472			t3 = expand_shift
4473			  (RSHIFT_EXPR, compute_mode, t2,
4474			   post_shift, NULL_RTX, 0);
4475			t4 = expand_shift
4476			  (RSHIFT_EXPR, compute_mode, op0,
4477			   size - 1, NULL_RTX, 0);
4478			if (d < 0)
4479			  quotient
4480			    = force_operand (gen_rtx_MINUS (compute_mode,
4481							    t4, t3),
4482					     tquotient);
4483			else
4484			  quotient
4485			    = force_operand (gen_rtx_MINUS (compute_mode,
4486							    t3, t4),
4487					     tquotient);
4488		      }
4489		  }
4490		else		/* Too wide mode to use tricky code */
4491		  break;
4492
4493		insn = get_last_insn ();
4494		if (insn != last)
4495		  set_dst_reg_note (insn, REG_EQUAL,
4496				    gen_rtx_DIV (compute_mode, op0, op1),
4497				    quotient);
4498	      }
4499	    break;
4500	  }
4501      fail1:
4502	delete_insns_since (last);
4503	break;
4504
4505      case FLOOR_DIV_EXPR:
4506      case FLOOR_MOD_EXPR:
4507	/* We will come here only for signed operations.  */
4508	if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4509	  {
4510	    unsigned HOST_WIDE_INT mh, ml;
4511	    int pre_shift, lgup, post_shift;
4512	    HOST_WIDE_INT d = INTVAL (op1);
4513
4514	    if (d > 0)
4515	      {
4516		/* We could just as easily deal with negative constants here,
4517		   but it does not seem worth the trouble for GCC 2.6.  */
4518		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4519		  {
4520		    pre_shift = floor_log2 (d);
4521		    if (rem_flag)
4522		      {
4523			unsigned HOST_WIDE_INT mask
4524			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
4525			remainder = expand_binop
4526			  (compute_mode, and_optab, op0,
4527			   gen_int_mode (mask, compute_mode),
4528			   remainder, 0, OPTAB_LIB_WIDEN);
4529			if (remainder)
4530			  return gen_lowpart (mode, remainder);
4531		      }
4532		    quotient = expand_shift
4533		      (RSHIFT_EXPR, compute_mode, op0,
4534		       pre_shift, tquotient, 0);
4535		  }
4536		else
4537		  {
4538		    rtx t1, t2, t3, t4;
4539
4540		    mh = choose_multiplier (d, size, size - 1,
4541					    &ml, &post_shift, &lgup);
4542		    gcc_assert (!mh);
4543
4544		    if (post_shift < BITS_PER_WORD
4545			&& size - 1 < BITS_PER_WORD)
4546		      {
4547			t1 = expand_shift
4548			  (RSHIFT_EXPR, compute_mode, op0,
4549			   size - 1, NULL_RTX, 0);
4550			t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4551					   NULL_RTX, 0, OPTAB_WIDEN);
4552			extra_cost = (shift_cost (speed, compute_mode, post_shift)
4553				      + shift_cost (speed, compute_mode, size - 1)
4554				      + 2 * add_cost (speed, compute_mode));
4555			t3 = expmed_mult_highpart
4556			  (compute_mode, t2, gen_int_mode (ml, compute_mode),
4557			   NULL_RTX, 1, max_cost - extra_cost);
4558			if (t3 != 0)
4559			  {
4560			    t4 = expand_shift
4561			      (RSHIFT_EXPR, compute_mode, t3,
4562			       post_shift, NULL_RTX, 1);
4563			    quotient = expand_binop (compute_mode, xor_optab,
4564						     t4, t1, tquotient, 0,
4565						     OPTAB_WIDEN);
4566			  }
4567		      }
4568		  }
4569	      }
4570	    else
4571	      {
4572		rtx nsign, t1, t2, t3, t4;
4573		t1 = force_operand (gen_rtx_PLUS (compute_mode,
4574						  op0, constm1_rtx), NULL_RTX);
4575		t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4576				   0, OPTAB_WIDEN);
4577		nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4578				      size - 1, NULL_RTX, 0);
4579		t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4580				    NULL_RTX);
4581		t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4582				    NULL_RTX, 0);
4583		if (t4)
4584		  {
4585		    rtx t5;
4586		    t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4587				      NULL_RTX, 0);
4588		    quotient = force_operand (gen_rtx_PLUS (compute_mode,
4589							    t4, t5),
4590					      tquotient);
4591		  }
4592	      }
4593	  }
4594
4595	if (quotient != 0)
4596	  break;
4597	delete_insns_since (last);
4598
4599	/* Try using an instruction that produces both the quotient and
4600	   remainder, using truncation.  We can easily compensate the quotient
4601	   or remainder to get floor rounding, once we have the remainder.
4602	   Notice that we also compute the final remainder value here,
4603	   and return the result right away.  */
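	/* The compensation only triggers when the truncating remainder is
	   nonzero and the operands have opposite signs: e.g. -7 / 2 is -3
	   rem -1 under truncation and becomes -4 rem 1 after the quotient
	   is decremented and the divisor is added to the remainder.  */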
4604	if (target == 0 || GET_MODE (target) != compute_mode)
4605	  target = gen_reg_rtx (compute_mode);
4606
4607	if (rem_flag)
4608	  {
4609	    remainder
4610	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4611	    quotient = gen_reg_rtx (compute_mode);
4612	  }
4613	else
4614	  {
4615	    quotient
4616	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4617	    remainder = gen_reg_rtx (compute_mode);
4618	  }
4619
4620	if (expand_twoval_binop (sdivmod_optab, op0, op1,
4621				 quotient, remainder, 0))
4622	  {
4623	    /* This could be computed with a branch-less sequence.
4624	       Save that for later.  */
4625	    rtx tem;
4626	    rtx_code_label *label = gen_label_rtx ();
4627	    do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4628	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4629				NULL_RTX, 0, OPTAB_WIDEN);
4630	    do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4631	    expand_dec (quotient, const1_rtx);
4632	    expand_inc (remainder, op1);
4633	    emit_label (label);
4634	    return gen_lowpart (mode, rem_flag ? remainder : quotient);
4635	  }
4636
4637	/* No luck with division elimination or divmod.  Have to do it
4638	   by conditionally adjusting op0 *and* the result.  */
4639	{
4640	  rtx_code_label *label1, *label2, *label3, *label4, *label5;
4641	  rtx adjusted_op0;
4642	  rtx tem;
4643
4644	  quotient = gen_reg_rtx (compute_mode);
4645	  adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4646	  label1 = gen_label_rtx ();
4647	  label2 = gen_label_rtx ();
4648	  label3 = gen_label_rtx ();
4649	  label4 = gen_label_rtx ();
4650	  label5 = gen_label_rtx ();
4651	  do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4652	  do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4653	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4654			      quotient, 0, OPTAB_LIB_WIDEN);
4655	  if (tem != quotient)
4656	    emit_move_insn (quotient, tem);
4657	  emit_jump_insn (targetm.gen_jump (label5));
4658	  emit_barrier ();
4659	  emit_label (label1);
4660	  expand_inc (adjusted_op0, const1_rtx);
4661	  emit_jump_insn (targetm.gen_jump (label4));
4662	  emit_barrier ();
4663	  emit_label (label2);
4664	  do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4665	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4666			      quotient, 0, OPTAB_LIB_WIDEN);
4667	  if (tem != quotient)
4668	    emit_move_insn (quotient, tem);
4669	  emit_jump_insn (targetm.gen_jump (label5));
4670	  emit_barrier ();
4671	  emit_label (label3);
4672	  expand_dec (adjusted_op0, const1_rtx);
4673	  emit_label (label4);
4674	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4675			      quotient, 0, OPTAB_LIB_WIDEN);
4676	  if (tem != quotient)
4677	    emit_move_insn (quotient, tem);
4678	  expand_dec (quotient, const1_rtx);
4679	  emit_label (label5);
4680	}
4681	break;
4682
4683      case CEIL_DIV_EXPR:
4684      case CEIL_MOD_EXPR:
4685	if (unsignedp)
4686	  {
4687	    if (op1_is_constant
4688		&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4689		&& (size <= HOST_BITS_PER_WIDE_INT
4690		    || INTVAL (op1) >= 0))
4691	      {
4692		rtx t1, t2, t3;
4693		unsigned HOST_WIDE_INT d = INTVAL (op1);
4694		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4695				   floor_log2 (d), tquotient, 1);
4696		t2 = expand_binop (compute_mode, and_optab, op0,
4697				   gen_int_mode (d - 1, compute_mode),
4698				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4699		t3 = gen_reg_rtx (compute_mode);
4700		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4701				      compute_mode, 1, 1);
4702		if (t3 == 0)
4703		  {
4704		    rtx_code_label *lab;
4705		    lab = gen_label_rtx ();
4706		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4707		    expand_inc (t1, const1_rtx);
4708		    emit_label (lab);
4709		    quotient = t1;
4710		  }
4711		else
4712		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4713							  t1, t3),
4714					    tquotient);
4715		break;
4716	      }
4717
4718	    /* Try using an instruction that produces both the quotient and
4719	       remainder, using truncation.  We can easily compensate the
4720	       quotient or remainder to get ceiling rounding, once we have the
4721	       remainder.  Notice that we also compute the final remainder
4722	       value here, and return the result right away.  */
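	    /* Concretely: if the truncating division leaves a nonzero
	       remainder, the quotient is incremented and OP1 subtracted from
	       the remainder.  For instance, 7 / 2 truncates to Q = 3, R = 1,
	       and the adjustment yields the ceiling result Q = 4, R = -1.  */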
4723	    if (target == 0 || GET_MODE (target) != compute_mode)
4724	      target = gen_reg_rtx (compute_mode);
4725
4726	    if (rem_flag)
4727	      {
4728		remainder = (REG_P (target)
4729			     ? target : gen_reg_rtx (compute_mode));
4730		quotient = gen_reg_rtx (compute_mode);
4731	      }
4732	    else
4733	      {
4734		quotient = (REG_P (target)
4735			    ? target : gen_reg_rtx (compute_mode));
4736		remainder = gen_reg_rtx (compute_mode);
4737	      }
4738
4739	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4740				     remainder, 1))
4741	      {
4742		/* This could be computed with a branch-less sequence.
4743		   Save that for later.  */
4744		rtx_code_label *label = gen_label_rtx ();
4745		do_cmp_and_jump (remainder, const0_rtx, EQ,
4746				 compute_mode, label);
4747		expand_inc (quotient, const1_rtx);
4748		expand_dec (remainder, op1);
4749		emit_label (label);
4750		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4751	      }
4752
4753	    /* No luck with division elimination or divmod.  Have to do it
4754	       by conditionally adjusting op0 *and* the result.  */
4755	    {
4756	      rtx_code_label *label1, *label2;
4757	      rtx adjusted_op0, tem;
4758
4759	      quotient = gen_reg_rtx (compute_mode);
4760	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4761	      label1 = gen_label_rtx ();
4762	      label2 = gen_label_rtx ();
4763	      do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4764			       compute_mode, label1);
4765	      emit_move_insn (quotient, const0_rtx);
4766	      emit_jump_insn (targetm.gen_jump (label2));
4767	      emit_barrier ();
4768	      emit_label (label1);
4769	      expand_dec (adjusted_op0, const1_rtx);
4770	      tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4771				  quotient, 1, OPTAB_LIB_WIDEN);
4772	      if (tem != quotient)
4773		emit_move_insn (quotient, tem);
4774	      expand_inc (quotient, const1_rtx);
4775	      emit_label (label2);
4776	    }
4777	  }
4778	else /* signed */
4779	  {
4780	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4781		&& INTVAL (op1) >= 0)
4782	      {
4783		/* This is extremely similar to the code for the unsigned case
4784		   above.  For 2.7 we should merge these variants, but for
4785		   2.6.1 I don't want to touch the code for unsigned since that
4786	   gets used in C.  The signed case will only be used by other
4787		   languages (Ada).  */
4788
4789		rtx t1, t2, t3;
4790		unsigned HOST_WIDE_INT d = INTVAL (op1);
4791		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4792				   floor_log2 (d), tquotient, 0);
4793		t2 = expand_binop (compute_mode, and_optab, op0,
4794				   gen_int_mode (d - 1, compute_mode),
4795				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4796		t3 = gen_reg_rtx (compute_mode);
4797		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4798				      compute_mode, 1, 1);
4799		if (t3 == 0)
4800		  {
4801		    rtx_code_label *lab;
4802		    lab = gen_label_rtx ();
4803		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4804		    expand_inc (t1, const1_rtx);
4805		    emit_label (lab);
4806		    quotient = t1;
4807		  }
4808		else
4809		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4810							  t1, t3),
4811					    tquotient);
4812		break;
4813	      }
4814
4815	    /* Try using an instruction that produces both the quotient and
4816	       remainder, using truncation.  We can easily compensate the
4817	       quotient or remainder to get ceiling rounding, once we have the
4818	       remainder.  Notice that we also compute the final remainder
4819	       value here, and return the result right away.  */
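	    /* Concretely: the ceiling adjustment is needed only when the
	       remainder is nonzero and OP0 and OP1 have the same sign, which
	       the code below detects by testing the sign of OP0 ^ OP1; with
	       opposite signs the truncated quotient already is the
	       ceiling.  */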
4820	    if (target == 0 || GET_MODE (target) != compute_mode)
4821	      target = gen_reg_rtx (compute_mode);
4822	    if (rem_flag)
4823	      {
4824		remainder = (REG_P (target)
4825			    ? target : gen_reg_rtx (compute_mode));
4826		quotient = gen_reg_rtx (compute_mode);
4827	      }
4828	    else
4829	      {
4830		quotient = (REG_P (target)
4831			    ? target : gen_reg_rtx (compute_mode));
4832		remainder = gen_reg_rtx (compute_mode);
4833	      }
4834
4835	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4836				     remainder, 0))
4837	      {
4838		/* This could be computed with a branch-less sequence.
4839		   Save that for later.  */
4840		rtx tem;
4841		rtx_code_label *label = gen_label_rtx ();
4842		do_cmp_and_jump (remainder, const0_rtx, EQ,
4843				 compute_mode, label);
4844		tem = expand_binop (compute_mode, xor_optab, op0, op1,
4845				    NULL_RTX, 0, OPTAB_WIDEN);
4846		do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4847		expand_inc (quotient, const1_rtx);
4848		expand_dec (remainder, op1);
4849		emit_label (label);
4850		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4851	      }
4852
4853	    /* No luck with division elimination or divmod.  Have to do it
4854	       by conditionally adjusting op0 *and* the result.  */
4855	    {
4856	      rtx_code_label *label1, *label2, *label3, *label4, *label5;
4857	      rtx adjusted_op0;
4858	      rtx tem;
4859
4860	      quotient = gen_reg_rtx (compute_mode);
4861	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4862	      label1 = gen_label_rtx ();
4863	      label2 = gen_label_rtx ();
4864	      label3 = gen_label_rtx ();
4865	      label4 = gen_label_rtx ();
4866	      label5 = gen_label_rtx ();
4867	      do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4868	      do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4869			       compute_mode, label1);
4870	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4871				  quotient, 0, OPTAB_LIB_WIDEN);
4872	      if (tem != quotient)
4873		emit_move_insn (quotient, tem);
4874	      emit_jump_insn (targetm.gen_jump (label5));
4875	      emit_barrier ();
4876	      emit_label (label1);
4877	      expand_dec (adjusted_op0, const1_rtx);
4878	      emit_jump_insn (targetm.gen_jump (label4));
4879	      emit_barrier ();
4880	      emit_label (label2);
4881	      do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4882			       compute_mode, label3);
4883	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4884				  quotient, 0, OPTAB_LIB_WIDEN);
4885	      if (tem != quotient)
4886		emit_move_insn (quotient, tem);
4887	      emit_jump_insn (targetm.gen_jump (label5));
4888	      emit_barrier ();
4889	      emit_label (label3);
4890	      expand_inc (adjusted_op0, const1_rtx);
4891	      emit_label (label4);
4892	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4893				  quotient, 0, OPTAB_LIB_WIDEN);
4894	      if (tem != quotient)
4895		emit_move_insn (quotient, tem);
4896	      expand_inc (quotient, const1_rtx);
4897	      emit_label (label5);
4898	    }
4899	  }
4900	break;
4901
4902      case EXACT_DIV_EXPR:
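	/* An exact division by D = 2**PRE_SHIFT * D' (D' odd) needs no divide
	   insn: shift right by PRE_SHIFT and multiply by ML, the
	   multiplicative inverse of D' modulo 2**SIZE.  For instance, in an
	   8-bit mode with D = 10, the inverse of 5 is 205 (5 * 205 = 1025,
	   which is 1 mod 256), so 50 / 10 becomes (50 >> 1) * 205 = 5125,
	   which is 5 mod 256.  This is valid only because EXACT_DIV_EXPR
	   guarantees the division is exact.  */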
4903	if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4904	  {
4905	    HOST_WIDE_INT d = INTVAL (op1);
4906	    unsigned HOST_WIDE_INT ml;
4907	    int pre_shift;
4908	    rtx t1;
4909
4910	    pre_shift = ctz_or_zero (d);
4911	    ml = invert_mod2n (d >> pre_shift, size);
4912	    t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4913			       pre_shift, NULL_RTX, unsignedp);
4914	    quotient = expand_mult (compute_mode, t1,
4915				    gen_int_mode (ml, compute_mode),
4916				    NULL_RTX, 1);
4917
4918	    insn = get_last_insn ();
4919	    set_dst_reg_note (insn, REG_EQUAL,
4920			      gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4921					      compute_mode, op0, op1),
4922			      quotient);
4923	  }
4924	break;
4925
4926      case ROUND_DIV_EXPR:
4927      case ROUND_MOD_EXPR:
4928	if (unsignedp)
4929	  {
4930	    rtx tem;
4931	    rtx_code_label *label;
4932	    label = gen_label_rtx ();
4933	    quotient = gen_reg_rtx (compute_mode);
4934	    remainder = gen_reg_rtx (compute_mode);
4935	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4936	      {
4937		rtx tem;
4938		quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4939					 quotient, 1, OPTAB_LIB_WIDEN);
4940		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4941		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4942					  remainder, 1, OPTAB_LIB_WIDEN);
4943	      }
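	    /* Round to nearest: bump the quotient when the remainder exceeds
	       (OP1 - 1) / 2.  For instance, with OP1 = 7 the threshold is 3,
	       so remainders 4..6 round up; with OP1 = 8 the threshold is also
	       3, so an exact half (remainder 4) rounds up as well.  */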
4944	    tem = plus_constant (compute_mode, op1, -1);
4945	    tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4946	    do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4947	    expand_inc (quotient, const1_rtx);
4948	    expand_dec (remainder, op1);
4949	    emit_label (label);
4950	  }
4951	else
4952	  {
4953	    rtx abs_rem, abs_op1, tem, mask;
4954	    rtx_code_label *label;
4955	    label = gen_label_rtx ();
4956	    quotient = gen_reg_rtx (compute_mode);
4957	    remainder = gen_reg_rtx (compute_mode);
4958	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4959	      {
4960		rtx tem;
4961		quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4962					 quotient, 0, OPTAB_LIB_WIDEN);
4963		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4964		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4965					  remainder, 0, OPTAB_LIB_WIDEN);
4966	      }
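	    /* Branch-free rounding: the mask computed below, (OP0 ^ OP1)
	       shifted down arithmetically, is 0 when the operands have the
	       same sign and all ones otherwise, so (X ^ MASK) - MASK
	       conditionally negates X.  When 2*|remainder| >= |OP1| the
	       quotient is therefore bumped by +1 or -1 (the sign of the exact
	       quotient) and OP1 or -OP1 is subtracted from the remainder;
	       smaller remainders are left alone.  */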
4967	    abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4968	    abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4969	    tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4970				1, NULL_RTX, 1);
4971	    do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4972	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4973				NULL_RTX, 0, OPTAB_WIDEN);
4974	    mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4975				 size - 1, NULL_RTX, 0);
4976	    tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4977				NULL_RTX, 0, OPTAB_WIDEN);
4978	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4979				NULL_RTX, 0, OPTAB_WIDEN);
4980	    expand_inc (quotient, tem);
4981	    tem = expand_binop (compute_mode, xor_optab, mask, op1,
4982				NULL_RTX, 0, OPTAB_WIDEN);
4983	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4984				NULL_RTX, 0, OPTAB_WIDEN);
4985	    expand_dec (remainder, tem);
4986	    emit_label (label);
4987	  }
4988	return gen_lowpart (mode, rem_flag ? remainder : quotient);
4989
4990      default:
4991	gcc_unreachable ();
4992      }
4993
4994  if (quotient == 0)
4995    {
4996      if (target && GET_MODE (target) != compute_mode)
4997	target = 0;
4998
4999      if (rem_flag)
5000	{
5001	  /* Try to produce the remainder without producing the quotient.
5002	     If we seem to have a divmod pattern that does not require widening,
5003	     don't try widening here.  We should really have a WIDEN argument
5004	     to expand_twoval_binop, since what we'd really like to do here is
5005	     1) try a mod insn in compute_mode
5006	     2) try a divmod insn in compute_mode
5007	     3) try a div insn in compute_mode and multiply-subtract to get
5008	        remainder
5009	     4) try the same things with widening allowed.  */
5010	  remainder
5011	    = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5012				 op0, op1, target,
5013				 unsignedp,
5014				 ((optab_handler (optab2, compute_mode)
5015				   != CODE_FOR_nothing)
5016				  ? OPTAB_DIRECT : OPTAB_WIDEN));
5017	  if (remainder == 0)
5018	    {
5019	      /* No luck there.  Can we do remainder and divide at once
5020		 without a library call?  */
5021	      remainder = gen_reg_rtx (compute_mode);
5022	      if (! expand_twoval_binop ((unsignedp
5023					  ? udivmod_optab
5024					  : sdivmod_optab),
5025					 op0, op1,
5026					 NULL_RTX, remainder, unsignedp))
5027		remainder = 0;
5028	    }
5029
5030	  if (remainder)
5031	    return gen_lowpart (mode, remainder);
5032	}
5033
5034      /* Produce the quotient.  Try a quotient insn, but not a library call.
5035	 If we have a divmod in this mode, use it in preference to widening
5036	 the div (for this test we assume it will not fail). Note that optab2
5037	 is set to whichever of the two optabs the call below will use.  */
5038      quotient
5039	= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5040			     op0, op1, rem_flag ? NULL_RTX : target,
5041			     unsignedp,
5042			     ((optab_handler (optab2, compute_mode)
5043			       != CODE_FOR_nothing)
5044			      ? OPTAB_DIRECT : OPTAB_WIDEN));
5045
5046      if (quotient == 0)
5047	{
5048	  /* No luck there.  Try a quotient-and-remainder insn,
5049	     keeping the quotient alone.  */
5050	  quotient = gen_reg_rtx (compute_mode);
5051	  if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5052				     op0, op1,
5053				     quotient, NULL_RTX, unsignedp))
5054	    {
5055	      quotient = 0;
5056	      if (! rem_flag)
5057		/* Still no luck.  If we are not computing the remainder,
5058		   use a library call for the quotient.  */
5059		quotient = sign_expand_binop (compute_mode,
5060					      udiv_optab, sdiv_optab,
5061					      op0, op1, target,
5062					      unsignedp, OPTAB_LIB_WIDEN);
5063	    }
5064	}
5065    }
5066
5067  if (rem_flag)
5068    {
5069      if (target && GET_MODE (target) != compute_mode)
5070	target = 0;
5071
5072      if (quotient == 0)
5073	{
5074	  /* No divide instruction either.  Use library for remainder.  */
5075	  remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5076					 op0, op1, target,
5077					 unsignedp, OPTAB_LIB_WIDEN);
5078	  /* No remainder function.  Try a quotient-and-remainder
5079	     function, keeping the remainder.  */
5080	  if (!remainder)
5081	    {
5082	      remainder = gen_reg_rtx (compute_mode);
5083	      if (!expand_twoval_binop_libfunc
5084		  (unsignedp ? udivmod_optab : sdivmod_optab,
5085		   op0, op1,
5086		   NULL_RTX, remainder,
5087		   unsignedp ? UMOD : MOD))
5088		remainder = NULL_RTX;
5089	    }
5090	}
5091      else
5092	{
5093	  /* We divided.  Now finish doing X - Y * (X / Y).  */
5094	  remainder = expand_mult (compute_mode, quotient, op1,
5095				   NULL_RTX, unsignedp);
5096	  remainder = expand_binop (compute_mode, sub_optab, op0,
5097				    remainder, target, unsignedp,
5098				    OPTAB_LIB_WIDEN);
5099	}
5100    }
5101
5102  return gen_lowpart (mode, rem_flag ? remainder : quotient);
5103}
5104
5105/* Return a tree node with data type TYPE, describing the value of X.
5106   Usually this is a VAR_DECL, if there is no obvious better choice.
5107   X may be an expression; however, we only support those expressions
5108   generated by loop.c.  */
5109
5110tree
5111make_tree (tree type, rtx x)
5112{
5113  tree t;
5114
5115  switch (GET_CODE (x))
5116    {
5117    case CONST_INT:
5118    case CONST_WIDE_INT:
5119      t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5120      return t;
5121
5122    case CONST_DOUBLE:
5123      STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5124      if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5125	t = wide_int_to_tree (type,
5126			      wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5127						    HOST_BITS_PER_WIDE_INT * 2));
5128      else
5129	t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5130
5131      return t;
5132
5133    case CONST_VECTOR:
5134      {
5135	int units = CONST_VECTOR_NUNITS (x);
5136	tree itype = TREE_TYPE (type);
5137	tree *elts;
5138	int i;
5139
5140	/* Build a tree with vector elements.  */
5141	elts = XALLOCAVEC (tree, units);
5142	for (i = units - 1; i >= 0; --i)
5143	  {
5144	    rtx elt = CONST_VECTOR_ELT (x, i);
5145	    elts[i] = make_tree (itype, elt);
5146	  }
5147
5148	return build_vector (type, elts);
5149      }
5150
5151    case PLUS:
5152      return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5153			  make_tree (type, XEXP (x, 1)));
5154
5155    case MINUS:
5156      return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5157			  make_tree (type, XEXP (x, 1)));
5158
5159    case NEG:
5160      return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5161
5162    case MULT:
5163      return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5164			  make_tree (type, XEXP (x, 1)));
5165
5166    case ASHIFT:
5167      return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5168			  make_tree (type, XEXP (x, 1)));
5169
5170    case LSHIFTRT:
5171      t = unsigned_type_for (type);
5172      return fold_convert (type, build2 (RSHIFT_EXPR, t,
5173			    		 make_tree (t, XEXP (x, 0)),
5174				    	 make_tree (type, XEXP (x, 1))));
5175
5176    case ASHIFTRT:
5177      t = signed_type_for (type);
5178      return fold_convert (type, build2 (RSHIFT_EXPR, t,
5179					 make_tree (t, XEXP (x, 0)),
5180				    	 make_tree (type, XEXP (x, 1))));
5181
5182    case DIV:
5183      if (TREE_CODE (type) != REAL_TYPE)
5184	t = signed_type_for (type);
5185      else
5186	t = type;
5187
5188      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5189				    	 make_tree (t, XEXP (x, 0)),
5190				    	 make_tree (t, XEXP (x, 1))));
5191    case UDIV:
5192      t = unsigned_type_for (type);
5193      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5194				    	 make_tree (t, XEXP (x, 0)),
5195				    	 make_tree (t, XEXP (x, 1))));
5196
5197    case SIGN_EXTEND:
5198    case ZERO_EXTEND:
5199      t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5200					  GET_CODE (x) == ZERO_EXTEND);
5201      return fold_convert (type, make_tree (t, XEXP (x, 0)));
5202
5203    case CONST:
5204      return make_tree (type, XEXP (x, 0));
5205
5206    case SYMBOL_REF:
5207      t = SYMBOL_REF_DECL (x);
5208      if (t)
5209	return fold_convert (type, build_fold_addr_expr (t));
5210      /* fall through.  */
5211
5212    default:
5213      t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5214
5215      /* If TYPE is a POINTER_TYPE, we might need to convert X from
5216	 address mode to pointer mode.  */
5217      if (POINTER_TYPE_P (type))
5218	x = convert_memory_address_addr_space
5219	      (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5220
5221      /* Note that we do *not* use SET_DECL_RTL here, because we do not
5222	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5223      t->decl_with_rtl.rtl = x;
5224
5225      return t;
5226    }
5227}
5228
5229/* Compute the logical-and of OP0 and OP1, storing it in TARGET
5230   and returning TARGET.
5231
5232   If TARGET is 0, a pseudo-register or constant is returned.  */
5233
5234rtx
5235expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5236{
5237  rtx tem = 0;
5238
5239  if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5240    tem = simplify_binary_operation (AND, mode, op0, op1);
5241  if (tem == 0)
5242    tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5243
5244  if (target == 0)
5245    target = tem;
5246  else if (tem != target)
5247    emit_move_insn (target, tem);
5248  return target;
5249}
5250
5251/* Helper function for emit_store_flag.  */
5252rtx
5253emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5254	     machine_mode mode, machine_mode compare_mode,
5255	     int unsignedp, rtx x, rtx y, int normalizep,
5256	     machine_mode target_mode)
5257{
5258  struct expand_operand ops[4];
5259  rtx op0, comparison, subtarget;
5260  rtx_insn *last;
5261  machine_mode result_mode = targetm.cstore_mode (icode);
5262
5263  last = get_last_insn ();
5264  x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5265  y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5266  if (!x || !y)
5267    {
5268      delete_insns_since (last);
5269      return NULL_RTX;
5270    }
5271
5272  if (target_mode == VOIDmode)
5273    target_mode = result_mode;
5274  if (!target)
5275    target = gen_reg_rtx (target_mode);
5276
5277  comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5278
5279  create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5280  create_fixed_operand (&ops[1], comparison);
5281  create_fixed_operand (&ops[2], x);
5282  create_fixed_operand (&ops[3], y);
5283  if (!maybe_expand_insn (icode, 4, ops))
5284    {
5285      delete_insns_since (last);
5286      return NULL_RTX;
5287    }
5288  subtarget = ops[0].value;
5289
5290  /* If we are converting to a wider mode, first convert to
5291     TARGET_MODE, then normalize.  This produces better combining
5292     opportunities on machines that have a SIGN_EXTRACT when we are
5293     testing a single bit.  This mostly benefits the 68k.
5294
5295     If STORE_FLAG_VALUE does not have the sign bit set when
5296     interpreted in MODE, we can do this conversion as unsigned, which
5297     is usually more efficient.  */
5298  if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5299    {
5300      convert_move (target, subtarget,
5301		    val_signbit_known_clear_p (result_mode,
5302					       STORE_FLAG_VALUE));
5303      op0 = target;
5304      result_mode = target_mode;
5305    }
5306  else
5307    op0 = subtarget;
5308
5309  /* If we want to keep subexpressions around, don't reuse our last
5310     target.  */
5311  if (optimize)
5312    subtarget = 0;
5313
5314  /* Now normalize to the proper value in MODE.  Sometimes we don't
5315     have to do anything.  */
5316  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5317    ;
5318  /* STORE_FLAG_VALUE might be the most negative number, so write
5319     the comparison this way to avoid a compile-time warning.  */
5320  else if (- normalizep == STORE_FLAG_VALUE)
5321    op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5322
5323  /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5324     it hard to use a value of just the sign bit due to ANSI integer
5325     constant typing rules.  */
5326  else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5327    op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5328			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5329			normalizep == 1);
5330  else
5331    {
5332      gcc_assert (STORE_FLAG_VALUE & 1);
5333
5334      op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5335      if (normalizep == -1)
5336	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5337    }
5338
5339  /* If we were converting to a smaller mode, do the conversion now.  */
5340  if (target_mode != result_mode)
5341    {
5342      convert_move (target, op0, 0);
5343      return target;
5344    }
5345  else
5346    return op0;
5347}
5348
5349
5350/* A subroutine of emit_store_flag only including "tricks" that do not
5351   need a recursive call.  These are kept separate to avoid infinite
5352   loops.  */
5353
5354static rtx
5355emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5356		   machine_mode mode, int unsignedp, int normalizep,
5357		   machine_mode target_mode)
5358{
5359  rtx subtarget;
5360  enum insn_code icode;
5361  machine_mode compare_mode;
5362  enum mode_class mclass;
5363  enum rtx_code scode;
5364
5365  if (unsignedp)
5366    code = unsigned_condition (code);
5367  scode = swap_condition (code);
5368
5369  /* If one operand is constant, make it the second one.  Only do this
5370     if the other operand is not constant as well.  */
5371
5372  if (swap_commutative_operands_p (op0, op1))
5373    {
5374      std::swap (op0, op1);
5375      code = swap_condition (code);
5376    }
5377
5378  if (mode == VOIDmode)
5379    mode = GET_MODE (op0);
5380
5381  /* For some comparisons with 1 and -1, we can convert this to
5382     comparisons with zero.  This will often produce more opportunities for
5383     store-flag insns.  */
5384
5385  switch (code)
5386    {
5387    case LT:
5388      if (op1 == const1_rtx)
5389	op1 = const0_rtx, code = LE;
5390      break;
5391    case LE:
5392      if (op1 == constm1_rtx)
5393	op1 = const0_rtx, code = LT;
5394      break;
5395    case GE:
5396      if (op1 == const1_rtx)
5397	op1 = const0_rtx, code = GT;
5398      break;
5399    case GT:
5400      if (op1 == constm1_rtx)
5401	op1 = const0_rtx, code = GE;
5402      break;
5403    case GEU:
5404      if (op1 == const1_rtx)
5405	op1 = const0_rtx, code = NE;
5406      break;
5407    case LTU:
5408      if (op1 == const1_rtx)
5409	op1 = const0_rtx, code = EQ;
5410      break;
5411    default:
5412      break;
5413    }
5414
5415  /* If we are comparing a double-word integer with zero or -1, we can
5416     convert the comparison into one involving a single word.  */
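  /* For instance, with 32-bit words a 64-bit X == 0 becomes
     (low_word | high_word) == 0, X == -1 becomes
     (low_word & high_word) == -1, and X < 0 or X >= 0 only needs to look at
     the sign bit of the high word.  */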
5417  if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5418      && GET_MODE_CLASS (mode) == MODE_INT
5419      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5420    {
5421      rtx tem;
5422      if ((code == EQ || code == NE)
5423	  && (op1 == const0_rtx || op1 == constm1_rtx))
5424	{
5425	  rtx op00, op01;
5426
5427	  /* Do a logical OR or AND of the two words and compare the
5428	     result.  */
5429	  op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5430	  op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5431	  tem = expand_binop (word_mode,
5432			      op1 == const0_rtx ? ior_optab : and_optab,
5433			      op00, op01, NULL_RTX, unsignedp,
5434			      OPTAB_DIRECT);
5435
5436	  if (tem != 0)
5437	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5438				   unsignedp, normalizep);
5439	}
5440      else if ((code == LT || code == GE) && op1 == const0_rtx)
5441	{
5442	  rtx op0h;
5443
5444	  /* If testing the sign bit, can just test on high word.  */
5445	  op0h = simplify_gen_subreg (word_mode, op0, mode,
5446				      subreg_highpart_offset (word_mode,
5447							      mode));
5448	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5449				 unsignedp, normalizep);
5450	}
5451      else
5452	tem = NULL_RTX;
5453
5454      if (tem)
5455	{
5456	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5457	    return tem;
5458	  if (!target)
5459	    target = gen_reg_rtx (target_mode);
5460
5461	  convert_move (target, tem,
5462			!val_signbit_known_set_p (word_mode,
5463						  (normalizep ? normalizep
5464						   : STORE_FLAG_VALUE)));
5465	  return target;
5466	}
5467    }
5468
5469  /* If this is A < 0 or A >= 0, we can do this by taking the ones
5470     complement of A (for GE) and shifting the sign bit to the low bit.  */
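  /* For a 0/1 result this is a logical shift: A < 0 becomes
     (unsigned) A >> (BITS - 1) and A >= 0 becomes (unsigned) ~A >> (BITS - 1),
     where BITS is the width of the mode; an arithmetic shift yields the
     0/-1 form instead.  */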
5471  if (op1 == const0_rtx && (code == LT || code == GE)
5472      && GET_MODE_CLASS (mode) == MODE_INT
5473      && (normalizep || STORE_FLAG_VALUE == 1
5474	  || val_signbit_p (mode, STORE_FLAG_VALUE)))
5475    {
5476      subtarget = target;
5477
5478      if (!target)
5479	target_mode = mode;
5480
5481      /* If the result is to be wider than OP0, it is best to convert it
5482	 first.  If it is to be narrower, it is *incorrect* to convert it
5483	 first.  */
5484      else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5485	{
5486	  op0 = convert_modes (target_mode, mode, op0, 0);
5487	  mode = target_mode;
5488	}
5489
5490      if (target_mode != mode)
5491	subtarget = 0;
5492
5493      if (code == GE)
5494	op0 = expand_unop (mode, one_cmpl_optab, op0,
5495			   ((STORE_FLAG_VALUE == 1 || normalizep)
5496			    ? 0 : subtarget), 0);
5497
5498      if (STORE_FLAG_VALUE == 1 || normalizep)
5499	/* If we are supposed to produce a 0/1 value, we want to do
5500	   a logical shift from the sign bit to the low-order bit; for
5501	   a -1/0 value, we do an arithmetic shift.  */
5502	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5503			    GET_MODE_BITSIZE (mode) - 1,
5504			    subtarget, normalizep != -1);
5505
5506      if (mode != target_mode)
5507	op0 = convert_modes (target_mode, mode, op0, 0);
5508
5509      return op0;
5510    }
5511
5512  mclass = GET_MODE_CLASS (mode);
5513  for (compare_mode = mode; compare_mode != VOIDmode;
5514       compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5515    {
5516     machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5517     icode = optab_handler (cstore_optab, optab_mode);
5518     if (icode != CODE_FOR_nothing)
5519	{
5520	  do_pending_stack_adjust ();
5521	  rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5522				 unsignedp, op0, op1, normalizep, target_mode);
5523	  if (tem)
5524	    return tem;
5525
5526	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5527	    {
5528	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
5529				 unsignedp, op1, op0, normalizep, target_mode);
5530	      if (tem)
5531	        return tem;
5532	    }
5533	  break;
5534	}
5535    }
5536
5537  return 0;
5538}
5539
5540/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5541   and storing in TARGET.  Normally return TARGET.
5542   Return 0 if that cannot be done.
5543
5544   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5545   it is VOIDmode, they cannot both be CONST_INT.
5546
5547   UNSIGNEDP is for the case where we have to widen the operands
5548   to perform the operation.  It says to use zero-extension.
5549
5550   NORMALIZEP is 1 if we should convert the result to be either zero
5551   or one.  NORMALIZEP is -1 if we should convert the result to be
5552   either zero or -1.  If NORMALIZEP is zero, the result will be left
5553   "raw" out of the scc insn.  */
5554
5555rtx
5556emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5557		 machine_mode mode, int unsignedp, int normalizep)
5558{
5559  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5560  enum rtx_code rcode;
5561  rtx subtarget;
5562  rtx tem, trueval;
5563  rtx_insn *last;
5564
5565  /* If we compare constants, we shouldn't use a store-flag operation,
5566     but a constant load.  We can get there via the vanilla route that
5567     usually generates a compare-branch sequence, but will in this case
5568     fold the comparison to a constant, and thus elide the branch.  */
5569  if (CONSTANT_P (op0) && CONSTANT_P (op1))
5570    return NULL_RTX;
5571
5572  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5573			   target_mode);
5574  if (tem)
5575    return tem;
5576
5577  /* If we reached here, we can't do this with a scc insn, however there
5578     are some comparisons that can be done in other ways.  Don't do any
5579     of these cases if branches are very cheap.  */
5580  if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5581    return 0;
5582
5583  /* See what we need to return.  We can only return a 1, -1, or the
5584     sign bit.  */
5585
5586  if (normalizep == 0)
5587    {
5588      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5589	normalizep = STORE_FLAG_VALUE;
5590
5591      else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5592	;
5593      else
5594	return 0;
5595    }
5596
5597  last = get_last_insn ();
5598
5599  /* If optimizing, use different pseudo registers for each insn, instead
5600     of reusing the same pseudo.  This leads to better CSE, but slows
5601     down the compiler, since there are more pseudos.  */
5602  subtarget = (!optimize
5603	       && (target_mode == mode)) ? target : NULL_RTX;
5604  trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5605
5606  /* For floating-point comparisons, try the reverse comparison or try
5607     changing the "orderedness" of the comparison.  */
5608  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5609    {
5610      enum rtx_code first_code;
5611      bool and_them;
5612
5613      rcode = reverse_condition_maybe_unordered (code);
5614      if (can_compare_p (rcode, mode, ccp_store_flag)
5615          && (code == ORDERED || code == UNORDERED
5616	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5617	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5618	{
5619          int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5620		          || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5621
5622	  /* For the reverse comparison, use either an addition or a XOR.  */
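	  /* For instance, with STORE_FLAG_VALUE == 1 and NORMALIZEP == -1 the
	     reversed comparison yields 1 exactly when the original condition
	     is false, and adding -1 turns that 1/0 into the wanted 0/-1.
	     When the two values do not pair up like that, XORing the
	     normalized reversed result with TRUEVAL flips it instead.  */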
5623          if (want_add
5624	      && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5625			   optimize_insn_for_speed_p ()) == 0)
5626	    {
5627	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5628				       STORE_FLAG_VALUE, target_mode);
5629	      if (tem)
5630                return expand_binop (target_mode, add_optab, tem,
5631				     gen_int_mode (normalizep, target_mode),
5632				     target, 0, OPTAB_WIDEN);
5633	    }
5634          else if (!want_add
5635	           && rtx_cost (trueval, mode, XOR, 1,
5636			        optimize_insn_for_speed_p ()) == 0)
5637	    {
5638	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5639				       normalizep, target_mode);
5640	      if (tem)
5641                return expand_binop (target_mode, xor_optab, tem, trueval,
5642				     target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5643	    }
5644	}
5645
5646      delete_insns_since (last);
5647
5648      /* Cannot split ORDERED and UNORDERED; only try the above trick.  */
5649      if (code == ORDERED || code == UNORDERED)
5650	return 0;
5651
5652      and_them = split_comparison (code, mode, &first_code, &code);
5653
5654      /* If there are no NaNs, the first comparison should always fall through.
5655         Effectively change the comparison to the other one.  */
5656      if (!HONOR_NANS (mode))
5657	{
5658          gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5659	  return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5660				    target_mode);
5661	}
5662
5663      if (!HAVE_conditional_move)
5664	return 0;
5665
5666      /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5667	 conditional move.  */
5668      tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5669			       normalizep, target_mode);
5670      if (tem == 0)
5671	return 0;
5672
5673      if (and_them)
5674        tem = emit_conditional_move (target, code, op0, op1, mode,
5675				     tem, const0_rtx, GET_MODE (tem), 0);
5676      else
5677        tem = emit_conditional_move (target, code, op0, op1, mode,
5678				     trueval, tem, GET_MODE (tem), 0);
5679
5680      if (tem == 0)
5681        delete_insns_since (last);
5682      return tem;
5683    }
5684
5685  /* The remaining tricks only apply to integer comparisons.  */
5686
5687  if (GET_MODE_CLASS (mode) != MODE_INT)
5688    return 0;
5689
5690  /* If this is an equality comparison of integers, we can try to exclusive-or
5691     (or subtract) the two operands and use a recursive call to try the
5692     comparison with zero.  Don't do any of these cases if branches are
5693     very cheap.  */
5694
5695  if ((code == EQ || code == NE) && op1 != const0_rtx)
5696    {
5697      tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5698			  OPTAB_WIDEN);
5699
5700      if (tem == 0)
5701	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5702			    OPTAB_WIDEN);
5703      if (tem != 0)
5704	tem = emit_store_flag (target, code, tem, const0_rtx,
5705			       mode, unsignedp, normalizep);
5706      if (tem != 0)
5707	return tem;
5708
5709      delete_insns_since (last);
5710    }
5711
5712  /* For integer comparisons, try the reverse comparison.  However, for
5713     small X, and if we would have to extend anyway, implementing "X != 0"
5714     as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5715  rcode = reverse_condition (code);
5716  if (can_compare_p (rcode, mode, ccp_store_flag)
5717      && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5718	    && code == NE
5719	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5720	    && op1 == const0_rtx))
5721    {
5722      int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5723		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5724
5725      /* Again, for the reverse comparison, use either an addition or a XOR.  */
5726      if (want_add
5727	  && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5728		       optimize_insn_for_speed_p ()) == 0)
5729	{
5730	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5731				   STORE_FLAG_VALUE, target_mode);
5732	  if (tem != 0)
5733            tem = expand_binop (target_mode, add_optab, tem,
5734				gen_int_mode (normalizep, target_mode),
5735				target, 0, OPTAB_WIDEN);
5736	}
5737      else if (!want_add
5738	       && rtx_cost (trueval, mode, XOR, 1,
5739			    optimize_insn_for_speed_p ()) == 0)
5740	{
5741	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5742				   normalizep, target_mode);
5743	  if (tem != 0)
5744            tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5745				INTVAL (trueval) >= 0, OPTAB_WIDEN);
5746	}
5747
5748      if (tem != 0)
5749	return tem;
5750      delete_insns_since (last);
5751    }
5752
5753  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5754     the constant zero.  Reject all other comparisons at this point.  Only
5755     do LE and GT if branches are expensive since they are expensive on
5756     2-operand machines.  */
5757
5758  if (op1 != const0_rtx
5759      || (code != EQ && code != NE
5760	  && (BRANCH_COST (optimize_insn_for_speed_p (),
5761			   false) <= 1 || (code != LE && code != GT))))
5762    return 0;
5763
5764  /* Try to put the result of the comparison in the sign bit.  Assume we can't
5765     do the necessary operation below.  */
5766
5767  tem = 0;
5768
5769  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5770     the sign bit set.  */
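  /* If A > 0 then both A and A - 1 are nonnegative and the OR has a clear
     sign bit; if A == 0 then A - 1 is -1; and if A < 0 then A itself already
     has the sign bit set.  */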
5771
5772  if (code == LE)
5773    {
5774      /* This is destructive, so SUBTARGET can't be OP0.  */
5775      if (rtx_equal_p (subtarget, op0))
5776	subtarget = 0;
5777
5778      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5779			  OPTAB_WIDEN);
5780      if (tem)
5781	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5782			    OPTAB_WIDEN);
5783    }
5784
5785	  /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5786     number of bits in the mode of OP0, minus one.  */
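  /* The arithmetic shift yields 0 for A >= 0 and -1 for A < 0, so the
     difference is -A for A >= 0 and -(A + 1) for A < 0; its sign bit is
     therefore set exactly when A > 0.  */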
5787
5788  if (code == GT)
5789    {
5790      if (rtx_equal_p (subtarget, op0))
5791	subtarget = 0;
5792
5793      tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5794				GET_MODE_BITSIZE (mode) - 1,
5795				subtarget, 0);
5796      if (tem)
5797	tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5798			    OPTAB_WIDEN);
5799    }
5800
5801  if (code == EQ || code == NE)
5802    {
5803      /* For EQ or NE, one way to do the comparison is to apply an operation
5804	 that converts the operand into a positive number if it is nonzero
5805	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5806	 for NE we negate.  This puts the result in the sign bit.  Then we
5807	 normalize with a shift, if needed.
5808
5809	 Two operations that can do the above actions are ABS and FFS, so try
5810	 them.  If that doesn't work, and MODE is smaller than a full word,
5811	 we can use zero-extension to the wider mode (an unsigned conversion)
5812	 as the operation.  */
5813
5814      /* Note that ABS doesn't yield a positive number for INT_MIN, but
5815	 that is compensated by the subsequent overflow when subtracting
5816	 one / negating.  */
5817
5818      if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5819	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5820      else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5821	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5822      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5823	{
5824	  tem = convert_modes (word_mode, mode, op0, 1);
5825	  mode = word_mode;
5826	}
5827
5828      if (tem != 0)
5829	{
5830	  if (code == EQ)
5831	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5832				0, OPTAB_WIDEN);
5833	  else
5834	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5835	}
5836
5837      /* If we couldn't do it that way, for NE we can "or" the two's complement
5838	 of the value with itself.  For EQ, we take the one's complement of
5839	 that "or", which is an extra insn, so we only handle EQ if branches
5840	 are expensive.  */
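      /* -A | A has the sign bit set exactly when A is nonzero: for nonzero A
	 at least one of A and -A is negative (for the most negative value
	 both are), while for A == 0 the result is 0.  */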
5841
5842      if (tem == 0
5843	  && (code == NE
5844	      || BRANCH_COST (optimize_insn_for_speed_p (),
5845		      	      false) > 1))
5846	{
5847	  if (rtx_equal_p (subtarget, op0))
5848	    subtarget = 0;
5849
5850	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5851	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5852			      OPTAB_WIDEN);
5853
5854	  if (tem && code == EQ)
5855	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5856	}
5857    }
5858
5859  if (tem && normalizep)
5860    tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5861			      GET_MODE_BITSIZE (mode) - 1,
5862			      subtarget, normalizep == 1);
5863
5864  if (tem)
5865    {
5866      if (!target)
5867        ;
5868      else if (GET_MODE (tem) != target_mode)
5869	{
5870	  convert_move (target, tem, 0);
5871	  tem = target;
5872	}
5873      else if (!subtarget)
5874	{
5875	  emit_move_insn (target, tem);
5876	  tem = target;
5877	}
5878    }
5879  else
5880    delete_insns_since (last);
5881
5882  return tem;
5883}
5884
5885/* Like emit_store_flag, but always succeeds.  */
5886
5887rtx
5888emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5889		       machine_mode mode, int unsignedp, int normalizep)
5890{
5891  rtx tem;
5892  rtx_code_label *label;
5893  rtx trueval, falseval;
5894
5895  /* First see if emit_store_flag can do the job.  */
5896  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5897  if (tem != 0)
5898    return tem;
5899
5900  /* If one operand is constant, make it the second one.  Only do this
5901     if the other operand is not constant as well.  */
5902
5903  if (swap_commutative_operands_p (op0, op1))
5904    {
5905      std::swap (op0, op1);
5906      code = swap_condition (code);
5907    }
5908
5909  if (mode == VOIDmode)
5910    mode = GET_MODE (op0);
5911
5912  if (!target)
5913    target = gen_reg_rtx (word_mode);
5914
5915  /* If this failed, we have to do this with set/compare/jump/set code.
5916     For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5917  trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5918  if (code == NE
5919      && GET_MODE_CLASS (mode) == MODE_INT
5920      && REG_P (target)
5921      && op0 == target
5922      && op1 == const0_rtx)
5923    {
5924      label = gen_label_rtx ();
5925      do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5926			       NULL_RTX, NULL, label, -1);
5927      emit_move_insn (target, trueval);
5928      emit_label (label);
5929      return target;
5930    }
5931
5932  if (!REG_P (target)
5933      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5934    target = gen_reg_rtx (GET_MODE (target));
5935
5936  /* Jump in the right direction if the target cannot implement CODE
5937     but can jump on its reverse condition.  */
5938  falseval = const0_rtx;
5939  if (! can_compare_p (code, mode, ccp_jump)
5940      && (! FLOAT_MODE_P (mode)
5941          || code == ORDERED || code == UNORDERED
5942          || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5943          || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5944    {
5945      enum rtx_code rcode;
5946      if (FLOAT_MODE_P (mode))
5947        rcode = reverse_condition_maybe_unordered (code);
5948      else
5949        rcode = reverse_condition (code);
5950
5951      /* Canonicalize to UNORDERED for the libcall.  */
5952      if (can_compare_p (rcode, mode, ccp_jump)
5953          || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5954	{
5955	  falseval = trueval;
5956	  trueval = const0_rtx;
5957	  code = rcode;
5958	}
5959    }
5960
5961  emit_move_insn (target, trueval);
5962  label = gen_label_rtx ();
5963  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5964			   label, -1);
5965
5966  emit_move_insn (target, falseval);
5967  emit_label (label);
5968
5969  return target;
5970}
5971
5972/* Perform possibly multi-word comparison and conditional jump to LABEL
5973   if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5974   if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
5975
5976static void
5977do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5978		 rtx_code_label *label)
5979{
5980  int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5981  do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5982			   NULL, label, -1);
5983}
5984