1/* Medium-level subroutines: convert bit-field store and extract
2   and shifts, multiplies and divides to rtl instructions.
3   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
5   Free Software Foundation, Inc.
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 2, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING.  If not, write to the Free
21Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
2202110-1301, USA.  */
23
24
25#include "config.h"
26#include "system.h"
27#include "coretypes.h"
28#include "tm.h"
29#include "toplev.h"
30#include "rtl.h"
31#include "tree.h"
32#include "tm_p.h"
33#include "flags.h"
34#include "insn-config.h"
35#include "expr.h"
36#include "optabs.h"
37#include "real.h"
38#include "recog.h"
39#include "langhooks.h"
40
41static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
42				   unsigned HOST_WIDE_INT,
43				   unsigned HOST_WIDE_INT, rtx);
44static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
45				   unsigned HOST_WIDE_INT, rtx);
46static rtx extract_fixed_bit_field (enum machine_mode, rtx,
47				    unsigned HOST_WIDE_INT,
48				    unsigned HOST_WIDE_INT,
49				    unsigned HOST_WIDE_INT, rtx, int);
50static rtx mask_rtx (enum machine_mode, int, int, int);
51static rtx lshift_value (enum machine_mode, rtx, int, int);
52static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
53				    unsigned HOST_WIDE_INT, int);
54static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
55static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
56static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
57
58/* Test whether a value is zero of a power of two.  */
59#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
60
61/* Nonzero means divides or modulus operations are relatively cheap for
62   powers of two, so don't use branches; emit the operation instead.
63   Usually, this will mean that the MD file will emit non-branch
64   sequences.  */
65
66static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
67static bool smod_pow2_cheap[NUM_MACHINE_MODES];
68
69#ifndef SLOW_UNALIGNED_ACCESS
70#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
71#endif
72
73/* For compilers that support multiple targets with different word sizes,
74   MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
75   is the H8/300(H) compiler.  */
76
77#ifndef MAX_BITS_PER_WORD
78#define MAX_BITS_PER_WORD BITS_PER_WORD
79#endif
80
81/* Reduce conditional compilation elsewhere.  */
82#ifndef HAVE_insv
83#define HAVE_insv	0
84#define CODE_FOR_insv	CODE_FOR_nothing
85#define gen_insv(a,b,c,d) NULL_RTX
86#endif
87#ifndef HAVE_extv
88#define HAVE_extv	0
89#define CODE_FOR_extv	CODE_FOR_nothing
90#define gen_extv(a,b,c,d) NULL_RTX
91#endif
92#ifndef HAVE_extzv
93#define HAVE_extzv	0
94#define CODE_FOR_extzv	CODE_FOR_nothing
95#define gen_extzv(a,b,c,d) NULL_RTX
96#endif
97
98/* Cost of various pieces of RTL.  Note that some of these are indexed by
99   shift count and some by mode.  */
100static int zero_cost;
101static int add_cost[NUM_MACHINE_MODES];
102static int neg_cost[NUM_MACHINE_MODES];
103static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
104static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
105static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
106static int mul_cost[NUM_MACHINE_MODES];
107static int sdiv_cost[NUM_MACHINE_MODES];
108static int udiv_cost[NUM_MACHINE_MODES];
109static int mul_widen_cost[NUM_MACHINE_MODES];
110static int mul_highpart_cost[NUM_MACHINE_MODES];
111
112void
113init_expmed (void)
114{
115  struct
116  {
117    struct rtx_def reg;		rtunion reg_fld[2];
118    struct rtx_def plus;	rtunion plus_fld1;
119    struct rtx_def neg;
120    struct rtx_def mult;	rtunion mult_fld1;
121    struct rtx_def sdiv;	rtunion sdiv_fld1;
122    struct rtx_def udiv;	rtunion udiv_fld1;
123    struct rtx_def zext;
124    struct rtx_def sdiv_32;	rtunion sdiv_32_fld1;
125    struct rtx_def smod_32;	rtunion smod_32_fld1;
126    struct rtx_def wide_mult;	rtunion wide_mult_fld1;
127    struct rtx_def wide_lshr;	rtunion wide_lshr_fld1;
128    struct rtx_def wide_trunc;
129    struct rtx_def shift;	rtunion shift_fld1;
130    struct rtx_def shift_mult;	rtunion shift_mult_fld1;
131    struct rtx_def shift_add;	rtunion shift_add_fld1;
132    struct rtx_def shift_sub;	rtunion shift_sub_fld1;
133  } all;
134
135  rtx pow2[MAX_BITS_PER_WORD];
136  rtx cint[MAX_BITS_PER_WORD];
137  int m, n;
138  enum machine_mode mode, wider_mode;
139
140  zero_cost = rtx_cost (const0_rtx, 0);
141
142  for (m = 1; m < MAX_BITS_PER_WORD; m++)
143    {
144      pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
145      cint[m] = GEN_INT (m);
146    }
147
148  memset (&all, 0, sizeof all);
149
150  PUT_CODE (&all.reg, REG);
151  /* Avoid using hard regs in ways which may be unsupported.  */
152  REGNO (&all.reg) = LAST_VIRTUAL_REGISTER + 1;
153
154  PUT_CODE (&all.plus, PLUS);
155  XEXP (&all.plus, 0) = &all.reg;
156  XEXP (&all.plus, 1) = &all.reg;
157
158  PUT_CODE (&all.neg, NEG);
159  XEXP (&all.neg, 0) = &all.reg;
160
161  PUT_CODE (&all.mult, MULT);
162  XEXP (&all.mult, 0) = &all.reg;
163  XEXP (&all.mult, 1) = &all.reg;
164
165  PUT_CODE (&all.sdiv, DIV);
166  XEXP (&all.sdiv, 0) = &all.reg;
167  XEXP (&all.sdiv, 1) = &all.reg;
168
169  PUT_CODE (&all.udiv, UDIV);
170  XEXP (&all.udiv, 0) = &all.reg;
171  XEXP (&all.udiv, 1) = &all.reg;
172
173  PUT_CODE (&all.sdiv_32, DIV);
174  XEXP (&all.sdiv_32, 0) = &all.reg;
175  XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
176
177  PUT_CODE (&all.smod_32, MOD);
178  XEXP (&all.smod_32, 0) = &all.reg;
179  XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
180
181  PUT_CODE (&all.zext, ZERO_EXTEND);
182  XEXP (&all.zext, 0) = &all.reg;
183
184  PUT_CODE (&all.wide_mult, MULT);
185  XEXP (&all.wide_mult, 0) = &all.zext;
186  XEXP (&all.wide_mult, 1) = &all.zext;
187
188  PUT_CODE (&all.wide_lshr, LSHIFTRT);
189  XEXP (&all.wide_lshr, 0) = &all.wide_mult;
190
191  PUT_CODE (&all.wide_trunc, TRUNCATE);
192  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
193
194  PUT_CODE (&all.shift, ASHIFT);
195  XEXP (&all.shift, 0) = &all.reg;
196
197  PUT_CODE (&all.shift_mult, MULT);
198  XEXP (&all.shift_mult, 0) = &all.reg;
199
200  PUT_CODE (&all.shift_add, PLUS);
201  XEXP (&all.shift_add, 0) = &all.shift_mult;
202  XEXP (&all.shift_add, 1) = &all.reg;
203
204  PUT_CODE (&all.shift_sub, MINUS);
205  XEXP (&all.shift_sub, 0) = &all.shift_mult;
206  XEXP (&all.shift_sub, 1) = &all.reg;
207
208  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
209       mode != VOIDmode;
210       mode = GET_MODE_WIDER_MODE (mode))
211    {
212      PUT_MODE (&all.reg, mode);
213      PUT_MODE (&all.plus, mode);
214      PUT_MODE (&all.neg, mode);
215      PUT_MODE (&all.mult, mode);
216      PUT_MODE (&all.sdiv, mode);
217      PUT_MODE (&all.udiv, mode);
218      PUT_MODE (&all.sdiv_32, mode);
219      PUT_MODE (&all.smod_32, mode);
220      PUT_MODE (&all.wide_trunc, mode);
221      PUT_MODE (&all.shift, mode);
222      PUT_MODE (&all.shift_mult, mode);
223      PUT_MODE (&all.shift_add, mode);
224      PUT_MODE (&all.shift_sub, mode);
225
226      add_cost[mode] = rtx_cost (&all.plus, SET);
227      neg_cost[mode] = rtx_cost (&all.neg, SET);
228      mul_cost[mode] = rtx_cost (&all.mult, SET);
229      sdiv_cost[mode] = rtx_cost (&all.sdiv, SET);
230      udiv_cost[mode] = rtx_cost (&all.udiv, SET);
231
232      sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET)
233			       <= 2 * add_cost[mode]);
234      smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET)
235			       <= 4 * add_cost[mode]);
236
237      wider_mode = GET_MODE_WIDER_MODE (mode);
238      if (wider_mode != VOIDmode)
239	{
240	  PUT_MODE (&all.zext, wider_mode);
241	  PUT_MODE (&all.wide_mult, wider_mode);
242	  PUT_MODE (&all.wide_lshr, wider_mode);
243	  XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
244
245	  mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
246	  mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
247	}
248
249      shift_cost[mode][0] = 0;
250      shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
251
252      n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
253      for (m = 1; m < n; m++)
254	{
255	  XEXP (&all.shift, 1) = cint[m];
256	  XEXP (&all.shift_mult, 1) = pow2[m];
257
258	  shift_cost[mode][m] = rtx_cost (&all.shift, SET);
259	  shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
260	  shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
261	}
262    }
263}
264
265/* Return an rtx representing minus the value of X.
266   MODE is the intended mode of the result,
267   useful if X is a CONST_INT.  */
268
269rtx
270negate_rtx (enum machine_mode mode, rtx x)
271{
272  rtx result = simplify_unary_operation (NEG, mode, x, mode);
273
274  if (result == 0)
275    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
276
277  return result;
278}
279
280/* Report on the availability of insv/extv/extzv and the desired mode
281   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
282   is false; else the mode of the specified operand.  If OPNO is -1,
283   all the caller cares about is whether the insn is available.  */
284enum machine_mode
285mode_for_extraction (enum extraction_pattern pattern, int opno)
286{
287  const struct insn_data *data;
288
289  switch (pattern)
290    {
291    case EP_insv:
292      if (HAVE_insv)
293	{
294	  data = &insn_data[CODE_FOR_insv];
295	  break;
296	}
297      return MAX_MACHINE_MODE;
298
299    case EP_extv:
300      if (HAVE_extv)
301	{
302	  data = &insn_data[CODE_FOR_extv];
303	  break;
304	}
305      return MAX_MACHINE_MODE;
306
307    case EP_extzv:
308      if (HAVE_extzv)
309	{
310	  data = &insn_data[CODE_FOR_extzv];
311	  break;
312	}
313      return MAX_MACHINE_MODE;
314
315    default:
316      gcc_unreachable ();
317    }
318
319  if (opno == -1)
320    return VOIDmode;
321
322  /* Everyone who uses this function used to follow it with
323     if (result == VOIDmode) result = word_mode; */
324  if (data->operand[opno].mode == VOIDmode)
325    return word_mode;
326  return data->operand[opno].mode;
327}
328
329
330/* Generate code to store value from rtx VALUE
331   into a bit-field within structure STR_RTX
332   containing BITSIZE bits starting at bit BITNUM.
333   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
334   ALIGN is the alignment that STR_RTX is known to have.
335   TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
336
337/* ??? Note that there are two different ideas here for how
338   to determine the size to count bits within, for a register.
339   One is BITS_PER_WORD, and the other is the size of operand 3
340   of the insv pattern.
341
342   If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
343   else, we use the mode of operand 3.  */
344
345rtx
346store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
347		 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
348		 rtx value)
349{
350  unsigned int unit
351    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
352  unsigned HOST_WIDE_INT offset, bitpos;
353  rtx op0 = str_rtx;
354  int byte_offset;
355  rtx orig_value;
356
357  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
358
359  while (GET_CODE (op0) == SUBREG)
360    {
361      /* The following line once was done only if WORDS_BIG_ENDIAN,
362	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
363	 meaningful at a much higher level; when structures are copied
364	 between memory and regs, the higher-numbered regs
365	 always get higher addresses.  */
366      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
367      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
368
369      byte_offset = 0;
370
371      /* Paradoxical subregs need special handling on big endian machines.  */
372      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
373	{
374	  int difference = inner_mode_size - outer_mode_size;
375
376	  if (WORDS_BIG_ENDIAN)
377	    byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
378	  if (BYTES_BIG_ENDIAN)
379	    byte_offset += difference % UNITS_PER_WORD;
380	}
381      else
382	byte_offset = SUBREG_BYTE (op0);
383
384      bitnum += byte_offset * BITS_PER_UNIT;
385      op0 = SUBREG_REG (op0);
386    }
387
388  /* No action is needed if the target is a register and if the field
389     lies completely outside that register.  This can occur if the source
390     code contains an out-of-bounds access to a small array.  */
391  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
392    return value;
393
394  /* Use vec_set patterns for inserting parts of vectors whenever
395     available.  */
396  if (VECTOR_MODE_P (GET_MODE (op0))
397      && !MEM_P (op0)
398      && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
399	  != CODE_FOR_nothing)
400      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
401      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
402      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
403    {
404      enum machine_mode outermode = GET_MODE (op0);
405      enum machine_mode innermode = GET_MODE_INNER (outermode);
406      int icode = (int) vec_set_optab->handlers[outermode].insn_code;
407      int pos = bitnum / GET_MODE_BITSIZE (innermode);
408      rtx rtxpos = GEN_INT (pos);
409      rtx src = value;
410      rtx dest = op0;
411      rtx pat, seq;
412      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
413      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
414      enum machine_mode mode2 = insn_data[icode].operand[2].mode;
415
416      start_sequence ();
417
418      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
419	src = copy_to_mode_reg (mode1, src);
420
421      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
422	rtxpos = copy_to_mode_reg (mode1, rtxpos);
423
424      /* We could handle this, but we should always be called with a pseudo
425	 for our targets and all insns should take them as outputs.  */
426      gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
427		  && (*insn_data[icode].operand[1].predicate) (src, mode1)
428		  && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
429      pat = GEN_FCN (icode) (dest, src, rtxpos);
430      seq = get_insns ();
431      end_sequence ();
432      if (pat)
433	{
434	  emit_insn (seq);
435	  emit_insn (pat);
436	  return dest;
437	}
438    }
439
440  /* If the target is a register, overwriting the entire object, or storing
441     a full-word or multi-word field can be done with just a SUBREG.
442
443     If the target is memory, storing any naturally aligned field can be
444     done with a simple store.  For targets that support fast unaligned
445     memory, any naturally sized, unit aligned field can be done directly.  */
446
447  offset = bitnum / unit;
448  bitpos = bitnum % unit;
449  byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
450                + (offset * UNITS_PER_WORD);
451
452  if (bitpos == 0
453      && bitsize == GET_MODE_BITSIZE (fieldmode)
454      && (!MEM_P (op0)
455	  ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
456	     || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
457	     && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
458	  : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
459	     || (offset * BITS_PER_UNIT % bitsize == 0
460		 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
461    {
462      if (MEM_P (op0))
463	op0 = adjust_address (op0, fieldmode, offset);
464      else if (GET_MODE (op0) != fieldmode)
465	op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
466				   byte_offset);
467      emit_move_insn (op0, value);
468      return value;
469    }
470
471  /* Make sure we are playing with integral modes.  Pun with subregs
472     if we aren't.  This must come after the entire register case above,
473     since that case is valid for any mode.  The following cases are only
474     valid for integral modes.  */
475  {
476    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
477    if (imode != GET_MODE (op0))
478      {
479	if (MEM_P (op0))
480	  op0 = adjust_address (op0, imode, 0);
481	else
482	  {
483	    gcc_assert (imode != BLKmode);
484	    op0 = gen_lowpart (imode, op0);
485	  }
486      }
487  }
488
489  /* We may be accessing data outside the field, which means
490     we can alias adjacent data.  */
491  if (MEM_P (op0))
492    {
493      op0 = shallow_copy_rtx (op0);
494      set_mem_alias_set (op0, 0);
495      set_mem_expr (op0, 0);
496    }
497
498  /* If OP0 is a register, BITPOS must count within a word.
499     But as we have it, it counts within whatever size OP0 now has.
500     On a bigendian machine, these are not the same, so convert.  */
501  if (BYTES_BIG_ENDIAN
502      && !MEM_P (op0)
503      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
504    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
505
506  /* Storing an lsb-aligned field in a register
507     can be done with a movestrict instruction.  */
508
509  if (!MEM_P (op0)
510      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
511      && bitsize == GET_MODE_BITSIZE (fieldmode)
512      && (movstrict_optab->handlers[fieldmode].insn_code
513	  != CODE_FOR_nothing))
514    {
515      int icode = movstrict_optab->handlers[fieldmode].insn_code;
516
517      /* Get appropriate low part of the value being stored.  */
518      if (GET_CODE (value) == CONST_INT || REG_P (value))
519	value = gen_lowpart (fieldmode, value);
520      else if (!(GET_CODE (value) == SYMBOL_REF
521		 || GET_CODE (value) == LABEL_REF
522		 || GET_CODE (value) == CONST))
523	value = convert_to_mode (fieldmode, value, 0);
524
525      if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
526	value = copy_to_mode_reg (fieldmode, value);
527
528      if (GET_CODE (op0) == SUBREG)
529	{
530	  /* Else we've got some float mode source being extracted into
531	     a different float mode destination -- this combination of
532	     subregs results in Severe Tire Damage.  */
533	  gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
534		      || GET_MODE_CLASS (fieldmode) == MODE_INT
535		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
536	  op0 = SUBREG_REG (op0);
537	}
538
539      emit_insn (GEN_FCN (icode)
540		 (gen_rtx_SUBREG (fieldmode, op0,
541				  (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
542				  + (offset * UNITS_PER_WORD)),
543				  value));
544
545      return value;
546    }
547
548  /* Handle fields bigger than a word.  */
549
550  if (bitsize > BITS_PER_WORD)
551    {
552      /* Here we transfer the words of the field
553	 in the order least significant first.
554	 This is because the most significant word is the one which may
555	 be less than full.
556	 However, only do that if the value is not BLKmode.  */
557
558      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
559      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
560      unsigned int i;
561
562      /* This is the mode we must force value to, so that there will be enough
563	 subwords to extract.  Note that fieldmode will often (always?) be
564	 VOIDmode, because that is what store_field uses to indicate that this
565	 is a bit field, but passing VOIDmode to operand_subword_force
566	 is not allowed.  */
567      fieldmode = GET_MODE (value);
568      if (fieldmode == VOIDmode)
569	fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
570
571      for (i = 0; i < nwords; i++)
572	{
573	  /* If I is 0, use the low-order word in both field and target;
574	     if I is 1, use the next to lowest word; and so on.  */
575	  unsigned int wordnum = (backwards ? nwords - i - 1 : i);
576	  unsigned int bit_offset = (backwards
577				     ? MAX ((int) bitsize - ((int) i + 1)
578					    * BITS_PER_WORD,
579					    0)
580				     : (int) i * BITS_PER_WORD);
581
582	  store_bit_field (op0, MIN (BITS_PER_WORD,
583				     bitsize - i * BITS_PER_WORD),
584			   bitnum + bit_offset, word_mode,
585			   operand_subword_force (value, wordnum, fieldmode));
586	}
587      return value;
588    }
589
590  /* From here on we can assume that the field to be stored in is
591     a full-word (whatever type that is), since it is shorter than a word.  */
592
593  /* OFFSET is the number of words or bytes (UNIT says which)
594     from STR_RTX to the first word or byte containing part of the field.  */
595
596  if (!MEM_P (op0))
597    {
598      if (offset != 0
599	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
600	{
601	  if (!REG_P (op0))
602	    {
603	      /* Since this is a destination (lvalue), we can't copy
604		 it to a pseudo.  We can remove a SUBREG that does not
605		 change the size of the operand.  Such a SUBREG may
606		 have been added above.  */
607	      gcc_assert (GET_CODE (op0) == SUBREG
608			  && (GET_MODE_SIZE (GET_MODE (op0))
609			      == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
610	      op0 = SUBREG_REG (op0);
611	    }
612	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
613		                op0, (offset * UNITS_PER_WORD));
614	}
615      offset = 0;
616    }
617
618  /* If VALUE has a floating-point or complex mode, access it as an
619     integer of the corresponding size.  This can occur on a machine
620     with 64 bit registers that uses SFmode for float.  It can also
621     occur for unaligned float or complex fields.  */
622  orig_value = value;
623  if (GET_MODE (value) != VOIDmode
624      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
625      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
626    {
627      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
628      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
629    }
630
631  /* Now OFFSET is nonzero only if OP0 is memory
632     and is therefore always measured in bytes.  */
633
634  if (HAVE_insv
635      && GET_MODE (value) != BLKmode
636      && bitsize > 0
637      && GET_MODE_BITSIZE (op_mode) >= bitsize
638      && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
639	    && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
640      && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
641							VOIDmode))
642    {
643      int xbitpos = bitpos;
644      rtx value1;
645      rtx xop0 = op0;
646      rtx last = get_last_insn ();
647      rtx pat;
648      enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
649      int save_volatile_ok = volatile_ok;
650
651      volatile_ok = 1;
652
653      /* If this machine's insv can only insert into a register, copy OP0
654	 into a register and save it back later.  */
655      if (MEM_P (op0)
656	  && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
657		(op0, VOIDmode)))
658	{
659	  rtx tempreg;
660	  enum machine_mode bestmode;
661
662	  /* Get the mode to use for inserting into this field.  If OP0 is
663	     BLKmode, get the smallest mode consistent with the alignment. If
664	     OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
665	     mode. Otherwise, use the smallest mode containing the field.  */
666
667	  if (GET_MODE (op0) == BLKmode
668	      || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
669	    bestmode
670	      = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
671			       MEM_VOLATILE_P (op0));
672	  else
673	    bestmode = GET_MODE (op0);
674
675	  if (bestmode == VOIDmode
676	      || GET_MODE_SIZE (bestmode) < GET_MODE_SIZE (fieldmode)
677	      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
678		  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
679	    goto insv_loses;
680
681	  /* Adjust address to point to the containing unit of that mode.
682	     Compute offset as multiple of this unit, counting in bytes.  */
683	  unit = GET_MODE_BITSIZE (bestmode);
684	  offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
685	  bitpos = bitnum % unit;
686	  op0 = adjust_address (op0, bestmode,  offset);
687
688	  /* Fetch that unit, store the bitfield in it, then store
689	     the unit.  */
690	  tempreg = copy_to_reg (op0);
691	  store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
692	  emit_move_insn (op0, tempreg);
693	  return value;
694	}
695      volatile_ok = save_volatile_ok;
696
697      /* Add OFFSET into OP0's address.  */
698      if (MEM_P (xop0))
699	xop0 = adjust_address (xop0, byte_mode, offset);
700
701      /* If xop0 is a register, we need it in MAXMODE
702	 to make it acceptable to the format of insv.  */
703      if (GET_CODE (xop0) == SUBREG)
704	/* We can't just change the mode, because this might clobber op0,
705	   and we will need the original value of op0 if insv fails.  */
706	xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
707      if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
708	xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
709
710      /* On big-endian machines, we count bits from the most significant.
711	 If the bit field insn does not, we must invert.  */
712
713      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
714	xbitpos = unit - bitsize - xbitpos;
715
716      /* We have been counting XBITPOS within UNIT.
717	 Count instead within the size of the register.  */
718      if (BITS_BIG_ENDIAN && !MEM_P (xop0))
719	xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
720
721      unit = GET_MODE_BITSIZE (maxmode);
722
723      /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
724      value1 = value;
725      if (GET_MODE (value) != maxmode)
726	{
727	  if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
728	    {
729	      /* Optimization: Don't bother really extending VALUE
730		 if it has all the bits we will actually use.  However,
731		 if we must narrow it, be sure we do it correctly.  */
732
733	      if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
734		{
735		  rtx tmp;
736
737		  tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
738		  if (! tmp)
739		    tmp = simplify_gen_subreg (maxmode,
740					       force_reg (GET_MODE (value),
741							  value1),
742					       GET_MODE (value), 0);
743		  value1 = tmp;
744		}
745	      else
746		value1 = gen_lowpart (maxmode, value1);
747	    }
748	  else if (GET_CODE (value) == CONST_INT)
749	    value1 = gen_int_mode (INTVAL (value), maxmode);
750	  else
751	    /* Parse phase is supposed to make VALUE's data type
752	       match that of the component reference, which is a type
753	       at least as wide as the field; so VALUE should have
754	       a mode that corresponds to that type.  */
755	    gcc_assert (CONSTANT_P (value));
756	}
757
758      /* If this machine's insv insists on a register,
759	 get VALUE1 into a register.  */
760      if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
761	     (value1, maxmode)))
762	value1 = force_reg (maxmode, value1);
763
764      pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
765      if (pat)
766	emit_insn (pat);
767      else
768	{
769	  delete_insns_since (last);
770	  store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
771	}
772    }
773  else
774    insv_loses:
775    /* Insv is not available; store using shifts and boolean ops.  */
776    store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
777  return value;
778}
779
780/* Use shifts and boolean operations to store VALUE
781   into a bit field of width BITSIZE
782   in a memory location specified by OP0 except offset by OFFSET bytes.
783     (OFFSET must be 0 if OP0 is a register.)
784   The field starts at position BITPOS within the byte.
785    (If OP0 is a register, it may be a full word or a narrower mode,
786     but BITPOS still counts within a full word,
787     which is significant on bigendian machines.)  */
788
789static void
790store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
791		       unsigned HOST_WIDE_INT bitsize,
792		       unsigned HOST_WIDE_INT bitpos, rtx value)
793{
794  enum machine_mode mode;
795  unsigned int total_bits = BITS_PER_WORD;
796  rtx temp;
797  int all_zero = 0;
798  int all_one = 0;
799
800  /* There is a case not handled here:
801     a structure with a known alignment of just a halfword
802     and a field split across two aligned halfwords within the structure.
803     Or likewise a structure with a known alignment of just a byte
804     and a field split across two bytes.
805     Such cases are not supposed to be able to occur.  */
806
807  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
808    {
809      gcc_assert (!offset);
810      /* Special treatment for a bit field split across two registers.  */
811      if (bitsize + bitpos > BITS_PER_WORD)
812	{
813	  store_split_bit_field (op0, bitsize, bitpos, value);
814	  return;
815	}
816    }
817  else
818    {
819      /* Get the proper mode to use for this field.  We want a mode that
820	 includes the entire field.  If such a mode would be larger than
821	 a word, we won't be doing the extraction the normal way.
822	 We don't want a mode bigger than the destination.  */
823
824      mode = GET_MODE (op0);
825      if (GET_MODE_BITSIZE (mode) == 0
826	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
827	mode = word_mode;
828      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
829			    MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
830
831      if (mode == VOIDmode)
832	{
833	  /* The only way this should occur is if the field spans word
834	     boundaries.  */
835	  store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
836				 value);
837	  return;
838	}
839
840      total_bits = GET_MODE_BITSIZE (mode);
841
842      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
843	 be in the range 0 to total_bits-1, and put any excess bytes in
844	 OFFSET.  */
845      if (bitpos >= total_bits)
846	{
847	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
848	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
849		     * BITS_PER_UNIT);
850	}
851
852      /* Get ref to an aligned byte, halfword, or word containing the field.
853	 Adjust BITPOS to be position within a word,
854	 and OFFSET to be the offset of that word.
855	 Then alter OP0 to refer to that word.  */
856      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
857      offset -= (offset % (total_bits / BITS_PER_UNIT));
858      op0 = adjust_address (op0, mode, offset);
859    }
860
861  mode = GET_MODE (op0);
862
863  /* Now MODE is either some integral mode for a MEM as OP0,
864     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
865     The bit field is contained entirely within OP0.
866     BITPOS is the starting bit number within OP0.
867     (OP0's mode may actually be narrower than MODE.)  */
868
869  if (BYTES_BIG_ENDIAN)
870      /* BITPOS is the distance between our msb
871	 and that of the containing datum.
872	 Convert it to the distance from the lsb.  */
873      bitpos = total_bits - bitsize - bitpos;
874
875  /* Now BITPOS is always the distance between our lsb
876     and that of OP0.  */
877
878  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
879     we must first convert its mode to MODE.  */
880
881  if (GET_CODE (value) == CONST_INT)
882    {
883      HOST_WIDE_INT v = INTVAL (value);
884
885      if (bitsize < HOST_BITS_PER_WIDE_INT)
886	v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
887
888      if (v == 0)
889	all_zero = 1;
890      else if ((bitsize < HOST_BITS_PER_WIDE_INT
891		&& v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
892	       || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
893	all_one = 1;
894
895      value = lshift_value (mode, value, bitpos, bitsize);
896    }
897  else
898    {
899      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
900		      && bitpos + bitsize != GET_MODE_BITSIZE (mode));
901
902      if (GET_MODE (value) != mode)
903	{
904	  if ((REG_P (value) || GET_CODE (value) == SUBREG)
905	      && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
906	    value = gen_lowpart (mode, value);
907	  else
908	    value = convert_to_mode (mode, value, 1);
909	}
910
911      if (must_and)
912	value = expand_binop (mode, and_optab, value,
913			      mask_rtx (mode, 0, bitsize, 0),
914			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
915      if (bitpos > 0)
916	value = expand_shift (LSHIFT_EXPR, mode, value,
917			      build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
918    }
919
920  /* Now clear the chosen bits in OP0,
921     except that if VALUE is -1 we need not bother.  */
922  /* We keep the intermediates in registers to allow CSE to combine
923     consecutive bitfield assignments.  */
924
925  temp = force_reg (mode, op0);
926
927  if (! all_one)
928    {
929      temp = expand_binop (mode, and_optab, temp,
930			   mask_rtx (mode, bitpos, bitsize, 1),
931			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
932      temp = force_reg (mode, temp);
933    }
934
935  /* Now logical-or VALUE into OP0, unless it is zero.  */
936
937  if (! all_zero)
938    {
939      temp = expand_binop (mode, ior_optab, temp, value,
940			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
941      temp = force_reg (mode, temp);
942    }
943
944  if (op0 != temp)
945    emit_move_insn (op0, temp);
946}
947
948/* Store a bit field that is split across multiple accessible memory objects.
949
950   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
951   BITSIZE is the field width; BITPOS the position of its first bit
952   (within the word).
953   VALUE is the value to store.
954
955   This does not yet handle fields wider than BITS_PER_WORD.  */
956
957static void
958store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
959		       unsigned HOST_WIDE_INT bitpos, rtx value)
960{
961  unsigned int unit;
962  unsigned int bitsdone = 0;
963
964  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
965     much at a time.  */
966  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
967    unit = BITS_PER_WORD;
968  else
969    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
970
971  /* If VALUE is a constant other than a CONST_INT, get it into a register in
972     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
973     that VALUE might be a floating-point constant.  */
974  if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
975    {
976      rtx word = gen_lowpart_common (word_mode, value);
977
978      if (word && (value != word))
979	value = word;
980      else
981	value = gen_lowpart_common (word_mode,
982				    force_reg (GET_MODE (value) != VOIDmode
983					       ? GET_MODE (value)
984					       : word_mode, value));
985    }
986
987  while (bitsdone < bitsize)
988    {
989      unsigned HOST_WIDE_INT thissize;
990      rtx part, word;
991      unsigned HOST_WIDE_INT thispos;
992      unsigned HOST_WIDE_INT offset;
993
994      offset = (bitpos + bitsdone) / unit;
995      thispos = (bitpos + bitsdone) % unit;
996
997      /* THISSIZE must not overrun a word boundary.  Otherwise,
998	 store_fixed_bit_field will call us again, and we will mutually
999	 recurse forever.  */
1000      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1001      thissize = MIN (thissize, unit - thispos);
1002
1003      if (BYTES_BIG_ENDIAN)
1004	{
1005	  int total_bits;
1006
1007	  /* We must do an endian conversion exactly the same way as it is
1008	     done in extract_bit_field, so that the two calls to
1009	     extract_fixed_bit_field will have comparable arguments.  */
1010	  if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1011	    total_bits = BITS_PER_WORD;
1012	  else
1013	    total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1014
1015	  /* Fetch successively less significant portions.  */
1016	  if (GET_CODE (value) == CONST_INT)
1017	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1018			     >> (bitsize - bitsdone - thissize))
1019			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
1020	  else
1021	    /* The args are chosen so that the last part includes the
1022	       lsb.  Give extract_bit_field the value it needs (with
1023	       endianness compensation) to fetch the piece we want.  */
1024	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1025					    total_bits - bitsize + bitsdone,
1026					    NULL_RTX, 1);
1027	}
1028      else
1029	{
1030	  /* Fetch successively more significant portions.  */
1031	  if (GET_CODE (value) == CONST_INT)
1032	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1033			     >> bitsdone)
1034			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
1035	  else
1036	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1037					    bitsdone, NULL_RTX, 1);
1038	}
1039
1040      /* If OP0 is a register, then handle OFFSET here.
1041
1042	 When handling multiword bitfields, extract_bit_field may pass
1043	 down a word_mode SUBREG of a larger REG for a bitfield that actually
1044	 crosses a word boundary.  Thus, for a SUBREG, we must find
1045	 the current word starting from the base register.  */
1046      if (GET_CODE (op0) == SUBREG)
1047	{
1048	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1049	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
1050					GET_MODE (SUBREG_REG (op0)));
1051	  offset = 0;
1052	}
1053      else if (REG_P (op0))
1054	{
1055	  word = operand_subword_force (op0, offset, GET_MODE (op0));
1056	  offset = 0;
1057	}
1058      else
1059	word = op0;
1060
1061      /* OFFSET is in UNITs, and UNIT is in bits.
1062         store_fixed_bit_field wants offset in bytes.  */
1063      store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1064			     thispos, part);
1065      bitsdone += thissize;
1066    }
1067}
1068
1069/* Generate code to extract a byte-field from STR_RTX
1070   containing BITSIZE bits, starting at BITNUM,
1071   and put it in TARGET if possible (if TARGET is nonzero).
1072   Regardless of TARGET, we return the rtx for where the value is placed.
1073
1074   STR_RTX is the structure containing the byte (a REG or MEM).
1075   UNSIGNEDP is nonzero if this is an unsigned bit field.
1076   MODE is the natural mode of the field value once extracted.
1077   TMODE is the mode the caller would like the value to have;
1078   but the value may be returned with type MODE instead.
1079
1080   TOTAL_SIZE is the size in bytes of the containing structure,
1081   or -1 if varying.
1082
1083   If a TARGET is specified and we can store in it at no extra cost,
1084   we do so, and return TARGET.
1085   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1086   if they are equally easy.  */
1087
1088rtx
1089extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1090		   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1091		   enum machine_mode mode, enum machine_mode tmode)
1092{
1093  unsigned int unit
1094    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1095  unsigned HOST_WIDE_INT offset, bitpos;
1096  rtx op0 = str_rtx;
1097  rtx spec_target = target;
1098  rtx spec_target_subreg = 0;
1099  enum machine_mode int_mode;
1100  enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1101  enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1102  enum machine_mode mode1;
1103  int byte_offset;
1104
1105  if (tmode == VOIDmode)
1106    tmode = mode;
1107
1108  while (GET_CODE (op0) == SUBREG)
1109    {
1110      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1111      op0 = SUBREG_REG (op0);
1112    }
1113
1114  /* If we have an out-of-bounds access to a register, just return an
1115     uninitialized register of the required mode.  This can occur if the
1116     source code contains an out-of-bounds access to a small array.  */
1117  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1118    return gen_reg_rtx (tmode);
1119
1120  if (REG_P (op0)
1121      && mode == GET_MODE (op0)
1122      && bitnum == 0
1123      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1124    {
1125      /* We're trying to extract a full register from itself.  */
1126      return op0;
1127    }
1128
1129  /* Use vec_extract patterns for extracting parts of vectors whenever
1130     available.  */
1131  if (VECTOR_MODE_P (GET_MODE (op0))
1132      && !MEM_P (op0)
1133      && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1134	  != CODE_FOR_nothing)
1135      && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1136	  == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1137    {
1138      enum machine_mode outermode = GET_MODE (op0);
1139      enum machine_mode innermode = GET_MODE_INNER (outermode);
1140      int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1141      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1142      rtx rtxpos = GEN_INT (pos);
1143      rtx src = op0;
1144      rtx dest = NULL, pat, seq;
1145      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1146      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1147      enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1148
1149      if (innermode == tmode || innermode == mode)
1150	dest = target;
1151
1152      if (!dest)
1153	dest = gen_reg_rtx (innermode);
1154
1155      start_sequence ();
1156
1157      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1158	dest = copy_to_mode_reg (mode0, dest);
1159
1160      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1161	src = copy_to_mode_reg (mode1, src);
1162
1163      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1164	rtxpos = copy_to_mode_reg (mode1, rtxpos);
1165
1166      /* We could handle this, but we should always be called with a pseudo
1167	 for our targets and all insns should take them as outputs.  */
1168      gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1169		  && (*insn_data[icode].operand[1].predicate) (src, mode1)
1170		  && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1171
1172      pat = GEN_FCN (icode) (dest, src, rtxpos);
1173      seq = get_insns ();
1174      end_sequence ();
1175      if (pat)
1176	{
1177	  emit_insn (seq);
1178	  emit_insn (pat);
1179	  return dest;
1180	}
1181    }
1182
1183  /* Make sure we are playing with integral modes.  Pun with subregs
1184     if we aren't.  */
1185  {
1186    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1187    if (imode != GET_MODE (op0))
1188      {
1189	if (MEM_P (op0))
1190	  op0 = adjust_address (op0, imode, 0);
1191	else
1192	  {
1193	    gcc_assert (imode != BLKmode);
1194	    op0 = gen_lowpart (imode, op0);
1195
1196	    /* If we got a SUBREG, force it into a register since we
1197	       aren't going to be able to do another SUBREG on it.  */
1198	    if (GET_CODE (op0) == SUBREG)
1199	      op0 = force_reg (imode, op0);
1200	  }
1201      }
1202  }
1203
1204  /* We may be accessing data outside the field, which means
1205     we can alias adjacent data.  */
1206  if (MEM_P (op0))
1207    {
1208      op0 = shallow_copy_rtx (op0);
1209      set_mem_alias_set (op0, 0);
1210      set_mem_expr (op0, 0);
1211    }
1212
1213  /* Extraction of a full-word or multi-word value from a structure
1214     in a register or aligned memory can be done with just a SUBREG.
1215     A subword value in the least significant part of a register
1216     can also be extracted with a SUBREG.  For this, we need the
1217     byte offset of the value in op0.  */
1218
1219  bitpos = bitnum % unit;
1220  offset = bitnum / unit;
1221  byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1222
1223  /* If OP0 is a register, BITPOS must count within a word.
1224     But as we have it, it counts within whatever size OP0 now has.
1225     On a bigendian machine, these are not the same, so convert.  */
1226  if (BYTES_BIG_ENDIAN
1227      && !MEM_P (op0)
1228      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1229    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1230
1231  /* ??? We currently assume TARGET is at least as big as BITSIZE.
1232     If that's wrong, the solution is to test for it and set TARGET to 0
1233     if needed.  */
1234
1235  /* Only scalar integer modes can be converted via subregs.  There is an
1236     additional problem for FP modes here in that they can have a precision
1237     which is different from the size.  mode_for_size uses precision, but
1238     we want a mode based on the size, so we must avoid calling it for FP
1239     modes.  */
1240  mode1  = (SCALAR_INT_MODE_P (tmode)
1241	    ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1242	    : mode);
1243
1244  if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1245	&& bitpos % BITS_PER_WORD == 0)
1246       || (mode1 != BLKmode
1247	   /* ??? The big endian test here is wrong.  This is correct
1248	      if the value is in a register, and if mode_for_size is not
1249	      the same mode as op0.  This causes us to get unnecessarily
1250	      inefficient code from the Thumb port when -mbig-endian.  */
1251	   && (BYTES_BIG_ENDIAN
1252	       ? bitpos + bitsize == BITS_PER_WORD
1253	       : bitpos == 0)))
1254      && ((!MEM_P (op0)
1255	   && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1256				     GET_MODE_BITSIZE (GET_MODE (op0)))
1257	   && GET_MODE_SIZE (mode1) != 0
1258	   && byte_offset % GET_MODE_SIZE (mode1) == 0)
1259	  || (MEM_P (op0)
1260	      && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1261		  || (offset * BITS_PER_UNIT % bitsize == 0
1262		      && MEM_ALIGN (op0) % bitsize == 0)))))
1263    {
1264      if (mode1 != GET_MODE (op0))
1265	{
1266	  if (MEM_P (op0))
1267	    op0 = adjust_address (op0, mode1, offset);
1268	  else
1269	    {
1270	      rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1271					     byte_offset);
1272	      if (sub == NULL)
1273		goto no_subreg_mode_swap;
1274	      op0 = sub;
1275	    }
1276	}
1277      if (mode1 != mode)
1278	return convert_to_mode (tmode, op0, unsignedp);
1279      return op0;
1280    }
1281 no_subreg_mode_swap:
1282
1283  /* Handle fields bigger than a word.  */
1284
1285  if (bitsize > BITS_PER_WORD)
1286    {
1287      /* Here we transfer the words of the field
1288	 in the order least significant first.
1289	 This is because the most significant word is the one which may
1290	 be less than full.  */
1291
1292      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1293      unsigned int i;
1294
1295      if (target == 0 || !REG_P (target))
1296	target = gen_reg_rtx (mode);
1297
1298      /* Indicate for flow that the entire target reg is being set.  */
1299      emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1300
1301      for (i = 0; i < nwords; i++)
1302	{
1303	  /* If I is 0, use the low-order word in both field and target;
1304	     if I is 1, use the next to lowest word; and so on.  */
1305	  /* Word number in TARGET to use.  */
1306	  unsigned int wordnum
1307	    = (WORDS_BIG_ENDIAN
1308	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1309	       : i);
1310	  /* Offset from start of field in OP0.  */
1311	  unsigned int bit_offset = (WORDS_BIG_ENDIAN
1312				     ? MAX (0, ((int) bitsize - ((int) i + 1)
1313						* (int) BITS_PER_WORD))
1314				     : (int) i * BITS_PER_WORD);
1315	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1316	  rtx result_part
1317	    = extract_bit_field (op0, MIN (BITS_PER_WORD,
1318					   bitsize - i * BITS_PER_WORD),
1319				 bitnum + bit_offset, 1, target_part, mode,
1320				 word_mode);
1321
1322	  gcc_assert (target_part);
1323
1324	  if (result_part != target_part)
1325	    emit_move_insn (target_part, result_part);
1326	}
1327
1328      if (unsignedp)
1329	{
1330	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
1331	     need to be zero'd out.  */
1332	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1333	    {
1334	      unsigned int i, total_words;
1335
1336	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1337	      for (i = nwords; i < total_words; i++)
1338		emit_move_insn
1339		  (operand_subword (target,
1340				    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1341				    1, VOIDmode),
1342		   const0_rtx);
1343	    }
1344	  return target;
1345	}
1346
1347      /* Signed bit field: sign-extend with two arithmetic shifts.  */
1348      target = expand_shift (LSHIFT_EXPR, mode, target,
1349			     build_int_cst (NULL_TREE,
1350					    GET_MODE_BITSIZE (mode) - bitsize),
1351			     NULL_RTX, 0);
1352      return expand_shift (RSHIFT_EXPR, mode, target,
1353			   build_int_cst (NULL_TREE,
1354					  GET_MODE_BITSIZE (mode) - bitsize),
1355			   NULL_RTX, 0);
1356    }
1357
1358  /* From here on we know the desired field is smaller than a word.  */
1359
1360  /* Check if there is a correspondingly-sized integer field, so we can
1361     safely extract it as one size of integer, if necessary; then
1362     truncate or extend to the size that is wanted; then use SUBREGs or
1363     convert_to_mode to get one of the modes we really wanted.  */
1364
1365  int_mode = int_mode_for_mode (tmode);
1366  if (int_mode == BLKmode)
1367    int_mode = int_mode_for_mode (mode);
1368  /* Should probably push op0 out to memory and then do a load.  */
1369  gcc_assert (int_mode != BLKmode);
1370
1371  /* OFFSET is the number of words or bytes (UNIT says which)
1372     from STR_RTX to the first word or byte containing part of the field.  */
1373  if (!MEM_P (op0))
1374    {
1375      if (offset != 0
1376	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1377	{
1378	  if (!REG_P (op0))
1379	    op0 = copy_to_reg (op0);
1380	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1381		                op0, (offset * UNITS_PER_WORD));
1382	}
1383      offset = 0;
1384    }
1385
1386  /* Now OFFSET is nonzero only for memory operands.  */
1387
1388  if (unsignedp)
1389    {
1390      if (HAVE_extzv
1391	  && bitsize > 0
1392	  && GET_MODE_BITSIZE (extzv_mode) >= bitsize
1393	  && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1394		&& (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1395	{
1396	  unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1397	  rtx bitsize_rtx, bitpos_rtx;
1398	  rtx last = get_last_insn ();
1399	  rtx xop0 = op0;
1400	  rtx xtarget = target;
1401	  rtx xspec_target = spec_target;
1402	  rtx xspec_target_subreg = spec_target_subreg;
1403	  rtx pat;
1404	  enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1405
1406	  if (MEM_P (xop0))
1407	    {
1408	      int save_volatile_ok = volatile_ok;
1409	      volatile_ok = 1;
1410
1411	      /* Is the memory operand acceptable?  */
1412	      if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1413		     (xop0, GET_MODE (xop0))))
1414		{
1415		  /* No, load into a reg and extract from there.  */
1416		  enum machine_mode bestmode;
1417
1418		  /* Get the mode to use for inserting into this field.  If
1419		     OP0 is BLKmode, get the smallest mode consistent with the
1420		     alignment. If OP0 is a non-BLKmode object that is no
1421		     wider than MAXMODE, use its mode. Otherwise, use the
1422		     smallest mode containing the field.  */
1423
1424		  if (GET_MODE (xop0) == BLKmode
1425		      || (GET_MODE_SIZE (GET_MODE (op0))
1426			  > GET_MODE_SIZE (maxmode)))
1427		    bestmode = get_best_mode (bitsize, bitnum,
1428					      MEM_ALIGN (xop0), maxmode,
1429					      MEM_VOLATILE_P (xop0));
1430		  else
1431		    bestmode = GET_MODE (xop0);
1432
1433		  if (bestmode == VOIDmode
1434		      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1435			  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1436		    goto extzv_loses;
1437
1438		  /* Compute offset as multiple of this unit,
1439		     counting in bytes.  */
1440		  unit = GET_MODE_BITSIZE (bestmode);
1441		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1442		  xbitpos = bitnum % unit;
1443		  xop0 = adjust_address (xop0, bestmode, xoffset);
1444
1445		  /* Make sure register is big enough for the whole field. */
1446		  if (xoffset * BITS_PER_UNIT + unit
1447		      < offset * BITS_PER_UNIT + bitsize)
1448		    goto extzv_loses;
1449
1450		  /* Fetch it to a register in that size.  */
1451		  xop0 = force_reg (bestmode, xop0);
1452
1453		  /* XBITPOS counts within UNIT, which is what is expected.  */
1454		}
1455	      else
1456		/* Get ref to first byte containing part of the field.  */
1457		xop0 = adjust_address (xop0, byte_mode, xoffset);
1458
1459	      volatile_ok = save_volatile_ok;
1460	    }
1461
1462	  /* If op0 is a register, we need it in MAXMODE (which is usually
1463	     SImode). to make it acceptable to the format of extzv.  */
1464	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1465	    goto extzv_loses;
1466	  if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1467	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1468
1469	  /* On big-endian machines, we count bits from the most significant.
1470	     If the bit field insn does not, we must invert.  */
1471	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1472	    xbitpos = unit - bitsize - xbitpos;
1473
1474	  /* Now convert from counting within UNIT to counting in MAXMODE.  */
1475	  if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1476	    xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1477
1478	  unit = GET_MODE_BITSIZE (maxmode);
1479
1480	  if (xtarget == 0)
1481	    xtarget = xspec_target = gen_reg_rtx (tmode);
1482
1483	  if (GET_MODE (xtarget) != maxmode)
1484	    {
1485	      if (REG_P (xtarget))
1486		{
1487		  int wider = (GET_MODE_SIZE (maxmode)
1488			       > GET_MODE_SIZE (GET_MODE (xtarget)));
1489		  xtarget = gen_lowpart (maxmode, xtarget);
1490		  if (wider)
1491		    xspec_target_subreg = xtarget;
1492		}
1493	      else
1494		xtarget = gen_reg_rtx (maxmode);
1495	    }
1496
1497	  /* If this machine's extzv insists on a register target,
1498	     make sure we have one.  */
1499	  if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1500		 (xtarget, maxmode)))
1501	    xtarget = gen_reg_rtx (maxmode);
1502
1503	  bitsize_rtx = GEN_INT (bitsize);
1504	  bitpos_rtx = GEN_INT (xbitpos);
1505
1506	  pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1507	  if (pat)
1508	    {
1509	      emit_insn (pat);
1510	      target = xtarget;
1511	      spec_target = xspec_target;
1512	      spec_target_subreg = xspec_target_subreg;
1513	    }
1514	  else
1515	    {
1516	      delete_insns_since (last);
1517	      target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1518						bitpos, target, 1);
1519	    }
1520	}
1521      else
1522      extzv_loses:
1523	target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1524					  bitpos, target, 1);
1525    }
1526  else
1527    {
1528      if (HAVE_extv
1529	  && bitsize > 0
1530	  && GET_MODE_BITSIZE (extv_mode) >= bitsize
1531	  && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1532		&& (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1533	{
1534	  int xbitpos = bitpos, xoffset = offset;
1535	  rtx bitsize_rtx, bitpos_rtx;
1536	  rtx last = get_last_insn ();
1537	  rtx xop0 = op0, xtarget = target;
1538	  rtx xspec_target = spec_target;
1539	  rtx xspec_target_subreg = spec_target_subreg;
1540	  rtx pat;
1541	  enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1542
1543	  if (MEM_P (xop0))
1544	    {
1545	      /* Is the memory operand acceptable?  */
1546	      if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1547		     (xop0, GET_MODE (xop0))))
1548		{
1549		  /* No, load into a reg and extract from there.  */
1550		  enum machine_mode bestmode;
1551
1552		  /* Get the mode to use for inserting into this field.  If
1553		     OP0 is BLKmode, get the smallest mode consistent with the
1554		     alignment. If OP0 is a non-BLKmode object that is no
1555		     wider than MAXMODE, use its mode. Otherwise, use the
1556		     smallest mode containing the field.  */
1557
1558		  if (GET_MODE (xop0) == BLKmode
1559		      || (GET_MODE_SIZE (GET_MODE (op0))
1560			  > GET_MODE_SIZE (maxmode)))
1561		    bestmode = get_best_mode (bitsize, bitnum,
1562					      MEM_ALIGN (xop0), maxmode,
1563					      MEM_VOLATILE_P (xop0));
1564		  else
1565		    bestmode = GET_MODE (xop0);
1566
1567		  if (bestmode == VOIDmode
1568		      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1569			  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1570		    goto extv_loses;
1571
1572		  /* Compute offset as multiple of this unit,
1573		     counting in bytes.  */
1574		  unit = GET_MODE_BITSIZE (bestmode);
1575		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1576		  xbitpos = bitnum % unit;
1577		  xop0 = adjust_address (xop0, bestmode, xoffset);
1578
1579		  /* Make sure register is big enough for the whole field. */
1580		  if (xoffset * BITS_PER_UNIT + unit
1581		      < offset * BITS_PER_UNIT + bitsize)
1582		    goto extv_loses;
1583
1584		  /* Fetch it to a register in that size.  */
1585		  xop0 = force_reg (bestmode, xop0);
1586
1587		  /* XBITPOS counts within UNIT, which is what is expected.  */
1588		}
1589	      else
1590		/* Get ref to first byte containing part of the field.  */
1591		xop0 = adjust_address (xop0, byte_mode, xoffset);
1592	    }
1593
1594	  /* If op0 is a register, we need it in MAXMODE (which is usually
1595	     SImode) to make it acceptable to the format of extv.  */
1596	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1597	    goto extv_loses;
1598	  if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1599	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1600
1601	  /* On big-endian machines, we count bits from the most significant.
1602	     If the bit field insn does not, we must invert.  */
1603	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1604	    xbitpos = unit - bitsize - xbitpos;
1605
1606	  /* XBITPOS counts within a size of UNIT.
1607	     Adjust to count within a size of MAXMODE.  */
1608	  if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1609	    xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1610
1611	  unit = GET_MODE_BITSIZE (maxmode);
1612
1613	  if (xtarget == 0)
1614	    xtarget = xspec_target = gen_reg_rtx (tmode);
1615
1616	  if (GET_MODE (xtarget) != maxmode)
1617	    {
1618	      if (REG_P (xtarget))
1619		{
1620		  int wider = (GET_MODE_SIZE (maxmode)
1621			       > GET_MODE_SIZE (GET_MODE (xtarget)));
1622		  xtarget = gen_lowpart (maxmode, xtarget);
1623		  if (wider)
1624		    xspec_target_subreg = xtarget;
1625		}
1626	      else
1627		xtarget = gen_reg_rtx (maxmode);
1628	    }
1629
1630	  /* If this machine's extv insists on a register target,
1631	     make sure we have one.  */
1632	  if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1633		 (xtarget, maxmode)))
1634	    xtarget = gen_reg_rtx (maxmode);
1635
1636	  bitsize_rtx = GEN_INT (bitsize);
1637	  bitpos_rtx = GEN_INT (xbitpos);
1638
1639	  pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1640	  if (pat)
1641	    {
1642	      emit_insn (pat);
1643	      target = xtarget;
1644	      spec_target = xspec_target;
1645	      spec_target_subreg = xspec_target_subreg;
1646	    }
1647	  else
1648	    {
1649	      delete_insns_since (last);
1650	      target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1651						bitpos, target, 0);
1652	    }
1653	}
1654      else
1655      extv_loses:
1656	target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1657					  bitpos, target, 0);
1658    }
1659  if (target == spec_target)
1660    return target;
1661  if (target == spec_target_subreg)
1662    return spec_target;
1663  if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1664    {
1665      /* If the target mode is not a scalar integral, first convert to the
1666	 integer mode of that size and then access it as a floating-point
1667	 value via a SUBREG.  */
1668      if (!SCALAR_INT_MODE_P (tmode))
1669	{
1670	  enum machine_mode smode
1671	    = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1672	  target = convert_to_mode (smode, target, unsignedp);
1673	  target = force_reg (smode, target);
1674	  return gen_lowpart (tmode, target);
1675	}
1676
1677      return convert_to_mode (tmode, target, unsignedp);
1678    }
1679  return target;
1680}
1681
1682/* Extract a bit field using shifts and boolean operations
1683   Returns an rtx to represent the value.
1684   OP0 addresses a register (word) or memory (byte).
1685   BITPOS says which bit within the word or byte the bit field starts in.
1686   OFFSET says how many bytes farther the bit field starts;
1687    it is 0 if OP0 is a register.
1688   BITSIZE says how many bits long the bit field is.
1689    (If OP0 is a register, it may be narrower than a full word,
1690     but BITPOS still counts within a full word,
1691     which is significant on bigendian machines.)
1692
1693   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1694   If TARGET is nonzero, attempts to store the value there
1695   and return TARGET, but this is not guaranteed.
1696   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1697
1698static rtx
1699extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1700			 unsigned HOST_WIDE_INT offset,
1701			 unsigned HOST_WIDE_INT bitsize,
1702			 unsigned HOST_WIDE_INT bitpos, rtx target,
1703			 int unsignedp)
1704{
1705  unsigned int total_bits = BITS_PER_WORD;
1706  enum machine_mode mode;
1707
1708  if (GET_CODE (op0) == SUBREG || REG_P (op0))
1709    {
1710      /* Special treatment for a bit field split across two registers.  */
1711      if (bitsize + bitpos > BITS_PER_WORD)
1712	return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1713    }
1714  else
1715    {
1716      /* Get the proper mode to use for this field.  We want a mode that
1717	 includes the entire field.  If such a mode would be larger than
1718	 a word, we won't be doing the extraction the normal way.  */
1719
1720      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1721			    MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1722
1723      if (mode == VOIDmode)
1724	/* The only way this should occur is if the field spans word
1725	   boundaries.  */
1726	return extract_split_bit_field (op0, bitsize,
1727					bitpos + offset * BITS_PER_UNIT,
1728					unsignedp);
1729
1730      total_bits = GET_MODE_BITSIZE (mode);
1731
1732      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1733	 be in the range 0 to total_bits-1, and put any excess bytes in
1734	 OFFSET.  */
1735      if (bitpos >= total_bits)
1736	{
1737	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1738	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1739		     * BITS_PER_UNIT);
1740	}
1741
1742      /* Get ref to an aligned byte, halfword, or word containing the field.
1743	 Adjust BITPOS to be position within a word,
1744	 and OFFSET to be the offset of that word.
1745	 Then alter OP0 to refer to that word.  */
1746      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1747      offset -= (offset % (total_bits / BITS_PER_UNIT));
1748      op0 = adjust_address (op0, mode, offset);
1749    }
1750
1751  mode = GET_MODE (op0);
1752
1753  if (BYTES_BIG_ENDIAN)
1754    /* BITPOS is the distance between our msb and that of OP0.
1755       Convert it to the distance from the lsb.  */
1756    bitpos = total_bits - bitsize - bitpos;
1757
1758  /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1759     We have reduced the big-endian case to the little-endian case.  */
1760
1761  if (unsignedp)
1762    {
1763      if (bitpos)
1764	{
1765	  /* If the field does not already start at the lsb,
1766	     shift it so it does.  */
1767	  tree amount = build_int_cst (NULL_TREE, bitpos);
1768	  /* Maybe propagate the target for the shift.  */
1769	  /* But not if we will return it--could confuse integrate.c.  */
1770	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1771	  if (tmode != mode) subtarget = 0;
1772	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1773	}
1774      /* Convert the value to the desired mode.  */
1775      if (mode != tmode)
1776	op0 = convert_to_mode (tmode, op0, 1);
1777
1778      /* Unless the msb of the field used to be the msb when we shifted,
1779	 mask out the upper bits.  */
1780
1781      if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1782	return expand_binop (GET_MODE (op0), and_optab, op0,
1783			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1784			     target, 1, OPTAB_LIB_WIDEN);
1785      return op0;
1786    }
1787
1788  /* To extract a signed bit-field, first shift its msb to the msb of the word,
1789     then arithmetic-shift its lsb to the lsb of the word.  */
1790  op0 = force_reg (mode, op0);
1791  if (mode != tmode)
1792    target = 0;
1793
1794  /* Find the narrowest integer mode that contains the field.  */
1795
1796  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1797       mode = GET_MODE_WIDER_MODE (mode))
1798    if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1799      {
1800	op0 = convert_to_mode (mode, op0, 0);
1801	break;
1802      }
1803
1804  if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1805    {
1806      tree amount
1807	= build_int_cst (NULL_TREE,
1808			 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1809      /* Maybe propagate the target for the shift.  */
1810      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1811      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1812    }
1813
1814  return expand_shift (RSHIFT_EXPR, mode, op0,
1815		       build_int_cst (NULL_TREE,
1816				      GET_MODE_BITSIZE (mode) - bitsize),
1817		       target, 0);
1818}
1819
1820/* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1821   of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1822   complement of that if COMPLEMENT.  The mask is truncated if
1823   necessary to the width of mode MODE.  The mask is zero-extended if
1824   BITSIZE+BITPOS is too small for MODE.  */
1825
1826static rtx
1827mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1828{
1829  HOST_WIDE_INT masklow, maskhigh;
1830
1831  if (bitsize == 0)
1832    masklow = 0;
1833  else if (bitpos < HOST_BITS_PER_WIDE_INT)
1834    masklow = (HOST_WIDE_INT) -1 << bitpos;
1835  else
1836    masklow = 0;
1837
1838  if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1839    masklow &= ((unsigned HOST_WIDE_INT) -1
1840		>> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1841
1842  if (bitpos <= HOST_BITS_PER_WIDE_INT)
1843    maskhigh = -1;
1844  else
1845    maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1846
1847  if (bitsize == 0)
1848    maskhigh = 0;
1849  else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1850    maskhigh &= ((unsigned HOST_WIDE_INT) -1
1851		 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1852  else
1853    maskhigh = 0;
1854
1855  if (complement)
1856    {
1857      maskhigh = ~maskhigh;
1858      masklow = ~masklow;
1859    }
1860
1861  return immed_double_const (masklow, maskhigh, mode);
1862}
1863
1864/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1865   VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1866
1867static rtx
1868lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1869{
1870  unsigned HOST_WIDE_INT v = INTVAL (value);
1871  HOST_WIDE_INT low, high;
1872
1873  if (bitsize < HOST_BITS_PER_WIDE_INT)
1874    v &= ~((HOST_WIDE_INT) -1 << bitsize);
1875
1876  if (bitpos < HOST_BITS_PER_WIDE_INT)
1877    {
1878      low = v << bitpos;
1879      high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1880    }
1881  else
1882    {
1883      low = 0;
1884      high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1885    }
1886
1887  return immed_double_const (low, high, mode);
1888}
1889
1890/* Extract a bit field from a memory by forcing the alignment of the
1891   memory.  This efficient only if the field spans at least 4 boundaries.
1892
1893   OP0 is the MEM.
1894   BITSIZE is the field width; BITPOS is the position of the first bit.
1895   UNSIGNEDP is true if the result should be zero-extended.  */
1896
1897static rtx
1898extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1899				   unsigned HOST_WIDE_INT bitpos,
1900				   int unsignedp)
1901{
1902  enum machine_mode mode, dmode;
1903  unsigned int m_bitsize, m_size;
1904  unsigned int sign_shift_up, sign_shift_dn;
1905  rtx base, a1, a2, v1, v2, comb, shift, result, start;
1906
1907  /* Choose a mode that will fit BITSIZE.  */
1908  mode = smallest_mode_for_size (bitsize, MODE_INT);
1909  m_size = GET_MODE_SIZE (mode);
1910  m_bitsize = GET_MODE_BITSIZE (mode);
1911
1912  /* Choose a mode twice as wide.  Fail if no such mode exists.  */
1913  dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1914  if (dmode == BLKmode)
1915    return NULL;
1916
1917  do_pending_stack_adjust ();
1918  start = get_last_insn ();
1919
1920  /* At the end, we'll need an additional shift to deal with sign/zero
1921     extension.  By default this will be a left+right shift of the
1922     appropriate size.  But we may be able to eliminate one of them.  */
1923  sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1924
1925  if (STRICT_ALIGNMENT)
1926    {
1927      base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1928      bitpos %= BITS_PER_UNIT;
1929
1930      /* We load two values to be concatenate.  There's an edge condition
1931	 that bears notice -- an aligned value at the end of a page can
1932	 only load one value lest we segfault.  So the two values we load
1933	 are at "base & -size" and "(base + size - 1) & -size".  If base
1934	 is unaligned, the addresses will be aligned and sequential; if
1935	 base is aligned, the addresses will both be equal to base.  */
1936
1937      a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1938				GEN_INT (-(HOST_WIDE_INT)m_size),
1939				NULL, true, OPTAB_LIB_WIDEN);
1940      mark_reg_pointer (a1, m_bitsize);
1941      v1 = gen_rtx_MEM (mode, a1);
1942      set_mem_align (v1, m_bitsize);
1943      v1 = force_reg (mode, validize_mem (v1));
1944
1945      a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1946      a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1947				GEN_INT (-(HOST_WIDE_INT)m_size),
1948				NULL, true, OPTAB_LIB_WIDEN);
1949      v2 = gen_rtx_MEM (mode, a2);
1950      set_mem_align (v2, m_bitsize);
1951      v2 = force_reg (mode, validize_mem (v2));
1952
1953      /* Combine these two values into a double-word value.  */
1954      if (m_bitsize == BITS_PER_WORD)
1955	{
1956	  comb = gen_reg_rtx (dmode);
1957	  emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1958	  emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1959	  emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1960	}
1961      else
1962	{
1963	  if (BYTES_BIG_ENDIAN)
1964	    comb = v1, v1 = v2, v2 = comb;
1965	  v1 = convert_modes (dmode, mode, v1, true);
1966	  if (v1 == NULL)
1967	    goto fail;
1968	  v2 = convert_modes (dmode, mode, v2, true);
1969	  v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1970				    NULL, true, OPTAB_LIB_WIDEN);
1971	  if (v2 == NULL)
1972	    goto fail;
1973	  comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
1974				      true, OPTAB_LIB_WIDEN);
1975	  if (comb == NULL)
1976	    goto fail;
1977	}
1978
1979      shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
1980				   NULL, true, OPTAB_LIB_WIDEN);
1981      shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
1982
1983      if (bitpos != 0)
1984	{
1985	  if (sign_shift_up <= bitpos)
1986	    bitpos -= sign_shift_up, sign_shift_up = 0;
1987	  shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
1988				       NULL, true, OPTAB_LIB_WIDEN);
1989	}
1990    }
1991  else
1992    {
1993      unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
1994      bitpos %= BITS_PER_UNIT;
1995
1996      /* When strict alignment is not required, we can just load directly
1997	 from memory without masking.  If the remaining BITPOS offset is
1998	 small enough, we may be able to do all operations in MODE as
1999	 opposed to DMODE.  */
2000      if (bitpos + bitsize <= m_bitsize)
2001	dmode = mode;
2002      comb = adjust_address (op0, dmode, offset);
2003
2004      if (sign_shift_up <= bitpos)
2005	bitpos -= sign_shift_up, sign_shift_up = 0;
2006      shift = GEN_INT (bitpos);
2007    }
2008
2009  /* Shift down the double-word such that the requested value is at bit 0.  */
2010  if (shift != const0_rtx)
2011    comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
2012				comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
2013  if (comb == NULL)
2014    goto fail;
2015
2016  /* If the field exactly matches MODE, then all we need to do is return the
2017     lowpart.  Otherwise, shift to get the sign bits set properly.  */
2018  result = force_reg (mode, gen_lowpart (mode, comb));
2019
2020  if (sign_shift_up)
2021    result = expand_simple_binop (mode, ASHIFT, result,
2022				  GEN_INT (sign_shift_up),
2023				  NULL_RTX, 0, OPTAB_LIB_WIDEN);
2024  if (sign_shift_dn)
2025    result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
2026				  result, GEN_INT (sign_shift_dn),
2027				  NULL_RTX, 0, OPTAB_LIB_WIDEN);
2028
2029  return result;
2030
2031 fail:
2032  delete_insns_since (start);
2033  return NULL;
2034}
2035
2036/* Extract a bit field that is split across two words
2037   and return an RTX for the result.
2038
2039   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2040   BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2041   UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
2042
2043static rtx
2044extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2045			 unsigned HOST_WIDE_INT bitpos, int unsignedp)
2046{
2047  unsigned int unit;
2048  unsigned int bitsdone = 0;
2049  rtx result = NULL_RTX;
2050  int first = 1;
2051
2052  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2053     much at a time.  */
2054  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2055    unit = BITS_PER_WORD;
2056  else
2057    {
2058      unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2059      if (0 && bitsize / unit > 2)
2060	{
2061	  rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2062						       unsignedp);
2063	  if (tmp)
2064	    return tmp;
2065	}
2066    }
2067
2068  while (bitsdone < bitsize)
2069    {
2070      unsigned HOST_WIDE_INT thissize;
2071      rtx part, word;
2072      unsigned HOST_WIDE_INT thispos;
2073      unsigned HOST_WIDE_INT offset;
2074
2075      offset = (bitpos + bitsdone) / unit;
2076      thispos = (bitpos + bitsdone) % unit;
2077
2078      /* THISSIZE must not overrun a word boundary.  Otherwise,
2079	 extract_fixed_bit_field will call us again, and we will mutually
2080	 recurse forever.  */
2081      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2082      thissize = MIN (thissize, unit - thispos);
2083
2084      /* If OP0 is a register, then handle OFFSET here.
2085
2086	 When handling multiword bitfields, extract_bit_field may pass
2087	 down a word_mode SUBREG of a larger REG for a bitfield that actually
2088	 crosses a word boundary.  Thus, for a SUBREG, we must find
2089	 the current word starting from the base register.  */
2090      if (GET_CODE (op0) == SUBREG)
2091	{
2092	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2093	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
2094					GET_MODE (SUBREG_REG (op0)));
2095	  offset = 0;
2096	}
2097      else if (REG_P (op0))
2098	{
2099	  word = operand_subword_force (op0, offset, GET_MODE (op0));
2100	  offset = 0;
2101	}
2102      else
2103	word = op0;
2104
2105      /* Extract the parts in bit-counting order,
2106	 whose meaning is determined by BYTES_PER_UNIT.
2107	 OFFSET is in UNITs, and UNIT is in bits.
2108	 extract_fixed_bit_field wants offset in bytes.  */
2109      part = extract_fixed_bit_field (word_mode, word,
2110				      offset * unit / BITS_PER_UNIT,
2111				      thissize, thispos, 0, 1);
2112      bitsdone += thissize;
2113
2114      /* Shift this part into place for the result.  */
2115      if (BYTES_BIG_ENDIAN)
2116	{
2117	  if (bitsize != bitsdone)
2118	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2119				 build_int_cst (NULL_TREE, bitsize - bitsdone),
2120				 0, 1);
2121	}
2122      else
2123	{
2124	  if (bitsdone != thissize)
2125	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2126				 build_int_cst (NULL_TREE,
2127						bitsdone - thissize), 0, 1);
2128	}
2129
2130      if (first)
2131	result = part;
2132      else
2133	/* Combine the parts with bitwise or.  This works
2134	   because we extracted each part as an unsigned bit field.  */
2135	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2136			       OPTAB_LIB_WIDEN);
2137
2138      first = 0;
2139    }
2140
2141  /* Unsigned bit field: we are done.  */
2142  if (unsignedp)
2143    return result;
2144  /* Signed bit field: sign-extend with two arithmetic shifts.  */
2145  result = expand_shift (LSHIFT_EXPR, word_mode, result,
2146			 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2147			 NULL_RTX, 0);
2148  return expand_shift (RSHIFT_EXPR, word_mode, result,
2149		       build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2150		       NULL_RTX, 0);
2151}
2152
2153/* Add INC into TARGET.  */
2154
2155void
2156expand_inc (rtx target, rtx inc)
2157{
2158  rtx value = expand_binop (GET_MODE (target), add_optab,
2159			    target, inc,
2160			    target, 0, OPTAB_LIB_WIDEN);
2161  if (value != target)
2162    emit_move_insn (target, value);
2163}
2164
2165/* Subtract DEC from TARGET.  */
2166
2167void
2168expand_dec (rtx target, rtx dec)
2169{
2170  rtx value = expand_binop (GET_MODE (target), sub_optab,
2171			    target, dec,
2172			    target, 0, OPTAB_LIB_WIDEN);
2173  if (value != target)
2174    emit_move_insn (target, value);
2175}
2176
2177/* Output a shift instruction for expression code CODE,
2178   with SHIFTED being the rtx for the value to shift,
2179   and AMOUNT the tree for the amount to shift by.
2180   Store the result in the rtx TARGET, if that is convenient.
2181   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2182   Return the rtx for where the value is.  */
2183
2184rtx
2185expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2186	      tree amount, rtx target, int unsignedp)
2187{
2188  rtx op1, temp = 0;
2189  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2190  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2191  int try;
2192
2193  /* Previously detected shift-counts computed by NEGATE_EXPR
2194     and shifted in the other direction; but that does not work
2195     on all machines.  */
2196
2197  op1 = expand_normal (amount);
2198
2199  if (SHIFT_COUNT_TRUNCATED)
2200    {
2201      if (GET_CODE (op1) == CONST_INT
2202	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2203	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2204	op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2205		       % GET_MODE_BITSIZE (mode));
2206      else if (GET_CODE (op1) == SUBREG
2207	       && subreg_lowpart_p (op1))
2208	op1 = SUBREG_REG (op1);
2209    }
2210
2211  if (op1 == const0_rtx)
2212    return shifted;
2213
2214  /* Check whether its cheaper to implement a left shift by a constant
2215     bit count by a sequence of additions.  */
2216  if (code == LSHIFT_EXPR
2217      && GET_CODE (op1) == CONST_INT
2218      && INTVAL (op1) > 0
2219      && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2220      && INTVAL (op1) < MAX_BITS_PER_WORD
2221      && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode]
2222      && shift_cost[mode][INTVAL (op1)] != MAX_COST)
2223    {
2224      int i;
2225      for (i = 0; i < INTVAL (op1); i++)
2226	{
2227	  temp = force_reg (mode, shifted);
2228	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2229				  unsignedp, OPTAB_LIB_WIDEN);
2230	}
2231      return shifted;
2232    }
2233
2234  for (try = 0; temp == 0 && try < 3; try++)
2235    {
2236      enum optab_methods methods;
2237
2238      if (try == 0)
2239	methods = OPTAB_DIRECT;
2240      else if (try == 1)
2241	methods = OPTAB_WIDEN;
2242      else
2243	methods = OPTAB_LIB_WIDEN;
2244
2245      if (rotate)
2246	{
2247	  /* Widening does not work for rotation.  */
2248	  if (methods == OPTAB_WIDEN)
2249	    continue;
2250	  else if (methods == OPTAB_LIB_WIDEN)
2251	    {
2252	      /* If we have been unable to open-code this by a rotation,
2253		 do it as the IOR of two shifts.  I.e., to rotate A
2254		 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2255		 where C is the bitsize of A.
2256
2257		 It is theoretically possible that the target machine might
2258		 not be able to perform either shift and hence we would
2259		 be making two libcalls rather than just the one for the
2260		 shift (similarly if IOR could not be done).  We will allow
2261		 this extremely unlikely lossage to avoid complicating the
2262		 code below.  */
2263
2264	      rtx subtarget = target == shifted ? 0 : target;
2265	      tree new_amount, other_amount;
2266	      rtx temp1;
2267	      tree type = TREE_TYPE (amount);
2268	      if (GET_MODE (op1) != TYPE_MODE (type)
2269		  && GET_MODE (op1) != VOIDmode)
2270		op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2271	      new_amount = make_tree (type, op1);
2272	      other_amount
2273		= fold_build2 (MINUS_EXPR, type,
2274			       build_int_cst (type, GET_MODE_BITSIZE (mode)),
2275			       new_amount);
2276
2277	      shifted = force_reg (mode, shifted);
2278
2279	      temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2280				   mode, shifted, new_amount, 0, 1);
2281	      temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2282				    mode, shifted, other_amount, subtarget, 1);
2283	      return expand_binop (mode, ior_optab, temp, temp1, target,
2284				   unsignedp, methods);
2285	    }
2286
2287	  temp = expand_binop (mode,
2288			       left ? rotl_optab : rotr_optab,
2289			       shifted, op1, target, unsignedp, methods);
2290	}
2291      else if (unsignedp)
2292	temp = expand_binop (mode,
2293			     left ? ashl_optab : lshr_optab,
2294			     shifted, op1, target, unsignedp, methods);
2295
2296      /* Do arithmetic shifts.
2297	 Also, if we are going to widen the operand, we can just as well
2298	 use an arithmetic right-shift instead of a logical one.  */
2299      if (temp == 0 && ! rotate
2300	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2301	{
2302	  enum optab_methods methods1 = methods;
2303
2304	  /* If trying to widen a log shift to an arithmetic shift,
2305	     don't accept an arithmetic shift of the same size.  */
2306	  if (unsignedp)
2307	    methods1 = OPTAB_MUST_WIDEN;
2308
2309	  /* Arithmetic shift */
2310
2311	  temp = expand_binop (mode,
2312			       left ? ashl_optab : ashr_optab,
2313			       shifted, op1, target, unsignedp, methods1);
2314	}
2315
2316      /* We used to try extzv here for logical right shifts, but that was
2317	 only useful for one machine, the VAX, and caused poor code
2318	 generation there for lshrdi3, so the code was deleted and a
2319	 define_expand for lshrsi3 was added to vax.md.  */
2320    }
2321
2322  gcc_assert (temp);
2323  return temp;
2324}
2325
2326enum alg_code {
2327  alg_unknown,
2328  alg_zero,
2329  alg_m, alg_shift,
2330  alg_add_t_m2,
2331  alg_sub_t_m2,
2332  alg_add_factor,
2333  alg_sub_factor,
2334  alg_add_t2_m,
2335  alg_sub_t2_m,
2336  alg_impossible
2337};
2338
2339/* This structure holds the "cost" of a multiply sequence.  The
2340   "cost" field holds the total rtx_cost of every operator in the
2341   synthetic multiplication sequence, hence cost(a op b) is defined
2342   as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2343   The "latency" field holds the minimum possible latency of the
2344   synthetic multiply, on a hypothetical infinitely parallel CPU.
2345   This is the critical path, or the maximum height, of the expression
2346   tree which is the sum of rtx_costs on the most expensive path from
2347   any leaf to the root.  Hence latency(a op b) is defined as zero for
2348   leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2349
2350struct mult_cost {
2351  short cost;     /* Total rtx_cost of the multiplication sequence.  */
2352  short latency;  /* The latency of the multiplication sequence.  */
2353};
2354
2355/* This macro is used to compare a pointer to a mult_cost against an
2356   single integer "rtx_cost" value.  This is equivalent to the macro
2357   CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2358#define MULT_COST_LESS(X,Y) ((X)->cost < (Y)	\
2359			     || ((X)->cost == (Y) && (X)->latency < (Y)))
2360
2361/* This macro is used to compare two pointers to mult_costs against
2362   each other.  The macro returns true if X is cheaper than Y.
2363   Currently, the cheaper of two mult_costs is the one with the
2364   lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2365#define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost		\
2366				 || ((X)->cost == (Y)->cost	\
2367				     && (X)->latency < (Y)->latency))
2368
2369/* This structure records a sequence of operations.
2370   `ops' is the number of operations recorded.
2371   `cost' is their total cost.
2372   The operations are stored in `op' and the corresponding
2373   logarithms of the integer coefficients in `log'.
2374
2375   These are the operations:
2376   alg_zero		total := 0;
2377   alg_m		total := multiplicand;
2378   alg_shift		total := total * coeff
2379   alg_add_t_m2		total := total + multiplicand * coeff;
2380   alg_sub_t_m2		total := total - multiplicand * coeff;
2381   alg_add_factor	total := total * coeff + total;
2382   alg_sub_factor	total := total * coeff - total;
2383   alg_add_t2_m		total := total * coeff + multiplicand;
2384   alg_sub_t2_m		total := total * coeff - multiplicand;
2385
2386   The first operand must be either alg_zero or alg_m.  */
2387
2388struct algorithm
2389{
2390  struct mult_cost cost;
2391  short ops;
2392  /* The size of the OP and LOG fields are not directly related to the
2393     word size, but the worst-case algorithms will be if we have few
2394     consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2395     In that case we will generate shift-by-2, add, shift-by-2, add,...,
2396     in total wordsize operations.  */
2397  enum alg_code op[MAX_BITS_PER_WORD];
2398  char log[MAX_BITS_PER_WORD];
2399};
2400
2401/* The entry for our multiplication cache/hash table.  */
2402struct alg_hash_entry {
2403  /* The number we are multiplying by.  */
2404  unsigned HOST_WIDE_INT t;
2405
2406  /* The mode in which we are multiplying something by T.  */
2407  enum machine_mode mode;
2408
2409  /* The best multiplication algorithm for t.  */
2410  enum alg_code alg;
2411
2412  /* The cost of multiplication if ALG_CODE is not alg_impossible.
2413     Otherwise, the cost within which multiplication by T is
2414     impossible.  */
2415  struct mult_cost cost;
2416};
2417
2418/* The number of cache/hash entries.  */
2419#if HOST_BITS_PER_WIDE_INT == 64
2420#define NUM_ALG_HASH_ENTRIES 1031
2421#else
2422#define NUM_ALG_HASH_ENTRIES 307
2423#endif
2424
2425/* Each entry of ALG_HASH caches alg_code for some integer.  This is
2426   actually a hash table.  If we have a collision, that the older
2427   entry is kicked out.  */
2428static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2429
2430/* Indicates the type of fixup needed after a constant multiplication.
2431   BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2432   the result should be negated, and ADD_VARIANT means that the
2433   multiplicand should be added to the result.  */
2434enum mult_variant {basic_variant, negate_variant, add_variant};
2435
2436static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2437			const struct mult_cost *, enum machine_mode mode);
2438static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2439				 struct algorithm *, enum mult_variant *, int);
2440static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2441			      const struct algorithm *, enum mult_variant);
2442static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2443						 int, rtx *, int *, int *);
2444static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2445static rtx extract_high_half (enum machine_mode, rtx);
2446static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2447static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2448				       int, int);
2449/* Compute and return the best algorithm for multiplying by T.
2450   The algorithm must cost less than cost_limit
2451   If retval.cost >= COST_LIMIT, no algorithm was found and all
2452   other field of the returned struct are undefined.
2453   MODE is the machine mode of the multiplication.  */
2454
2455static void
2456synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2457	    const struct mult_cost *cost_limit, enum machine_mode mode)
2458{
2459  int m;
2460  struct algorithm *alg_in, *best_alg;
2461  struct mult_cost best_cost;
2462  struct mult_cost new_limit;
2463  int op_cost, op_latency;
2464  unsigned HOST_WIDE_INT q;
2465  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2466  int hash_index;
2467  bool cache_hit = false;
2468  enum alg_code cache_alg = alg_zero;
2469
2470  /* Indicate that no algorithm is yet found.  If no algorithm
2471     is found, this value will be returned and indicate failure.  */
2472  alg_out->cost.cost = cost_limit->cost + 1;
2473  alg_out->cost.latency = cost_limit->latency + 1;
2474
2475  if (cost_limit->cost < 0
2476      || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2477    return;
2478
2479  /* Restrict the bits of "t" to the multiplication's mode.  */
2480  t &= GET_MODE_MASK (mode);
2481
2482  /* t == 1 can be done in zero cost.  */
2483  if (t == 1)
2484    {
2485      alg_out->ops = 1;
2486      alg_out->cost.cost = 0;
2487      alg_out->cost.latency = 0;
2488      alg_out->op[0] = alg_m;
2489      return;
2490    }
2491
2492  /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2493     fail now.  */
2494  if (t == 0)
2495    {
2496      if (MULT_COST_LESS (cost_limit, zero_cost))
2497	return;
2498      else
2499	{
2500	  alg_out->ops = 1;
2501	  alg_out->cost.cost = zero_cost;
2502	  alg_out->cost.latency = zero_cost;
2503	  alg_out->op[0] = alg_zero;
2504	  return;
2505	}
2506    }
2507
2508  /* We'll be needing a couple extra algorithm structures now.  */
2509
2510  alg_in = alloca (sizeof (struct algorithm));
2511  best_alg = alloca (sizeof (struct algorithm));
2512  best_cost = *cost_limit;
2513
2514  /* Compute the hash index.  */
2515  hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2516
2517  /* See if we already know what to do for T.  */
2518  if (alg_hash[hash_index].t == t
2519      && alg_hash[hash_index].mode == mode
2520      && alg_hash[hash_index].alg != alg_unknown)
2521    {
2522      cache_alg = alg_hash[hash_index].alg;
2523
2524      if (cache_alg == alg_impossible)
2525	{
2526	  /* The cache tells us that it's impossible to synthesize
2527	     multiplication by T within alg_hash[hash_index].cost.  */
2528	  if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2529	    /* COST_LIMIT is at least as restrictive as the one
2530	       recorded in the hash table, in which case we have no
2531	       hope of synthesizing a multiplication.  Just
2532	       return.  */
2533	    return;
2534
2535	  /* If we get here, COST_LIMIT is less restrictive than the
2536	     one recorded in the hash table, so we may be able to
2537	     synthesize a multiplication.  Proceed as if we didn't
2538	     have the cache entry.  */
2539	}
2540      else
2541	{
2542	  if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2543	    /* The cached algorithm shows that this multiplication
2544	       requires more cost than COST_LIMIT.  Just return.  This
2545	       way, we don't clobber this cache entry with
2546	       alg_impossible but retain useful information.  */
2547	    return;
2548
2549	  cache_hit = true;
2550
2551	  switch (cache_alg)
2552	    {
2553	    case alg_shift:
2554	      goto do_alg_shift;
2555
2556	    case alg_add_t_m2:
2557	    case alg_sub_t_m2:
2558	      goto do_alg_addsub_t_m2;
2559
2560	    case alg_add_factor:
2561	    case alg_sub_factor:
2562	      goto do_alg_addsub_factor;
2563
2564	    case alg_add_t2_m:
2565	      goto do_alg_add_t2_m;
2566
2567	    case alg_sub_t2_m:
2568	      goto do_alg_sub_t2_m;
2569
2570	    default:
2571	      gcc_unreachable ();
2572	    }
2573	}
2574    }
2575
2576  /* If we have a group of zero bits at the low-order part of T, try
2577     multiplying by the remaining bits and then doing a shift.  */
2578
2579  if ((t & 1) == 0)
2580    {
2581    do_alg_shift:
2582      m = floor_log2 (t & -t);	/* m = number of low zero bits */
2583      if (m < maxm)
2584	{
2585	  q = t >> m;
2586	  /* The function expand_shift will choose between a shift and
2587	     a sequence of additions, so the observed cost is given as
2588	     MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2589	  op_cost = m * add_cost[mode];
2590	  if (shift_cost[mode][m] < op_cost)
2591	    op_cost = shift_cost[mode][m];
2592	  new_limit.cost = best_cost.cost - op_cost;
2593	  new_limit.latency = best_cost.latency - op_cost;
2594	  synth_mult (alg_in, q, &new_limit, mode);
2595
2596	  alg_in->cost.cost += op_cost;
2597	  alg_in->cost.latency += op_cost;
2598	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2599	    {
2600	      struct algorithm *x;
2601	      best_cost = alg_in->cost;
2602	      x = alg_in, alg_in = best_alg, best_alg = x;
2603	      best_alg->log[best_alg->ops] = m;
2604	      best_alg->op[best_alg->ops] = alg_shift;
2605	    }
2606	}
2607      if (cache_hit)
2608	goto done;
2609    }
2610
2611  /* If we have an odd number, add or subtract one.  */
2612  if ((t & 1) != 0)
2613    {
2614      unsigned HOST_WIDE_INT w;
2615
2616    do_alg_addsub_t_m2:
2617      for (w = 1; (w & t) != 0; w <<= 1)
2618	;
2619      /* If T was -1, then W will be zero after the loop.  This is another
2620	 case where T ends with ...111.  Handling this with (T + 1) and
2621	 subtract 1 produces slightly better code and results in algorithm
2622	 selection much faster than treating it like the ...0111 case
2623	 below.  */
2624      if (w == 0
2625	  || (w > 2
2626	      /* Reject the case where t is 3.
2627		 Thus we prefer addition in that case.  */
2628	      && t != 3))
2629	{
2630	  /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2631
2632	  op_cost = add_cost[mode];
2633	  new_limit.cost = best_cost.cost - op_cost;
2634	  new_limit.latency = best_cost.latency - op_cost;
2635	  synth_mult (alg_in, t + 1, &new_limit, mode);
2636
2637	  alg_in->cost.cost += op_cost;
2638	  alg_in->cost.latency += op_cost;
2639	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2640	    {
2641	      struct algorithm *x;
2642	      best_cost = alg_in->cost;
2643	      x = alg_in, alg_in = best_alg, best_alg = x;
2644	      best_alg->log[best_alg->ops] = 0;
2645	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2646	    }
2647	}
2648      else
2649	{
2650	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2651
2652	  op_cost = add_cost[mode];
2653	  new_limit.cost = best_cost.cost - op_cost;
2654	  new_limit.latency = best_cost.latency - op_cost;
2655	  synth_mult (alg_in, t - 1, &new_limit, mode);
2656
2657	  alg_in->cost.cost += op_cost;
2658	  alg_in->cost.latency += op_cost;
2659	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2660	    {
2661	      struct algorithm *x;
2662	      best_cost = alg_in->cost;
2663	      x = alg_in, alg_in = best_alg, best_alg = x;
2664	      best_alg->log[best_alg->ops] = 0;
2665	      best_alg->op[best_alg->ops] = alg_add_t_m2;
2666	    }
2667	}
2668      if (cache_hit)
2669	goto done;
2670    }
2671
2672  /* Look for factors of t of the form
2673     t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2674     If we find such a factor, we can multiply by t using an algorithm that
2675     multiplies by q, shift the result by m and add/subtract it to itself.
2676
2677     We search for large factors first and loop down, even if large factors
2678     are less probable than small; if we find a large factor we will find a
2679     good sequence quickly, and therefore be able to prune (by decreasing
2680     COST_LIMIT) the search.  */
2681
2682 do_alg_addsub_factor:
2683  for (m = floor_log2 (t - 1); m >= 2; m--)
2684    {
2685      unsigned HOST_WIDE_INT d;
2686
2687      d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2688      if (t % d == 0 && t > d && m < maxm
2689	  && (!cache_hit || cache_alg == alg_add_factor))
2690	{
2691	  /* If the target has a cheap shift-and-add instruction use
2692	     that in preference to a shift insn followed by an add insn.
2693	     Assume that the shift-and-add is "atomic" with a latency
2694	     equal to its cost, otherwise assume that on superscalar
2695	     hardware the shift may be executed concurrently with the
2696	     earlier steps in the algorithm.  */
2697	  op_cost = add_cost[mode] + shift_cost[mode][m];
2698	  if (shiftadd_cost[mode][m] < op_cost)
2699	    {
2700	      op_cost = shiftadd_cost[mode][m];
2701	      op_latency = op_cost;
2702	    }
2703	  else
2704	    op_latency = add_cost[mode];
2705
2706	  new_limit.cost = best_cost.cost - op_cost;
2707	  new_limit.latency = best_cost.latency - op_latency;
2708	  synth_mult (alg_in, t / d, &new_limit, mode);
2709
2710	  alg_in->cost.cost += op_cost;
2711	  alg_in->cost.latency += op_latency;
2712	  if (alg_in->cost.latency < op_cost)
2713	    alg_in->cost.latency = op_cost;
2714	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2715	    {
2716	      struct algorithm *x;
2717	      best_cost = alg_in->cost;
2718	      x = alg_in, alg_in = best_alg, best_alg = x;
2719	      best_alg->log[best_alg->ops] = m;
2720	      best_alg->op[best_alg->ops] = alg_add_factor;
2721	    }
2722	  /* Other factors will have been taken care of in the recursion.  */
2723	  break;
2724	}
2725
2726      d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2727      if (t % d == 0 && t > d && m < maxm
2728	  && (!cache_hit || cache_alg == alg_sub_factor))
2729	{
2730	  /* If the target has a cheap shift-and-subtract insn use
2731	     that in preference to a shift insn followed by a sub insn.
2732	     Assume that the shift-and-sub is "atomic" with a latency
2733	     equal to it's cost, otherwise assume that on superscalar
2734	     hardware the shift may be executed concurrently with the
2735	     earlier steps in the algorithm.  */
2736	  op_cost = add_cost[mode] + shift_cost[mode][m];
2737	  if (shiftsub_cost[mode][m] < op_cost)
2738	    {
2739	      op_cost = shiftsub_cost[mode][m];
2740	      op_latency = op_cost;
2741	    }
2742	  else
2743	    op_latency = add_cost[mode];
2744
2745	  new_limit.cost = best_cost.cost - op_cost;
2746	  new_limit.latency = best_cost.latency - op_latency;
2747	  synth_mult (alg_in, t / d, &new_limit, mode);
2748
2749	  alg_in->cost.cost += op_cost;
2750	  alg_in->cost.latency += op_latency;
2751	  if (alg_in->cost.latency < op_cost)
2752	    alg_in->cost.latency = op_cost;
2753	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2754	    {
2755	      struct algorithm *x;
2756	      best_cost = alg_in->cost;
2757	      x = alg_in, alg_in = best_alg, best_alg = x;
2758	      best_alg->log[best_alg->ops] = m;
2759	      best_alg->op[best_alg->ops] = alg_sub_factor;
2760	    }
2761	  break;
2762	}
2763    }
2764  if (cache_hit)
2765    goto done;
2766
2767  /* Try shift-and-add (load effective address) instructions,
2768     i.e. do a*3, a*5, a*9.  */
2769  if ((t & 1) != 0)
2770    {
2771    do_alg_add_t2_m:
2772      q = t - 1;
2773      q = q & -q;
2774      m = exact_log2 (q);
2775      if (m >= 0 && m < maxm)
2776	{
2777	  op_cost = shiftadd_cost[mode][m];
2778	  new_limit.cost = best_cost.cost - op_cost;
2779	  new_limit.latency = best_cost.latency - op_cost;
2780	  synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2781
2782	  alg_in->cost.cost += op_cost;
2783	  alg_in->cost.latency += op_cost;
2784	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2785	    {
2786	      struct algorithm *x;
2787	      best_cost = alg_in->cost;
2788	      x = alg_in, alg_in = best_alg, best_alg = x;
2789	      best_alg->log[best_alg->ops] = m;
2790	      best_alg->op[best_alg->ops] = alg_add_t2_m;
2791	    }
2792	}
2793      if (cache_hit)
2794	goto done;
2795
2796    do_alg_sub_t2_m:
2797      q = t + 1;
2798      q = q & -q;
2799      m = exact_log2 (q);
2800      if (m >= 0 && m < maxm)
2801	{
2802	  op_cost = shiftsub_cost[mode][m];
2803	  new_limit.cost = best_cost.cost - op_cost;
2804	  new_limit.latency = best_cost.latency - op_cost;
2805	  synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2806
2807	  alg_in->cost.cost += op_cost;
2808	  alg_in->cost.latency += op_cost;
2809	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2810	    {
2811	      struct algorithm *x;
2812	      best_cost = alg_in->cost;
2813	      x = alg_in, alg_in = best_alg, best_alg = x;
2814	      best_alg->log[best_alg->ops] = m;
2815	      best_alg->op[best_alg->ops] = alg_sub_t2_m;
2816	    }
2817	}
2818      if (cache_hit)
2819	goto done;
2820    }
2821
2822 done:
2823  /* If best_cost has not decreased, we have not found any algorithm.  */
2824  if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2825    {
2826      /* We failed to find an algorithm.  Record alg_impossible for
2827	 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2828	 we are asked to find an algorithm for T within the same or
2829	 lower COST_LIMIT, we can immediately return to the
2830	 caller.  */
2831      alg_hash[hash_index].t = t;
2832      alg_hash[hash_index].mode = mode;
2833      alg_hash[hash_index].alg = alg_impossible;
2834      alg_hash[hash_index].cost = *cost_limit;
2835      return;
2836    }
2837
2838  /* Cache the result.  */
2839  if (!cache_hit)
2840    {
2841      alg_hash[hash_index].t = t;
2842      alg_hash[hash_index].mode = mode;
2843      alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2844      alg_hash[hash_index].cost.cost = best_cost.cost;
2845      alg_hash[hash_index].cost.latency = best_cost.latency;
2846    }
2847
2848  /* If we are getting a too long sequence for `struct algorithm'
2849     to record, make this search fail.  */
2850  if (best_alg->ops == MAX_BITS_PER_WORD)
2851    return;
2852
2853  /* Copy the algorithm from temporary space to the space at alg_out.
2854     We avoid using structure assignment because the majority of
2855     best_alg is normally undefined, and this is a critical function.  */
2856  alg_out->ops = best_alg->ops + 1;
2857  alg_out->cost = best_cost;
2858  memcpy (alg_out->op, best_alg->op,
2859	  alg_out->ops * sizeof *alg_out->op);
2860  memcpy (alg_out->log, best_alg->log,
2861	  alg_out->ops * sizeof *alg_out->log);
2862}
2863
2864/* Find the cheapest way of multiplying a value of mode MODE by VAL.
2865   Try three variations:
2866
2867       - a shift/add sequence based on VAL itself
2868       - a shift/add sequence based on -VAL, followed by a negation
2869       - a shift/add sequence based on VAL - 1, followed by an addition.
2870
2871   Return true if the cheapest of these cost less than MULT_COST,
2872   describing the algorithm in *ALG and final fixup in *VARIANT.  */
2873
2874static bool
2875choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2876		     struct algorithm *alg, enum mult_variant *variant,
2877		     int mult_cost)
2878{
2879  struct algorithm alg2;
2880  struct mult_cost limit;
2881  int op_cost;
2882
2883  /* Fail quickly for impossible bounds.  */
2884  if (mult_cost < 0)
2885    return false;
2886
2887  /* Ensure that mult_cost provides a reasonable upper bound.
2888     Any constant multiplication can be performed with less
2889     than 2 * bits additions.  */
2890  op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
2891  if (mult_cost > op_cost)
2892    mult_cost = op_cost;
2893
2894  *variant = basic_variant;
2895  limit.cost = mult_cost;
2896  limit.latency = mult_cost;
2897  synth_mult (alg, val, &limit, mode);
2898
2899  /* This works only if the inverted value actually fits in an
2900     `unsigned int' */
2901  if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2902    {
2903      op_cost = neg_cost[mode];
2904      if (MULT_COST_LESS (&alg->cost, mult_cost))
2905	{
2906	  limit.cost = alg->cost.cost - op_cost;
2907	  limit.latency = alg->cost.latency - op_cost;
2908	}
2909      else
2910	{
2911	  limit.cost = mult_cost - op_cost;
2912	  limit.latency = mult_cost - op_cost;
2913	}
2914
2915      synth_mult (&alg2, -val, &limit, mode);
2916      alg2.cost.cost += op_cost;
2917      alg2.cost.latency += op_cost;
2918      if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2919	*alg = alg2, *variant = negate_variant;
2920    }
2921
2922  /* This proves very useful for division-by-constant.  */
2923  op_cost = add_cost[mode];
2924  if (MULT_COST_LESS (&alg->cost, mult_cost))
2925    {
2926      limit.cost = alg->cost.cost - op_cost;
2927      limit.latency = alg->cost.latency - op_cost;
2928    }
2929  else
2930    {
2931      limit.cost = mult_cost - op_cost;
2932      limit.latency = mult_cost - op_cost;
2933    }
2934
2935  synth_mult (&alg2, val - 1, &limit, mode);
2936  alg2.cost.cost += op_cost;
2937  alg2.cost.latency += op_cost;
2938  if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2939    *alg = alg2, *variant = add_variant;
2940
2941  return MULT_COST_LESS (&alg->cost, mult_cost);
2942}
2943
2944/* A subroutine of expand_mult, used for constant multiplications.
2945   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2946   convenient.  Use the shift/add sequence described by ALG and apply
2947   the final fixup specified by VARIANT.  */
2948
2949static rtx
2950expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2951		   rtx target, const struct algorithm *alg,
2952		   enum mult_variant variant)
2953{
2954  HOST_WIDE_INT val_so_far;
2955  rtx insn, accum, tem;
2956  int opno;
2957  enum machine_mode nmode;
2958
2959  /* Avoid referencing memory over and over.
2960     For speed, but also for correctness when mem is volatile.  */
2961  if (MEM_P (op0))
2962    op0 = force_reg (mode, op0);
2963
2964  /* ACCUM starts out either as OP0 or as a zero, depending on
2965     the first operation.  */
2966
2967  if (alg->op[0] == alg_zero)
2968    {
2969      accum = copy_to_mode_reg (mode, const0_rtx);
2970      val_so_far = 0;
2971    }
2972  else if (alg->op[0] == alg_m)
2973    {
2974      accum = copy_to_mode_reg (mode, op0);
2975      val_so_far = 1;
2976    }
2977  else
2978    gcc_unreachable ();
2979
2980  for (opno = 1; opno < alg->ops; opno++)
2981    {
2982      int log = alg->log[opno];
2983      rtx shift_subtarget = optimize ? 0 : accum;
2984      rtx add_target
2985	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
2986	   && !optimize)
2987	  ? target : 0;
2988      rtx accum_target = optimize ? 0 : accum;
2989
2990      switch (alg->op[opno])
2991	{
2992	case alg_shift:
2993	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
2994				build_int_cst (NULL_TREE, log),
2995				NULL_RTX, 0);
2996	  val_so_far <<= log;
2997	  break;
2998
2999	case alg_add_t_m2:
3000	  tem = expand_shift (LSHIFT_EXPR, mode, op0,
3001			      build_int_cst (NULL_TREE, log),
3002			      NULL_RTX, 0);
3003	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3004				 add_target ? add_target : accum_target);
3005	  val_so_far += (HOST_WIDE_INT) 1 << log;
3006	  break;
3007
3008	case alg_sub_t_m2:
3009	  tem = expand_shift (LSHIFT_EXPR, mode, op0,
3010			      build_int_cst (NULL_TREE, log),
3011			      NULL_RTX, 0);
3012	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3013				 add_target ? add_target : accum_target);
3014	  val_so_far -= (HOST_WIDE_INT) 1 << log;
3015	  break;
3016
3017	case alg_add_t2_m:
3018	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3019				build_int_cst (NULL_TREE, log),
3020				shift_subtarget,
3021				0);
3022	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3023				 add_target ? add_target : accum_target);
3024	  val_so_far = (val_so_far << log) + 1;
3025	  break;
3026
3027	case alg_sub_t2_m:
3028	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3029				build_int_cst (NULL_TREE, log),
3030				shift_subtarget, 0);
3031	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3032				 add_target ? add_target : accum_target);
3033	  val_so_far = (val_so_far << log) - 1;
3034	  break;
3035
3036	case alg_add_factor:
3037	  tem = expand_shift (LSHIFT_EXPR, mode, accum,
3038			      build_int_cst (NULL_TREE, log),
3039			      NULL_RTX, 0);
3040	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3041				 add_target ? add_target : accum_target);
3042	  val_so_far += val_so_far << log;
3043	  break;
3044
3045	case alg_sub_factor:
3046	  tem = expand_shift (LSHIFT_EXPR, mode, accum,
3047			      build_int_cst (NULL_TREE, log),
3048			      NULL_RTX, 0);
3049	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3050				 (add_target
3051				  ? add_target : (optimize ? 0 : tem)));
3052	  val_so_far = (val_so_far << log) - val_so_far;
3053	  break;
3054
3055	default:
3056	  gcc_unreachable ();
3057	}
3058
3059      /* Write a REG_EQUAL note on the last insn so that we can cse
3060	 multiplication sequences.  Note that if ACCUM is a SUBREG,
3061	 we've set the inner register and must properly indicate
3062	 that.  */
3063
3064      tem = op0, nmode = mode;
3065      if (GET_CODE (accum) == SUBREG)
3066	{
3067	  nmode = GET_MODE (SUBREG_REG (accum));
3068	  tem = gen_lowpart (nmode, op0);
3069	}
3070
3071      insn = get_last_insn ();
3072      set_unique_reg_note (insn, REG_EQUAL,
3073			   gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
3074    }
3075
3076  if (variant == negate_variant)
3077    {
3078      val_so_far = -val_so_far;
3079      accum = expand_unop (mode, neg_optab, accum, target, 0);
3080    }
3081  else if (variant == add_variant)
3082    {
3083      val_so_far = val_so_far + 1;
3084      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3085    }
3086
3087  /* Compare only the bits of val and val_so_far that are significant
3088     in the result mode, to avoid sign-/zero-extension confusion.  */
3089  val &= GET_MODE_MASK (mode);
3090  val_so_far &= GET_MODE_MASK (mode);
3091  gcc_assert (val == val_so_far);
3092
3093  return accum;
3094}
3095
3096/* Perform a multiplication and return an rtx for the result.
3097   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3098   TARGET is a suggestion for where to store the result (an rtx).
3099
3100   We check specially for a constant integer as OP1.
3101   If you want this check for OP0 as well, then before calling
3102   you should swap the two operands if OP0 would be constant.  */
3103
3104rtx
3105expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3106	     int unsignedp)
3107{
3108  enum mult_variant variant;
3109  struct algorithm algorithm;
3110  int max_cost;
3111
3112  /* Handling const0_rtx here allows us to use zero as a rogue value for
3113     coeff below.  */
3114  if (op1 == const0_rtx)
3115    return const0_rtx;
3116  if (op1 == const1_rtx)
3117    return op0;
3118  if (op1 == constm1_rtx)
3119    return expand_unop (mode,
3120			GET_MODE_CLASS (mode) == MODE_INT
3121			&& !unsignedp && flag_trapv
3122			? negv_optab : neg_optab,
3123			op0, target, 0);
3124
3125  /* These are the operations that are potentially turned into a sequence
3126     of shifts and additions.  */
3127  if (SCALAR_INT_MODE_P (mode)
3128      && (unsignedp || !flag_trapv))
3129    {
3130      HOST_WIDE_INT coeff = 0;
3131      rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3132
3133      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3134	 less than or equal in size to `unsigned int' this doesn't matter.
3135	 If the mode is larger than `unsigned int', then synth_mult works
3136	 only if the constant value exactly fits in an `unsigned int' without
3137	 any truncation.  This means that multiplying by negative values does
3138	 not work; results are off by 2^32 on a 32 bit machine.  */
3139
3140      if (GET_CODE (op1) == CONST_INT)
3141	{
3142	  /* Attempt to handle multiplication of DImode values by negative
3143	     coefficients, by performing the multiplication by a positive
3144	     multiplier and then inverting the result.  */
3145	  if (INTVAL (op1) < 0
3146	      && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3147	    {
3148	      /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3149		 result is interpreted as an unsigned coefficient.
3150		 Exclude cost of op0 from max_cost to match the cost
3151		 calculation of the synth_mult.  */
3152	      max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
3153			 - neg_cost[mode];
3154	      if (max_cost > 0
3155		  && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3156					  &variant, max_cost))
3157		{
3158		  rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3159						NULL_RTX, &algorithm,
3160						variant);
3161		  return expand_unop (mode, neg_optab, temp, target, 0);
3162		}
3163	    }
3164	  else coeff = INTVAL (op1);
3165	}
3166      else if (GET_CODE (op1) == CONST_DOUBLE)
3167	{
3168	  /* If we are multiplying in DImode, it may still be a win
3169	     to try to work with shifts and adds.  */
3170	  if (CONST_DOUBLE_HIGH (op1) == 0)
3171	    coeff = CONST_DOUBLE_LOW (op1);
3172	  else if (CONST_DOUBLE_LOW (op1) == 0
3173		   && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3174	    {
3175	      int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3176			  + HOST_BITS_PER_WIDE_INT;
3177	      return expand_shift (LSHIFT_EXPR, mode, op0,
3178				   build_int_cst (NULL_TREE, shift),
3179				   target, unsignedp);
3180	    }
3181	}
3182
3183      /* We used to test optimize here, on the grounds that it's better to
3184	 produce a smaller program when -O is not used.  But this causes
3185	 such a terrible slowdown sometimes that it seems better to always
3186	 use synth_mult.  */
3187      if (coeff != 0)
3188	{
3189	  /* Special case powers of two.  */
3190	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3191	    return expand_shift (LSHIFT_EXPR, mode, op0,
3192				 build_int_cst (NULL_TREE, floor_log2 (coeff)),
3193				 target, unsignedp);
3194
3195	  /* Exclude cost of op0 from max_cost to match the cost
3196	     calculation of the synth_mult.  */
3197	  max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
3198	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3199				   max_cost))
3200	    return expand_mult_const (mode, op0, coeff, target,
3201				      &algorithm, variant);
3202	}
3203    }
3204
3205  if (GET_CODE (op0) == CONST_DOUBLE)
3206    {
3207      rtx temp = op0;
3208      op0 = op1;
3209      op1 = temp;
3210    }
3211
3212  /* Expand x*2.0 as x+x.  */
3213  if (GET_CODE (op1) == CONST_DOUBLE
3214      && SCALAR_FLOAT_MODE_P (mode))
3215    {
3216      REAL_VALUE_TYPE d;
3217      REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3218
3219      if (REAL_VALUES_EQUAL (d, dconst2))
3220	{
3221	  op0 = force_reg (GET_MODE (op0), op0);
3222	  return expand_binop (mode, add_optab, op0, op0,
3223			       target, unsignedp, OPTAB_LIB_WIDEN);
3224	}
3225    }
3226
3227  /* This used to use umul_optab if unsigned, but for non-widening multiply
3228     there is no difference between signed and unsigned.  */
3229  op0 = expand_binop (mode,
3230		      ! unsignedp
3231		      && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3232		      ? smulv_optab : smul_optab,
3233		      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3234  gcc_assert (op0);
3235  return op0;
3236}
3237
3238/* Return the smallest n such that 2**n >= X.  */
3239
3240int
3241ceil_log2 (unsigned HOST_WIDE_INT x)
3242{
3243  return floor_log2 (x - 1) + 1;
3244}
3245
3246/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3247   replace division by D, and put the least significant N bits of the result
3248   in *MULTIPLIER_PTR and return the most significant bit.
3249
3250   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3251   needed precision is in PRECISION (should be <= N).
3252
3253   PRECISION should be as small as possible so this function can choose
3254   multiplier more freely.
3255
3256   The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3257   is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3258
3259   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3260   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3261
3262static
3263unsigned HOST_WIDE_INT
3264choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3265		   rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3266{
3267  HOST_WIDE_INT mhigh_hi, mlow_hi;
3268  unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3269  int lgup, post_shift;
3270  int pow, pow2;
3271  unsigned HOST_WIDE_INT nl, dummy1;
3272  HOST_WIDE_INT nh, dummy2;
3273
3274  /* lgup = ceil(log2(divisor)); */
3275  lgup = ceil_log2 (d);
3276
3277  gcc_assert (lgup <= n);
3278
3279  pow = n + lgup;
3280  pow2 = n + lgup - precision;
3281
3282  /* We could handle this with some effort, but this case is much
3283     better handled directly with a scc insn, so rely on caller using
3284     that.  */
3285  gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3286
3287  /* mlow = 2^(N + lgup)/d */
3288 if (pow >= HOST_BITS_PER_WIDE_INT)
3289    {
3290      nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3291      nl = 0;
3292    }
3293  else
3294    {
3295      nh = 0;
3296      nl = (unsigned HOST_WIDE_INT) 1 << pow;
3297    }
3298  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3299			&mlow_lo, &mlow_hi, &dummy1, &dummy2);
3300
3301  /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3302  if (pow2 >= HOST_BITS_PER_WIDE_INT)
3303    nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3304  else
3305    nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3306  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3307			&mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3308
3309  gcc_assert (!mhigh_hi || nh - d < d);
3310  gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3311  /* Assert that mlow < mhigh.  */
3312  gcc_assert (mlow_hi < mhigh_hi
3313	      || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3314
3315  /* If precision == N, then mlow, mhigh exceed 2^N
3316     (but they do not exceed 2^(N+1)).  */
3317
3318  /* Reduce to lowest terms.  */
3319  for (post_shift = lgup; post_shift > 0; post_shift--)
3320    {
3321      unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3322      unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3323      if (ml_lo >= mh_lo)
3324	break;
3325
3326      mlow_hi = 0;
3327      mlow_lo = ml_lo;
3328      mhigh_hi = 0;
3329      mhigh_lo = mh_lo;
3330    }
3331
3332  *post_shift_ptr = post_shift;
3333  *lgup_ptr = lgup;
3334  if (n < HOST_BITS_PER_WIDE_INT)
3335    {
3336      unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3337      *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3338      return mhigh_lo >= mask;
3339    }
3340  else
3341    {
3342      *multiplier_ptr = GEN_INT (mhigh_lo);
3343      return mhigh_hi;
3344    }
3345}
3346
3347/* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3348   congruent to 1 (mod 2**N).  */
3349
3350static unsigned HOST_WIDE_INT
3351invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3352{
3353  /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3354
3355  /* The algorithm notes that the choice y = x satisfies
3356     x*y == 1 mod 2^3, since x is assumed odd.
3357     Each iteration doubles the number of bits of significance in y.  */
3358
3359  unsigned HOST_WIDE_INT mask;
3360  unsigned HOST_WIDE_INT y = x;
3361  int nbit = 3;
3362
3363  mask = (n == HOST_BITS_PER_WIDE_INT
3364	  ? ~(unsigned HOST_WIDE_INT) 0
3365	  : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3366
3367  while (nbit < n)
3368    {
3369      y = y * (2 - x*y) & mask;		/* Modulo 2^N */
3370      nbit *= 2;
3371    }
3372  return y;
3373}
3374
3375/* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3376   flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3377   product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3378   to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3379   become signed.
3380
3381   The result is put in TARGET if that is convenient.
3382
3383   MODE is the mode of operation.  */
3384
3385rtx
3386expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3387			     rtx op1, rtx target, int unsignedp)
3388{
3389  rtx tem;
3390  enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3391
3392  tem = expand_shift (RSHIFT_EXPR, mode, op0,
3393		      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3394		      NULL_RTX, 0);
3395  tem = expand_and (mode, tem, op1, NULL_RTX);
3396  adj_operand
3397    = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3398		     adj_operand);
3399
3400  tem = expand_shift (RSHIFT_EXPR, mode, op1,
3401		      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3402		      NULL_RTX, 0);
3403  tem = expand_and (mode, tem, op0, NULL_RTX);
3404  target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3405			  target);
3406
3407  return target;
3408}
3409
3410/* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3411
3412static rtx
3413extract_high_half (enum machine_mode mode, rtx op)
3414{
3415  enum machine_mode wider_mode;
3416
3417  if (mode == word_mode)
3418    return gen_highpart (mode, op);
3419
3420  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3421
3422  wider_mode = GET_MODE_WIDER_MODE (mode);
3423  op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3424		     build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3425  return convert_modes (mode, wider_mode, op, 0);
3426}
3427
3428/* Like expand_mult_highpart, but only consider using a multiplication
3429   optab.  OP1 is an rtx for the constant operand.  */
3430
3431static rtx
3432expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3433			    rtx target, int unsignedp, int max_cost)
3434{
3435  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3436  enum machine_mode wider_mode;
3437  optab moptab;
3438  rtx tem;
3439  int size;
3440
3441  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3442
3443  wider_mode = GET_MODE_WIDER_MODE (mode);
3444  size = GET_MODE_BITSIZE (mode);
3445
3446  /* Firstly, try using a multiplication insn that only generates the needed
3447     high part of the product, and in the sign flavor of unsignedp.  */
3448  if (mul_highpart_cost[mode] < max_cost)
3449    {
3450      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3451      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3452			  unsignedp, OPTAB_DIRECT);
3453      if (tem)
3454	return tem;
3455    }
3456
3457  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3458     Need to adjust the result after the multiplication.  */
3459  if (size - 1 < BITS_PER_WORD
3460      && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3461	  + 4 * add_cost[mode] < max_cost))
3462    {
3463      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3464      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3465			  unsignedp, OPTAB_DIRECT);
3466      if (tem)
3467	/* We used the wrong signedness.  Adjust the result.  */
3468	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3469					    tem, unsignedp);
3470    }
3471
3472  /* Try widening multiplication.  */
3473  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3474  if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3475      && mul_widen_cost[wider_mode] < max_cost)
3476    {
3477      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3478			  unsignedp, OPTAB_WIDEN);
3479      if (tem)
3480	return extract_high_half (mode, tem);
3481    }
3482
3483  /* Try widening the mode and perform a non-widening multiplication.  */
3484  if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3485      && size - 1 < BITS_PER_WORD
3486      && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3487    {
3488      rtx insns, wop0, wop1;
3489
3490      /* We need to widen the operands, for example to ensure the
3491	 constant multiplier is correctly sign or zero extended.
3492	 Use a sequence to clean-up any instructions emitted by
3493	 the conversions if things don't work out.  */
3494      start_sequence ();
3495      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3496      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3497      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3498			  unsignedp, OPTAB_WIDEN);
3499      insns = get_insns ();
3500      end_sequence ();
3501
3502      if (tem)
3503	{
3504	  emit_insn (insns);
3505	  return extract_high_half (mode, tem);
3506	}
3507    }
3508
3509  /* Try widening multiplication of opposite signedness, and adjust.  */
3510  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3511  if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3512      && size - 1 < BITS_PER_WORD
3513      && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3514	  + 4 * add_cost[mode] < max_cost))
3515    {
3516      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3517			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3518      if (tem != 0)
3519	{
3520	  tem = extract_high_half (mode, tem);
3521	  /* We used the wrong signedness.  Adjust the result.  */
3522	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3523					      target, unsignedp);
3524	}
3525    }
3526
3527  return 0;
3528}
3529
3530/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3531   putting the high half of the result in TARGET if that is convenient,
3532   and return where the result is.  If the operation can not be performed,
3533   0 is returned.
3534
3535   MODE is the mode of operation and result.
3536
3537   UNSIGNEDP nonzero means unsigned multiply.
3538
3539   MAX_COST is the total allowed cost for the expanded RTL.  */
3540
3541static rtx
3542expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3543		      rtx target, int unsignedp, int max_cost)
3544{
3545  enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3546  unsigned HOST_WIDE_INT cnst1;
3547  int extra_cost;
3548  bool sign_adjust = false;
3549  enum mult_variant variant;
3550  struct algorithm alg;
3551  rtx tem;
3552
3553  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3554  /* We can't support modes wider than HOST_BITS_PER_INT.  */
3555  gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3556
3557  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3558
3559  /* We can't optimize modes wider than BITS_PER_WORD.
3560     ??? We might be able to perform double-word arithmetic if
3561     mode == word_mode, however all the cost calculations in
3562     synth_mult etc. assume single-word operations.  */
3563  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3564    return expand_mult_highpart_optab (mode, op0, op1, target,
3565				       unsignedp, max_cost);
3566
3567  extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3568
3569  /* Check whether we try to multiply by a negative constant.  */
3570  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3571    {
3572      sign_adjust = true;
3573      extra_cost += add_cost[mode];
3574    }
3575
3576  /* See whether shift/add multiplication is cheap enough.  */
3577  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3578			   max_cost - extra_cost))
3579    {
3580      /* See whether the specialized multiplication optabs are
3581	 cheaper than the shift/add version.  */
3582      tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3583					alg.cost.cost + extra_cost);
3584      if (tem)
3585	return tem;
3586
3587      tem = convert_to_mode (wider_mode, op0, unsignedp);
3588      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3589      tem = extract_high_half (mode, tem);
3590
3591      /* Adjust result for signedness.  */
3592      if (sign_adjust)
3593	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3594
3595      return tem;
3596    }
3597  return expand_mult_highpart_optab (mode, op0, op1, target,
3598				     unsignedp, max_cost);
3599}
3600
3601
3602/* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3603
3604static rtx
3605expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3606{
3607  unsigned HOST_WIDE_INT masklow, maskhigh;
3608  rtx result, temp, shift, label;
3609  int logd;
3610
3611  logd = floor_log2 (d);
3612  result = gen_reg_rtx (mode);
3613
3614  /* Avoid conditional branches when they're expensive.  */
3615  if (BRANCH_COST >= 2
3616      && !optimize_size)
3617    {
3618      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3619				      mode, 0, -1);
3620      if (signmask)
3621	{
3622	  signmask = force_reg (mode, signmask);
3623	  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3624	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3625
3626	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
3627	     which instruction sequence to use.  If logical right shifts
3628	     are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3629	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3630
3631	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
3632	  if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3633	      || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3634	    {
3635	      temp = expand_binop (mode, xor_optab, op0, signmask,
3636				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3637	      temp = expand_binop (mode, sub_optab, temp, signmask,
3638				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3639	      temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3640				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3641	      temp = expand_binop (mode, xor_optab, temp, signmask,
3642				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3643	      temp = expand_binop (mode, sub_optab, temp, signmask,
3644				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3645	    }
3646	  else
3647	    {
3648	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
3649				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
3650	      signmask = force_reg (mode, signmask);
3651
3652	      temp = expand_binop (mode, add_optab, op0, signmask,
3653				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3654	      temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3655				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3656	      temp = expand_binop (mode, sub_optab, temp, signmask,
3657				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3658	    }
3659	  return temp;
3660	}
3661    }
3662
3663  /* Mask contains the mode's signbit and the significant bits of the
3664     modulus.  By including the signbit in the operation, many targets
3665     can avoid an explicit compare operation in the following comparison
3666     against zero.  */
3667
3668  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3669  if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3670    {
3671      masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3672      maskhigh = -1;
3673    }
3674  else
3675    maskhigh = (HOST_WIDE_INT) -1
3676		 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3677
3678  temp = expand_binop (mode, and_optab, op0,
3679		       immed_double_const (masklow, maskhigh, mode),
3680		       result, 1, OPTAB_LIB_WIDEN);
3681  if (temp != result)
3682    emit_move_insn (result, temp);
3683
3684  label = gen_label_rtx ();
3685  do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3686
3687  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3688		       0, OPTAB_LIB_WIDEN);
3689  masklow = (HOST_WIDE_INT) -1 << logd;
3690  maskhigh = -1;
3691  temp = expand_binop (mode, ior_optab, temp,
3692		       immed_double_const (masklow, maskhigh, mode),
3693		       result, 1, OPTAB_LIB_WIDEN);
3694  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3695		       0, OPTAB_LIB_WIDEN);
3696  if (temp != result)
3697    emit_move_insn (result, temp);
3698  emit_label (label);
3699  return result;
3700}
3701
3702/* Expand signed division of OP0 by a power of two D in mode MODE.
3703   This routine is only called for positive values of D.  */
3704
3705static rtx
3706expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3707{
3708  rtx temp, label;
3709  tree shift;
3710  int logd;
3711
3712  logd = floor_log2 (d);
3713  shift = build_int_cst (NULL_TREE, logd);
3714
3715  if (d == 2 && BRANCH_COST >= 1)
3716    {
3717      temp = gen_reg_rtx (mode);
3718      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3719      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3720			   0, OPTAB_LIB_WIDEN);
3721      return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3722    }
3723
3724#ifdef HAVE_conditional_move
3725  if (BRANCH_COST >= 2)
3726    {
3727      rtx temp2;
3728
3729      /* ??? emit_conditional_move forces a stack adjustment via
3730	 compare_from_rtx so, if the sequence is discarded, it will
3731	 be lost.  Do it now instead.  */
3732      do_pending_stack_adjust ();
3733
3734      start_sequence ();
3735      temp2 = copy_to_mode_reg (mode, op0);
3736      temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3737			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
3738      temp = force_reg (mode, temp);
3739
3740      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3741      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3742				     mode, temp, temp2, mode, 0);
3743      if (temp2)
3744	{
3745	  rtx seq = get_insns ();
3746	  end_sequence ();
3747	  emit_insn (seq);
3748	  return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3749	}
3750      end_sequence ();
3751    }
3752#endif
3753
3754  if (BRANCH_COST >= 2)
3755    {
3756      int ushift = GET_MODE_BITSIZE (mode) - logd;
3757
3758      temp = gen_reg_rtx (mode);
3759      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3760      if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3761	temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3762			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
3763      else
3764	temp = expand_shift (RSHIFT_EXPR, mode, temp,
3765			     build_int_cst (NULL_TREE, ushift),
3766			     NULL_RTX, 1);
3767      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3768			   0, OPTAB_LIB_WIDEN);
3769      return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3770    }
3771
3772  label = gen_label_rtx ();
3773  temp = copy_to_mode_reg (mode, op0);
3774  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3775  expand_inc (temp, GEN_INT (d - 1));
3776  emit_label (label);
3777  return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3778}
3779
3780/* Emit the code to divide OP0 by OP1, putting the result in TARGET
3781   if that is convenient, and returning where the result is.
3782   You may request either the quotient or the remainder as the result;
3783   specify REM_FLAG nonzero to get the remainder.
3784
3785   CODE is the expression code for which kind of division this is;
3786   it controls how rounding is done.  MODE is the machine mode to use.
3787   UNSIGNEDP nonzero means do unsigned division.  */
3788
3789/* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3790   and then correct it by or'ing in missing high bits
3791   if result of ANDI is nonzero.
3792   For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3793   This could optimize to a bfexts instruction.
3794   But C doesn't use these operations, so their optimizations are
3795   left for later.  */
3796/* ??? For modulo, we don't actually need the highpart of the first product,
3797   the low part will do nicely.  And for small divisors, the second multiply
3798   can also be a low-part only multiply or even be completely left out.
3799   E.g. to calculate the remainder of a division by 3 with a 32 bit
3800   multiply, multiply with 0x55555556 and extract the upper two bits;
3801   the result is exact for inputs up to 0x1fffffff.
3802   The input range can be reduced by using cross-sum rules.
3803   For odd divisors >= 3, the following table gives right shift counts
3804   so that if a number is shifted by an integer multiple of the given
3805   amount, the remainder stays the same:
3806   2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3807   14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3808   0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3809   20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3810   0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3811
3812   Cross-sum rules for even numbers can be derived by leaving as many bits
3813   to the right alone as the divisor has zeros to the right.
3814   E.g. if x is an unsigned 32 bit number:
3815   (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3816   */
3817
3818rtx
3819expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3820	       rtx op0, rtx op1, rtx target, int unsignedp)
3821{
3822  enum machine_mode compute_mode;
3823  rtx tquotient;
3824  rtx quotient = 0, remainder = 0;
3825  rtx last;
3826  int size;
3827  rtx insn, set;
3828  optab optab1, optab2;
3829  int op1_is_constant, op1_is_pow2 = 0;
3830  int max_cost, extra_cost;
3831  static HOST_WIDE_INT last_div_const = 0;
3832  static HOST_WIDE_INT ext_op1;
3833
3834  op1_is_constant = GET_CODE (op1) == CONST_INT;
3835  if (op1_is_constant)
3836    {
3837      ext_op1 = INTVAL (op1);
3838      if (unsignedp)
3839	ext_op1 &= GET_MODE_MASK (mode);
3840      op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3841		     || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3842    }
3843
3844  /*
3845     This is the structure of expand_divmod:
3846
3847     First comes code to fix up the operands so we can perform the operations
3848     correctly and efficiently.
3849
3850     Second comes a switch statement with code specific for each rounding mode.
3851     For some special operands this code emits all RTL for the desired
3852     operation, for other cases, it generates only a quotient and stores it in
3853     QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3854     to indicate that it has not done anything.
3855
3856     Last comes code that finishes the operation.  If QUOTIENT is set and
3857     REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3858     QUOTIENT is not set, it is computed using trunc rounding.
3859
3860     We try to generate special code for division and remainder when OP1 is a
3861     constant.  If |OP1| = 2**n we can use shifts and some other fast
3862     operations.  For other values of OP1, we compute a carefully selected
3863     fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3864     by m.
3865
3866     In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3867     half of the product.  Different strategies for generating the product are
3868     implemented in expand_mult_highpart.
3869
3870     If what we actually want is the remainder, we generate that by another
3871     by-constant multiplication and a subtraction.  */
3872
3873  /* We shouldn't be called with OP1 == const1_rtx, but some of the
3874     code below will malfunction if we are, so check here and handle
3875     the special case if so.  */
3876  if (op1 == const1_rtx)
3877    return rem_flag ? const0_rtx : op0;
3878
3879    /* When dividing by -1, we could get an overflow.
3880     negv_optab can handle overflows.  */
3881  if (! unsignedp && op1 == constm1_rtx)
3882    {
3883      if (rem_flag)
3884	return const0_rtx;
3885      return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3886			  ? negv_optab : neg_optab, op0, target, 0);
3887    }
3888
3889  if (target
3890      /* Don't use the function value register as a target
3891	 since we have to read it as well as write it,
3892	 and function-inlining gets confused by this.  */
3893      && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3894	  /* Don't clobber an operand while doing a multi-step calculation.  */
3895	  || ((rem_flag || op1_is_constant)
3896	      && (reg_mentioned_p (target, op0)
3897		  || (MEM_P (op0) && MEM_P (target))))
3898	  || reg_mentioned_p (target, op1)
3899	  || (MEM_P (op1) && MEM_P (target))))
3900    target = 0;
3901
3902  /* Get the mode in which to perform this computation.  Normally it will
3903     be MODE, but sometimes we can't do the desired operation in MODE.
3904     If so, pick a wider mode in which we can do the operation.  Convert
3905     to that mode at the start to avoid repeated conversions.
3906
3907     First see what operations we need.  These depend on the expression
3908     we are evaluating.  (We assume that divxx3 insns exist under the
3909     same conditions that modxx3 insns and that these insns don't normally
3910     fail.  If these assumptions are not correct, we may generate less
3911     efficient code in some cases.)
3912
3913     Then see if we find a mode in which we can open-code that operation
3914     (either a division, modulus, or shift).  Finally, check for the smallest
3915     mode for which we can do the operation with a library call.  */
3916
3917  /* We might want to refine this now that we have division-by-constant
3918     optimization.  Since expand_mult_highpart tries so many variants, it is
3919     not straightforward to generalize this.  Maybe we should make an array
3920     of possible modes in init_expmed?  Save this for GCC 2.7.  */
3921
3922  optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3923	    ? (unsignedp ? lshr_optab : ashr_optab)
3924	    : (unsignedp ? udiv_optab : sdiv_optab));
3925  optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3926	    ? optab1
3927	    : (unsignedp ? udivmod_optab : sdivmod_optab));
3928
3929  for (compute_mode = mode; compute_mode != VOIDmode;
3930       compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3931    if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3932	|| optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3933      break;
3934
3935  if (compute_mode == VOIDmode)
3936    for (compute_mode = mode; compute_mode != VOIDmode;
3937	 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3938      if (optab1->handlers[compute_mode].libfunc
3939	  || optab2->handlers[compute_mode].libfunc)
3940	break;
3941
3942  /* If we still couldn't find a mode, use MODE, but expand_binop will
3943     probably die.  */
3944  if (compute_mode == VOIDmode)
3945    compute_mode = mode;
3946
3947  if (target && GET_MODE (target) == compute_mode)
3948    tquotient = target;
3949  else
3950    tquotient = gen_reg_rtx (compute_mode);
3951
3952  size = GET_MODE_BITSIZE (compute_mode);
3953#if 0
3954  /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3955     (mode), and thereby get better code when OP1 is a constant.  Do that
3956     later.  It will require going over all usages of SIZE below.  */
3957  size = GET_MODE_BITSIZE (mode);
3958#endif
3959
3960  /* Only deduct something for a REM if the last divide done was
3961     for a different constant.   Then set the constant of the last
3962     divide.  */
3963  max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode];
3964  if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3965		     && INTVAL (op1) == last_div_const))
3966    max_cost -= mul_cost[compute_mode] + add_cost[compute_mode];
3967
3968  last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3969
3970  /* Now convert to the best mode to use.  */
3971  if (compute_mode != mode)
3972    {
3973      op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3974      op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3975
3976      /* convert_modes may have placed op1 into a register, so we
3977	 must recompute the following.  */
3978      op1_is_constant = GET_CODE (op1) == CONST_INT;
3979      op1_is_pow2 = (op1_is_constant
3980		     && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3981			  || (! unsignedp
3982			      && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3983    }
3984
3985  /* If one of the operands is a volatile MEM, copy it into a register.  */
3986
3987  if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3988    op0 = force_reg (compute_mode, op0);
3989  if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3990    op1 = force_reg (compute_mode, op1);
3991
3992  /* If we need the remainder or if OP1 is constant, we need to
3993     put OP0 in a register in case it has any queued subexpressions.  */
3994  if (rem_flag || op1_is_constant)
3995    op0 = force_reg (compute_mode, op0);
3996
3997  last = get_last_insn ();
3998
3999  /* Promote floor rounding to trunc rounding for unsigned operations.  */
4000  if (unsignedp)
4001    {
4002      if (code == FLOOR_DIV_EXPR)
4003	code = TRUNC_DIV_EXPR;
4004      if (code == FLOOR_MOD_EXPR)
4005	code = TRUNC_MOD_EXPR;
4006      if (code == EXACT_DIV_EXPR && op1_is_pow2)
4007	code = TRUNC_DIV_EXPR;
4008    }
4009
4010  if (op1 != const0_rtx)
4011    switch (code)
4012      {
4013      case TRUNC_MOD_EXPR:
4014      case TRUNC_DIV_EXPR:
4015	if (op1_is_constant)
4016	  {
4017	    if (unsignedp)
4018	      {
4019		unsigned HOST_WIDE_INT mh;
4020		int pre_shift, post_shift;
4021		int dummy;
4022		rtx ml;
4023		unsigned HOST_WIDE_INT d = (INTVAL (op1)
4024					    & GET_MODE_MASK (compute_mode));
4025
4026		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4027		  {
4028		    pre_shift = floor_log2 (d);
4029		    if (rem_flag)
4030		      {
4031			remainder
4032			  = expand_binop (compute_mode, and_optab, op0,
4033					  GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4034					  remainder, 1,
4035					  OPTAB_LIB_WIDEN);
4036			if (remainder)
4037			  return gen_lowpart (mode, remainder);
4038		      }
4039		    quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4040					     build_int_cst (NULL_TREE,
4041							    pre_shift),
4042					     tquotient, 1);
4043		  }
4044		else if (size <= HOST_BITS_PER_WIDE_INT)
4045		  {
4046		    if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4047		      {
4048			/* Most significant bit of divisor is set; emit an scc
4049			   insn.  */
4050			quotient = emit_store_flag (tquotient, GEU, op0, op1,
4051						    compute_mode, 1, 1);
4052			if (quotient == 0)
4053			  goto fail1;
4054		      }
4055		    else
4056		      {
4057			/* Find a suitable multiplier and right shift count
4058			   instead of multiplying with D.  */
4059
4060			mh = choose_multiplier (d, size, size,
4061						&ml, &post_shift, &dummy);
4062
4063			/* If the suggested multiplier is more than SIZE bits,
4064			   we can do better for even divisors, using an
4065			   initial right shift.  */
4066			if (mh != 0 && (d & 1) == 0)
4067			  {
4068			    pre_shift = floor_log2 (d & -d);
4069			    mh = choose_multiplier (d >> pre_shift, size,
4070						    size - pre_shift,
4071						    &ml, &post_shift, &dummy);
4072			    gcc_assert (!mh);
4073			  }
4074			else
4075			  pre_shift = 0;
4076
4077			if (mh != 0)
4078			  {
4079			    rtx t1, t2, t3, t4;
4080
4081			    if (post_shift - 1 >= BITS_PER_WORD)
4082			      goto fail1;
4083
4084			    extra_cost
4085			      = (shift_cost[compute_mode][post_shift - 1]
4086				 + shift_cost[compute_mode][1]
4087				 + 2 * add_cost[compute_mode]);
4088			    t1 = expand_mult_highpart (compute_mode, op0, ml,
4089						       NULL_RTX, 1,
4090						       max_cost - extra_cost);
4091			    if (t1 == 0)
4092			      goto fail1;
4093			    t2 = force_operand (gen_rtx_MINUS (compute_mode,
4094							       op0, t1),
4095						NULL_RTX);
4096			    t3 = expand_shift
4097			      (RSHIFT_EXPR, compute_mode, t2,
4098			       build_int_cst (NULL_TREE, 1),
4099			       NULL_RTX,1);
4100			    t4 = force_operand (gen_rtx_PLUS (compute_mode,
4101							      t1, t3),
4102						NULL_RTX);
4103			    quotient = expand_shift
4104			      (RSHIFT_EXPR, compute_mode, t4,
4105			       build_int_cst (NULL_TREE, post_shift - 1),
4106			       tquotient, 1);
4107			  }
4108			else
4109			  {
4110			    rtx t1, t2;
4111
4112			    if (pre_shift >= BITS_PER_WORD
4113				|| post_shift >= BITS_PER_WORD)
4114			      goto fail1;
4115
4116			    t1 = expand_shift
4117			      (RSHIFT_EXPR, compute_mode, op0,
4118			       build_int_cst (NULL_TREE, pre_shift),
4119			       NULL_RTX, 1);
4120			    extra_cost
4121			      = (shift_cost[compute_mode][pre_shift]
4122				 + shift_cost[compute_mode][post_shift]);
4123			    t2 = expand_mult_highpart (compute_mode, t1, ml,
4124						       NULL_RTX, 1,
4125						       max_cost - extra_cost);
4126			    if (t2 == 0)
4127			      goto fail1;
4128			    quotient = expand_shift
4129			      (RSHIFT_EXPR, compute_mode, t2,
4130			       build_int_cst (NULL_TREE, post_shift),
4131			       tquotient, 1);
4132			  }
4133		      }
4134		  }
4135		else		/* Too wide mode to use tricky code */
4136		  break;
4137
4138		insn = get_last_insn ();
4139		if (insn != last
4140		    && (set = single_set (insn)) != 0
4141		    && SET_DEST (set) == quotient)
4142		  set_unique_reg_note (insn,
4143				       REG_EQUAL,
4144				       gen_rtx_UDIV (compute_mode, op0, op1));
4145	      }
4146	    else		/* TRUNC_DIV, signed */
4147	      {
4148		unsigned HOST_WIDE_INT ml;
4149		int lgup, post_shift;
4150		rtx mlr;
4151		HOST_WIDE_INT d = INTVAL (op1);
4152		unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
4153
4154		/* n rem d = n rem -d */
4155		if (rem_flag && d < 0)
4156		  {
4157		    d = abs_d;
4158		    op1 = gen_int_mode (abs_d, compute_mode);
4159		  }
4160
4161		if (d == 1)
4162		  quotient = op0;
4163		else if (d == -1)
4164		  quotient = expand_unop (compute_mode, neg_optab, op0,
4165					  tquotient, 0);
4166		else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4167		  {
4168		    /* This case is not handled correctly below.  */
4169		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
4170						compute_mode, 1, 1);
4171		    if (quotient == 0)
4172		      goto fail1;
4173		  }
4174		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4175			 && (rem_flag ? smod_pow2_cheap[compute_mode]
4176				      : sdiv_pow2_cheap[compute_mode])
4177			 /* We assume that cheap metric is true if the
4178			    optab has an expander for this mode.  */
4179			 && (((rem_flag ? smod_optab : sdiv_optab)
4180			      ->handlers[compute_mode].insn_code
4181			      != CODE_FOR_nothing)
4182			     || (sdivmod_optab->handlers[compute_mode]
4183				 .insn_code != CODE_FOR_nothing)))
4184		  ;
4185		else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4186		  {
4187		    if (rem_flag)
4188		      {
4189			remainder = expand_smod_pow2 (compute_mode, op0, d);
4190			if (remainder)
4191			  return gen_lowpart (mode, remainder);
4192		      }
4193
4194		    if (sdiv_pow2_cheap[compute_mode]
4195			&& ((sdiv_optab->handlers[compute_mode].insn_code
4196			     != CODE_FOR_nothing)
4197			    || (sdivmod_optab->handlers[compute_mode].insn_code
4198				!= CODE_FOR_nothing)))
4199		      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4200						compute_mode, op0,
4201						gen_int_mode (abs_d,
4202							      compute_mode),
4203						NULL_RTX, 0);
4204		    else
4205		      quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4206
4207		    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4208		       negate the quotient.  */
4209		    if (d < 0)
4210		      {
4211			insn = get_last_insn ();
4212			if (insn != last
4213			    && (set = single_set (insn)) != 0
4214			    && SET_DEST (set) == quotient
4215			    && abs_d < ((unsigned HOST_WIDE_INT) 1
4216					<< (HOST_BITS_PER_WIDE_INT - 1)))
4217			  set_unique_reg_note (insn,
4218					       REG_EQUAL,
4219					       gen_rtx_DIV (compute_mode,
4220							    op0,
4221							    GEN_INT
4222							    (trunc_int_for_mode
4223							     (abs_d,
4224							      compute_mode))));
4225
4226			quotient = expand_unop (compute_mode, neg_optab,
4227						quotient, quotient, 0);
4228		      }
4229		  }
4230		else if (size <= HOST_BITS_PER_WIDE_INT)
4231		  {
4232		    choose_multiplier (abs_d, size, size - 1,
4233				       &mlr, &post_shift, &lgup);
4234		    ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4235		    if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4236		      {
4237			rtx t1, t2, t3;
4238
4239			if (post_shift >= BITS_PER_WORD
4240			    || size - 1 >= BITS_PER_WORD)
4241			  goto fail1;
4242
4243			extra_cost = (shift_cost[compute_mode][post_shift]
4244				      + shift_cost[compute_mode][size - 1]
4245				      + add_cost[compute_mode]);
4246			t1 = expand_mult_highpart (compute_mode, op0, mlr,
4247						   NULL_RTX, 0,
4248						   max_cost - extra_cost);
4249			if (t1 == 0)
4250			  goto fail1;
4251			t2 = expand_shift
4252			  (RSHIFT_EXPR, compute_mode, t1,
4253			   build_int_cst (NULL_TREE, post_shift),
4254			   NULL_RTX, 0);
4255			t3 = expand_shift
4256			  (RSHIFT_EXPR, compute_mode, op0,
4257			   build_int_cst (NULL_TREE, size - 1),
4258			   NULL_RTX, 0);
4259			if (d < 0)
4260			  quotient
4261			    = force_operand (gen_rtx_MINUS (compute_mode,
4262							    t3, t2),
4263					     tquotient);
4264			else
4265			  quotient
4266			    = force_operand (gen_rtx_MINUS (compute_mode,
4267							    t2, t3),
4268					     tquotient);
4269		      }
4270		    else
4271		      {
4272			rtx t1, t2, t3, t4;
4273
4274			if (post_shift >= BITS_PER_WORD
4275			    || size - 1 >= BITS_PER_WORD)
4276			  goto fail1;
4277
4278			ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4279			mlr = gen_int_mode (ml, compute_mode);
4280			extra_cost = (shift_cost[compute_mode][post_shift]
4281				      + shift_cost[compute_mode][size - 1]
4282				      + 2 * add_cost[compute_mode]);
4283			t1 = expand_mult_highpart (compute_mode, op0, mlr,
4284						   NULL_RTX, 0,
4285						   max_cost - extra_cost);
4286			if (t1 == 0)
4287			  goto fail1;
4288			t2 = force_operand (gen_rtx_PLUS (compute_mode,
4289							  t1, op0),
4290					    NULL_RTX);
4291			t3 = expand_shift
4292			  (RSHIFT_EXPR, compute_mode, t2,
4293			   build_int_cst (NULL_TREE, post_shift),
4294			   NULL_RTX, 0);
4295			t4 = expand_shift
4296			  (RSHIFT_EXPR, compute_mode, op0,
4297			   build_int_cst (NULL_TREE, size - 1),
4298			   NULL_RTX, 0);
4299			if (d < 0)
4300			  quotient
4301			    = force_operand (gen_rtx_MINUS (compute_mode,
4302							    t4, t3),
4303					     tquotient);
4304			else
4305			  quotient
4306			    = force_operand (gen_rtx_MINUS (compute_mode,
4307							    t3, t4),
4308					     tquotient);
4309		      }
4310		  }
4311		else		/* Too wide mode to use tricky code */
4312		  break;
4313
4314		insn = get_last_insn ();
4315		if (insn != last
4316		    && (set = single_set (insn)) != 0
4317		    && SET_DEST (set) == quotient)
4318		  set_unique_reg_note (insn,
4319				       REG_EQUAL,
4320				       gen_rtx_DIV (compute_mode, op0, op1));
4321	      }
4322	    break;
4323	  }
4324      fail1:
4325	delete_insns_since (last);
4326	break;
4327
4328      case FLOOR_DIV_EXPR:
4329      case FLOOR_MOD_EXPR:
4330      /* We will come here only for signed operations.  */
4331	if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4332	  {
4333	    unsigned HOST_WIDE_INT mh;
4334	    int pre_shift, lgup, post_shift;
4335	    HOST_WIDE_INT d = INTVAL (op1);
4336	    rtx ml;
4337
4338	    if (d > 0)
4339	      {
4340		/* We could just as easily deal with negative constants here,
4341		   but it does not seem worth the trouble for GCC 2.6.  */
4342		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4343		  {
4344		    pre_shift = floor_log2 (d);
4345		    if (rem_flag)
4346		      {
4347			remainder = expand_binop (compute_mode, and_optab, op0,
4348						  GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4349						  remainder, 0, OPTAB_LIB_WIDEN);
4350			if (remainder)
4351			  return gen_lowpart (mode, remainder);
4352		      }
4353		    quotient = expand_shift
4354		      (RSHIFT_EXPR, compute_mode, op0,
4355		       build_int_cst (NULL_TREE, pre_shift),
4356		       tquotient, 0);
4357		  }
4358		else
4359		  {
4360		    rtx t1, t2, t3, t4;
4361
4362		    mh = choose_multiplier (d, size, size - 1,
4363					    &ml, &post_shift, &lgup);
4364		    gcc_assert (!mh);
4365
4366		    if (post_shift < BITS_PER_WORD
4367			&& size - 1 < BITS_PER_WORD)
4368		      {
4369			t1 = expand_shift
4370			  (RSHIFT_EXPR, compute_mode, op0,
4371			   build_int_cst (NULL_TREE, size - 1),
4372			   NULL_RTX, 0);
4373			t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4374					   NULL_RTX, 0, OPTAB_WIDEN);
4375			extra_cost = (shift_cost[compute_mode][post_shift]
4376				      + shift_cost[compute_mode][size - 1]
4377				      + 2 * add_cost[compute_mode]);
4378			t3 = expand_mult_highpart (compute_mode, t2, ml,
4379						   NULL_RTX, 1,
4380						   max_cost - extra_cost);
4381			if (t3 != 0)
4382			  {
4383			    t4 = expand_shift
4384			      (RSHIFT_EXPR, compute_mode, t3,
4385			       build_int_cst (NULL_TREE, post_shift),
4386			       NULL_RTX, 1);
4387			    quotient = expand_binop (compute_mode, xor_optab,
4388						     t4, t1, tquotient, 0,
4389						     OPTAB_WIDEN);
4390			  }
4391		      }
4392		  }
4393	      }
4394	    else
4395	      {
4396		rtx nsign, t1, t2, t3, t4;
4397		t1 = force_operand (gen_rtx_PLUS (compute_mode,
4398						  op0, constm1_rtx), NULL_RTX);
4399		t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4400				   0, OPTAB_WIDEN);
4401		nsign = expand_shift
4402		  (RSHIFT_EXPR, compute_mode, t2,
4403		   build_int_cst (NULL_TREE, size - 1),
4404		   NULL_RTX, 0);
4405		t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4406				    NULL_RTX);
4407		t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4408				    NULL_RTX, 0);
4409		if (t4)
4410		  {
4411		    rtx t5;
4412		    t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4413				      NULL_RTX, 0);
4414		    quotient = force_operand (gen_rtx_PLUS (compute_mode,
4415							    t4, t5),
4416					      tquotient);
4417		  }
4418	      }
4419	  }
4420
4421	if (quotient != 0)
4422	  break;
4423	delete_insns_since (last);
4424
4425	/* Try using an instruction that produces both the quotient and
4426	   remainder, using truncation.  We can easily compensate the quotient
4427	   or remainder to get floor rounding, once we have the remainder.
4428	   Notice that we compute also the final remainder value here,
4429	   and return the result right away.  */
4430	if (target == 0 || GET_MODE (target) != compute_mode)
4431	  target = gen_reg_rtx (compute_mode);
4432
4433	if (rem_flag)
4434	  {
4435	    remainder
4436	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4437	    quotient = gen_reg_rtx (compute_mode);
4438	  }
4439	else
4440	  {
4441	    quotient
4442	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4443	    remainder = gen_reg_rtx (compute_mode);
4444	  }
4445
4446	if (expand_twoval_binop (sdivmod_optab, op0, op1,
4447				 quotient, remainder, 0))
4448	  {
4449	    /* This could be computed with a branch-less sequence.
4450	       Save that for later.  */
4451	    rtx tem;
4452	    rtx label = gen_label_rtx ();
4453	    do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4454	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4455				NULL_RTX, 0, OPTAB_WIDEN);
4456	    do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4457	    expand_dec (quotient, const1_rtx);
4458	    expand_inc (remainder, op1);
4459	    emit_label (label);
4460	    return gen_lowpart (mode, rem_flag ? remainder : quotient);
4461	  }
4462
4463	/* No luck with division elimination or divmod.  Have to do it
4464	   by conditionally adjusting op0 *and* the result.  */
4465	{
4466	  rtx label1, label2, label3, label4, label5;
4467	  rtx adjusted_op0;
4468	  rtx tem;
4469
4470	  quotient = gen_reg_rtx (compute_mode);
4471	  adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4472	  label1 = gen_label_rtx ();
4473	  label2 = gen_label_rtx ();
4474	  label3 = gen_label_rtx ();
4475	  label4 = gen_label_rtx ();
4476	  label5 = gen_label_rtx ();
4477	  do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4478	  do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4479	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4480			      quotient, 0, OPTAB_LIB_WIDEN);
4481	  if (tem != quotient)
4482	    emit_move_insn (quotient, tem);
4483	  emit_jump_insn (gen_jump (label5));
4484	  emit_barrier ();
4485	  emit_label (label1);
4486	  expand_inc (adjusted_op0, const1_rtx);
4487	  emit_jump_insn (gen_jump (label4));
4488	  emit_barrier ();
4489	  emit_label (label2);
4490	  do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4491	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4492			      quotient, 0, OPTAB_LIB_WIDEN);
4493	  if (tem != quotient)
4494	    emit_move_insn (quotient, tem);
4495	  emit_jump_insn (gen_jump (label5));
4496	  emit_barrier ();
4497	  emit_label (label3);
4498	  expand_dec (adjusted_op0, const1_rtx);
4499	  emit_label (label4);
4500	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4501			      quotient, 0, OPTAB_LIB_WIDEN);
4502	  if (tem != quotient)
4503	    emit_move_insn (quotient, tem);
4504	  expand_dec (quotient, const1_rtx);
4505	  emit_label (label5);
4506	}
4507	break;
4508
4509      case CEIL_DIV_EXPR:
4510      case CEIL_MOD_EXPR:
4511	if (unsignedp)
4512	  {
4513	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4514	      {
4515		rtx t1, t2, t3;
4516		unsigned HOST_WIDE_INT d = INTVAL (op1);
4517		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4518				   build_int_cst (NULL_TREE, floor_log2 (d)),
4519				   tquotient, 1);
4520		t2 = expand_binop (compute_mode, and_optab, op0,
4521				   GEN_INT (d - 1),
4522				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4523		t3 = gen_reg_rtx (compute_mode);
4524		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4525				      compute_mode, 1, 1);
4526		if (t3 == 0)
4527		  {
4528		    rtx lab;
4529		    lab = gen_label_rtx ();
4530		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4531		    expand_inc (t1, const1_rtx);
4532		    emit_label (lab);
4533		    quotient = t1;
4534		  }
4535		else
4536		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4537							  t1, t3),
4538					    tquotient);
4539		break;
4540	      }
4541
4542	    /* Try using an instruction that produces both the quotient and
4543	       remainder, using truncation.  We can easily compensate the
4544	       quotient or remainder to get ceiling rounding, once we have the
4545	       remainder.  Notice that we compute also the final remainder
4546	       value here, and return the result right away.  */
4547	    if (target == 0 || GET_MODE (target) != compute_mode)
4548	      target = gen_reg_rtx (compute_mode);
4549
4550	    if (rem_flag)
4551	      {
4552		remainder = (REG_P (target)
4553			     ? target : gen_reg_rtx (compute_mode));
4554		quotient = gen_reg_rtx (compute_mode);
4555	      }
4556	    else
4557	      {
4558		quotient = (REG_P (target)
4559			    ? target : gen_reg_rtx (compute_mode));
4560		remainder = gen_reg_rtx (compute_mode);
4561	      }
4562
4563	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4564				     remainder, 1))
4565	      {
4566		/* This could be computed with a branch-less sequence.
4567		   Save that for later.  */
4568		rtx label = gen_label_rtx ();
4569		do_cmp_and_jump (remainder, const0_rtx, EQ,
4570				 compute_mode, label);
4571		expand_inc (quotient, const1_rtx);
4572		expand_dec (remainder, op1);
4573		emit_label (label);
4574		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4575	      }
4576
4577	    /* No luck with division elimination or divmod.  Have to do it
4578	       by conditionally adjusting op0 *and* the result.  */
4579	    {
4580	      rtx label1, label2;
4581	      rtx adjusted_op0, tem;
4582
4583	      quotient = gen_reg_rtx (compute_mode);
4584	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4585	      label1 = gen_label_rtx ();
4586	      label2 = gen_label_rtx ();
4587	      do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4588			       compute_mode, label1);
4589	      emit_move_insn  (quotient, const0_rtx);
4590	      emit_jump_insn (gen_jump (label2));
4591	      emit_barrier ();
4592	      emit_label (label1);
4593	      expand_dec (adjusted_op0, const1_rtx);
4594	      tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4595				  quotient, 1, OPTAB_LIB_WIDEN);
4596	      if (tem != quotient)
4597		emit_move_insn (quotient, tem);
4598	      expand_inc (quotient, const1_rtx);
4599	      emit_label (label2);
4600	    }
4601	  }
4602	else /* signed */
4603	  {
4604	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4605		&& INTVAL (op1) >= 0)
4606	      {
4607		/* This is extremely similar to the code for the unsigned case
4608		   above.  For 2.7 we should merge these variants, but for
4609		   2.6.1 I don't want to touch the code for unsigned since that
4610		   get used in C.  The signed case will only be used by other
4611		   languages (Ada).  */
4612
4613		rtx t1, t2, t3;
4614		unsigned HOST_WIDE_INT d = INTVAL (op1);
4615		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4616				   build_int_cst (NULL_TREE, floor_log2 (d)),
4617				   tquotient, 0);
4618		t2 = expand_binop (compute_mode, and_optab, op0,
4619				   GEN_INT (d - 1),
4620				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4621		t3 = gen_reg_rtx (compute_mode);
4622		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4623				      compute_mode, 1, 1);
4624		if (t3 == 0)
4625		  {
4626		    rtx lab;
4627		    lab = gen_label_rtx ();
4628		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4629		    expand_inc (t1, const1_rtx);
4630		    emit_label (lab);
4631		    quotient = t1;
4632		  }
4633		else
4634		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4635							  t1, t3),
4636					    tquotient);
4637		break;
4638	      }
4639
4640	    /* Try using an instruction that produces both the quotient and
4641	       remainder, using truncation.  We can easily compensate the
4642	       quotient or remainder to get ceiling rounding, once we have the
4643	       remainder.  Notice that we compute also the final remainder
4644	       value here, and return the result right away.  */
4645	    if (target == 0 || GET_MODE (target) != compute_mode)
4646	      target = gen_reg_rtx (compute_mode);
4647	    if (rem_flag)
4648	      {
4649		remainder= (REG_P (target)
4650			    ? target : gen_reg_rtx (compute_mode));
4651		quotient = gen_reg_rtx (compute_mode);
4652	      }
4653	    else
4654	      {
4655		quotient = (REG_P (target)
4656			    ? target : gen_reg_rtx (compute_mode));
4657		remainder = gen_reg_rtx (compute_mode);
4658	      }
4659
4660	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4661				     remainder, 0))
4662	      {
4663		/* This could be computed with a branch-less sequence.
4664		   Save that for later.  */
4665		rtx tem;
4666		rtx label = gen_label_rtx ();
4667		do_cmp_and_jump (remainder, const0_rtx, EQ,
4668				 compute_mode, label);
4669		tem = expand_binop (compute_mode, xor_optab, op0, op1,
4670				    NULL_RTX, 0, OPTAB_WIDEN);
4671		do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4672		expand_inc (quotient, const1_rtx);
4673		expand_dec (remainder, op1);
4674		emit_label (label);
4675		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4676	      }
4677
4678	    /* No luck with division elimination or divmod.  Have to do it
4679	       by conditionally adjusting op0 *and* the result.  */
4680	    {
4681	      rtx label1, label2, label3, label4, label5;
4682	      rtx adjusted_op0;
4683	      rtx tem;
4684
4685	      quotient = gen_reg_rtx (compute_mode);
4686	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4687	      label1 = gen_label_rtx ();
4688	      label2 = gen_label_rtx ();
4689	      label3 = gen_label_rtx ();
4690	      label4 = gen_label_rtx ();
4691	      label5 = gen_label_rtx ();
4692	      do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4693	      do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4694			       compute_mode, label1);
4695	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4696				  quotient, 0, OPTAB_LIB_WIDEN);
4697	      if (tem != quotient)
4698		emit_move_insn (quotient, tem);
4699	      emit_jump_insn (gen_jump (label5));
4700	      emit_barrier ();
4701	      emit_label (label1);
4702	      expand_dec (adjusted_op0, const1_rtx);
4703	      emit_jump_insn (gen_jump (label4));
4704	      emit_barrier ();
4705	      emit_label (label2);
4706	      do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4707			       compute_mode, label3);
4708	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4709				  quotient, 0, OPTAB_LIB_WIDEN);
4710	      if (tem != quotient)
4711		emit_move_insn (quotient, tem);
4712	      emit_jump_insn (gen_jump (label5));
4713	      emit_barrier ();
4714	      emit_label (label3);
4715	      expand_inc (adjusted_op0, const1_rtx);
4716	      emit_label (label4);
4717	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4718				  quotient, 0, OPTAB_LIB_WIDEN);
4719	      if (tem != quotient)
4720		emit_move_insn (quotient, tem);
4721	      expand_inc (quotient, const1_rtx);
4722	      emit_label (label5);
4723	    }
4724	  }
4725	break;
4726
4727      case EXACT_DIV_EXPR:
4728	if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4729	  {
4730	    HOST_WIDE_INT d = INTVAL (op1);
4731	    unsigned HOST_WIDE_INT ml;
4732	    int pre_shift;
4733	    rtx t1;
4734
4735	    pre_shift = floor_log2 (d & -d);
4736	    ml = invert_mod2n (d >> pre_shift, size);
4737	    t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4738			       build_int_cst (NULL_TREE, pre_shift),
4739			       NULL_RTX, unsignedp);
4740	    quotient = expand_mult (compute_mode, t1,
4741				    gen_int_mode (ml, compute_mode),
4742				    NULL_RTX, 1);
4743
4744	    insn = get_last_insn ();
4745	    set_unique_reg_note (insn,
4746				 REG_EQUAL,
4747				 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4748						 compute_mode,
4749						 op0, op1));
4750	  }
4751	break;
4752
4753      case ROUND_DIV_EXPR:
4754      case ROUND_MOD_EXPR:
4755	if (unsignedp)
4756	  {
4757	    rtx tem;
4758	    rtx label;
4759	    label = gen_label_rtx ();
4760	    quotient = gen_reg_rtx (compute_mode);
4761	    remainder = gen_reg_rtx (compute_mode);
4762	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4763	      {
4764		rtx tem;
4765		quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4766					 quotient, 1, OPTAB_LIB_WIDEN);
4767		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4768		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4769					  remainder, 1, OPTAB_LIB_WIDEN);
4770	      }
4771	    tem = plus_constant (op1, -1);
4772	    tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4773				build_int_cst (NULL_TREE, 1),
4774				NULL_RTX, 1);
4775	    do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4776	    expand_inc (quotient, const1_rtx);
4777	    expand_dec (remainder, op1);
4778	    emit_label (label);
4779	  }
4780	else
4781	  {
4782	    rtx abs_rem, abs_op1, tem, mask;
4783	    rtx label;
4784	    label = gen_label_rtx ();
4785	    quotient = gen_reg_rtx (compute_mode);
4786	    remainder = gen_reg_rtx (compute_mode);
4787	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4788	      {
4789		rtx tem;
4790		quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4791					 quotient, 0, OPTAB_LIB_WIDEN);
4792		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4793		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4794					  remainder, 0, OPTAB_LIB_WIDEN);
4795	      }
4796	    abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4797	    abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4798	    tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4799				build_int_cst (NULL_TREE, 1),
4800				NULL_RTX, 1);
4801	    do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4802	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4803				NULL_RTX, 0, OPTAB_WIDEN);
4804	    mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4805				 build_int_cst (NULL_TREE, size - 1),
4806				 NULL_RTX, 0);
4807	    tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4808				NULL_RTX, 0, OPTAB_WIDEN);
4809	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4810				NULL_RTX, 0, OPTAB_WIDEN);
4811	    expand_inc (quotient, tem);
4812	    tem = expand_binop (compute_mode, xor_optab, mask, op1,
4813				NULL_RTX, 0, OPTAB_WIDEN);
4814	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4815				NULL_RTX, 0, OPTAB_WIDEN);
4816	    expand_dec (remainder, tem);
4817	    emit_label (label);
4818	  }
4819	return gen_lowpart (mode, rem_flag ? remainder : quotient);
4820
4821      default:
4822	gcc_unreachable ();
4823      }
4824
4825  if (quotient == 0)
4826    {
4827      if (target && GET_MODE (target) != compute_mode)
4828	target = 0;
4829
4830      if (rem_flag)
4831	{
4832	  /* Try to produce the remainder without producing the quotient.
4833	     If we seem to have a divmod pattern that does not require widening,
4834	     don't try widening here.  We should really have a WIDEN argument
4835	     to expand_twoval_binop, since what we'd really like to do here is
4836	     1) try a mod insn in compute_mode
4837	     2) try a divmod insn in compute_mode
4838	     3) try a div insn in compute_mode and multiply-subtract to get
4839	        remainder
4840	     4) try the same things with widening allowed.  */
4841	  remainder
4842	    = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4843				 op0, op1, target,
4844				 unsignedp,
4845				 ((optab2->handlers[compute_mode].insn_code
4846				   != CODE_FOR_nothing)
4847				  ? OPTAB_DIRECT : OPTAB_WIDEN));
4848	  if (remainder == 0)
4849	    {
4850	      /* No luck there.  Can we do remainder and divide at once
4851		 without a library call?  */
4852	      remainder = gen_reg_rtx (compute_mode);
4853	      if (! expand_twoval_binop ((unsignedp
4854					  ? udivmod_optab
4855					  : sdivmod_optab),
4856					 op0, op1,
4857					 NULL_RTX, remainder, unsignedp))
4858		remainder = 0;
4859	    }
4860
4861	  if (remainder)
4862	    return gen_lowpart (mode, remainder);
4863	}
4864
4865      /* Produce the quotient.  Try a quotient insn, but not a library call.
4866	 If we have a divmod in this mode, use it in preference to widening
4867	 the div (for this test we assume it will not fail). Note that optab2
4868	 is set to the one of the two optabs that the call below will use.  */
4869      quotient
4870	= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4871			     op0, op1, rem_flag ? NULL_RTX : target,
4872			     unsignedp,
4873			     ((optab2->handlers[compute_mode].insn_code
4874			       != CODE_FOR_nothing)
4875			      ? OPTAB_DIRECT : OPTAB_WIDEN));
4876
4877      if (quotient == 0)
4878	{
4879	  /* No luck there.  Try a quotient-and-remainder insn,
4880	     keeping the quotient alone.  */
4881	  quotient = gen_reg_rtx (compute_mode);
4882	  if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4883				     op0, op1,
4884				     quotient, NULL_RTX, unsignedp))
4885	    {
4886	      quotient = 0;
4887	      if (! rem_flag)
4888		/* Still no luck.  If we are not computing the remainder,
4889		   use a library call for the quotient.  */
4890		quotient = sign_expand_binop (compute_mode,
4891					      udiv_optab, sdiv_optab,
4892					      op0, op1, target,
4893					      unsignedp, OPTAB_LIB_WIDEN);
4894	    }
4895	}
4896    }
4897
4898  if (rem_flag)
4899    {
4900      if (target && GET_MODE (target) != compute_mode)
4901	target = 0;
4902
4903      if (quotient == 0)
4904	{
4905	  /* No divide instruction either.  Use library for remainder.  */
4906	  remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4907					 op0, op1, target,
4908					 unsignedp, OPTAB_LIB_WIDEN);
4909	  /* No remainder function.  Try a quotient-and-remainder
4910	     function, keeping the remainder.  */
4911	  if (!remainder)
4912	    {
4913	      remainder = gen_reg_rtx (compute_mode);
4914	      if (!expand_twoval_binop_libfunc
4915		  (unsignedp ? udivmod_optab : sdivmod_optab,
4916		   op0, op1,
4917		   NULL_RTX, remainder,
4918		   unsignedp ? UMOD : MOD))
4919		remainder = NULL_RTX;
4920	    }
4921	}
4922      else
4923	{
4924	  /* We divided.  Now finish doing X - Y * (X / Y).  */
4925	  remainder = expand_mult (compute_mode, quotient, op1,
4926				   NULL_RTX, unsignedp);
4927	  remainder = expand_binop (compute_mode, sub_optab, op0,
4928				    remainder, target, unsignedp,
4929				    OPTAB_LIB_WIDEN);
4930	}
4931    }
4932
4933  return gen_lowpart (mode, rem_flag ? remainder : quotient);
4934}
4935
4936/* Return a tree node with data type TYPE, describing the value of X.
4937   Usually this is an VAR_DECL, if there is no obvious better choice.
4938   X may be an expression, however we only support those expressions
4939   generated by loop.c.  */
4940
4941tree
4942make_tree (tree type, rtx x)
4943{
4944  tree t;
4945
4946  switch (GET_CODE (x))
4947    {
4948    case CONST_INT:
4949      {
4950	HOST_WIDE_INT hi = 0;
4951
4952	if (INTVAL (x) < 0
4953	    && !(TYPE_UNSIGNED (type)
4954		 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4955		     < HOST_BITS_PER_WIDE_INT)))
4956	  hi = -1;
4957
4958	t = build_int_cst_wide (type, INTVAL (x), hi);
4959
4960	return t;
4961      }
4962
4963    case CONST_DOUBLE:
4964      if (GET_MODE (x) == VOIDmode)
4965	t = build_int_cst_wide (type,
4966				CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4967      else
4968	{
4969	  REAL_VALUE_TYPE d;
4970
4971	  REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4972	  t = build_real (type, d);
4973	}
4974
4975      return t;
4976
4977    case CONST_VECTOR:
4978      {
4979	int units = CONST_VECTOR_NUNITS (x);
4980	tree itype = TREE_TYPE (type);
4981	tree t = NULL_TREE;
4982	int i;
4983
4984
4985	/* Build a tree with vector elements.  */
4986	for (i = units - 1; i >= 0; --i)
4987	  {
4988	    rtx elt = CONST_VECTOR_ELT (x, i);
4989	    t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4990	  }
4991
4992	return build_vector (type, t);
4993      }
4994
4995    case PLUS:
4996      return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4997			  make_tree (type, XEXP (x, 1)));
4998
4999    case MINUS:
5000      return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5001			  make_tree (type, XEXP (x, 1)));
5002
5003    case NEG:
5004      return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5005
5006    case MULT:
5007      return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5008			  make_tree (type, XEXP (x, 1)));
5009
5010    case ASHIFT:
5011      return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5012			  make_tree (type, XEXP (x, 1)));
5013
5014    case LSHIFTRT:
5015      t = lang_hooks.types.unsigned_type (type);
5016      return fold_convert (type, build2 (RSHIFT_EXPR, t,
5017			    		 make_tree (t, XEXP (x, 0)),
5018				    	 make_tree (type, XEXP (x, 1))));
5019
5020    case ASHIFTRT:
5021      t = lang_hooks.types.signed_type (type);
5022      return fold_convert (type, build2 (RSHIFT_EXPR, t,
5023					 make_tree (t, XEXP (x, 0)),
5024				    	 make_tree (type, XEXP (x, 1))));
5025
5026    case DIV:
5027      if (TREE_CODE (type) != REAL_TYPE)
5028	t = lang_hooks.types.signed_type (type);
5029      else
5030	t = type;
5031
5032      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5033				    	 make_tree (t, XEXP (x, 0)),
5034				    	 make_tree (t, XEXP (x, 1))));
5035    case UDIV:
5036      t = lang_hooks.types.unsigned_type (type);
5037      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5038				    	 make_tree (t, XEXP (x, 0)),
5039				    	 make_tree (t, XEXP (x, 1))));
5040
5041    case SIGN_EXTEND:
5042    case ZERO_EXTEND:
5043      t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5044					  GET_CODE (x) == ZERO_EXTEND);
5045      return fold_convert (type, make_tree (t, XEXP (x, 0)));
5046
5047    case CONST:
5048      return make_tree (type, XEXP (x, 0));
5049
5050    case SYMBOL_REF:
5051      t = SYMBOL_REF_DECL (x);
5052      if (t)
5053	return fold_convert (type, build_fold_addr_expr (t));
5054      /* else fall through.  */
5055
5056    default:
5057      t = build_decl (VAR_DECL, NULL_TREE, type);
5058
5059      /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
5060	 ptr_mode.  So convert.  */
5061      if (POINTER_TYPE_P (type))
5062	x = convert_memory_address (TYPE_MODE (type), x);
5063
5064      /* Note that we do *not* use SET_DECL_RTL here, because we do not
5065	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5066      t->decl_with_rtl.rtl = x;
5067
5068      return t;
5069    }
5070}
5071
5072/* Compute the logical-and of OP0 and OP1, storing it in TARGET
5073   and returning TARGET.
5074
5075   If TARGET is 0, a pseudo-register or constant is returned.  */
5076
5077rtx
5078expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5079{
5080  rtx tem = 0;
5081
5082  if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5083    tem = simplify_binary_operation (AND, mode, op0, op1);
5084  if (tem == 0)
5085    tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5086
5087  if (target == 0)
5088    target = tem;
5089  else if (tem != target)
5090    emit_move_insn (target, tem);
5091  return target;
5092}
5093
5094/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5095   and storing in TARGET.  Normally return TARGET.
5096   Return 0 if that cannot be done.
5097
5098   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5099   it is VOIDmode, they cannot both be CONST_INT.
5100
5101   UNSIGNEDP is for the case where we have to widen the operands
5102   to perform the operation.  It says to use zero-extension.
5103
5104   NORMALIZEP is 1 if we should convert the result to be either zero
5105   or one.  Normalize is -1 if we should convert the result to be
5106   either zero or -1.  If NORMALIZEP is zero, the result will be left
5107   "raw" out of the scc insn.  */
5108
5109rtx
5110emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5111		 enum machine_mode mode, int unsignedp, int normalizep)
5112{
5113  rtx subtarget;
5114  enum insn_code icode;
5115  enum machine_mode compare_mode;
5116  enum machine_mode target_mode = GET_MODE (target);
5117  rtx tem;
5118  rtx last = get_last_insn ();
5119  rtx pattern, comparison;
5120
5121  if (unsignedp)
5122    code = unsigned_condition (code);
5123
5124  /* If one operand is constant, make it the second one.  Only do this
5125     if the other operand is not constant as well.  */
5126
5127  if (swap_commutative_operands_p (op0, op1))
5128    {
5129      tem = op0;
5130      op0 = op1;
5131      op1 = tem;
5132      code = swap_condition (code);
5133    }
5134
5135  if (mode == VOIDmode)
5136    mode = GET_MODE (op0);
5137
5138  /* For some comparisons with 1 and -1, we can convert this to
5139     comparisons with zero.  This will often produce more opportunities for
5140     store-flag insns.  */
5141
5142  switch (code)
5143    {
5144    case LT:
5145      if (op1 == const1_rtx)
5146	op1 = const0_rtx, code = LE;
5147      break;
5148    case LE:
5149      if (op1 == constm1_rtx)
5150	op1 = const0_rtx, code = LT;
5151      break;
5152    case GE:
5153      if (op1 == const1_rtx)
5154	op1 = const0_rtx, code = GT;
5155      break;
5156    case GT:
5157      if (op1 == constm1_rtx)
5158	op1 = const0_rtx, code = GE;
5159      break;
5160    case GEU:
5161      if (op1 == const1_rtx)
5162	op1 = const0_rtx, code = NE;
5163      break;
5164    case LTU:
5165      if (op1 == const1_rtx)
5166	op1 = const0_rtx, code = EQ;
5167      break;
5168    default:
5169      break;
5170    }
5171
5172  /* If we are comparing a double-word integer with zero or -1, we can
5173     convert the comparison into one involving a single word.  */
5174  if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5175      && GET_MODE_CLASS (mode) == MODE_INT
5176      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5177    {
5178      if ((code == EQ || code == NE)
5179	  && (op1 == const0_rtx || op1 == constm1_rtx))
5180	{
5181	  rtx op00, op01, op0both;
5182
5183	  /* Do a logical OR or AND of the two words and compare the result.  */
5184	  op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5185	  op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5186	  op0both = expand_binop (word_mode,
5187				  op1 == const0_rtx ? ior_optab : and_optab,
5188				  op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
5189
5190	  if (op0both != 0)
5191	    return emit_store_flag (target, code, op0both, op1, word_mode,
5192				    unsignedp, normalizep);
5193	}
5194      else if ((code == LT || code == GE) && op1 == const0_rtx)
5195	{
5196	  rtx op0h;
5197
5198	  /* If testing the sign bit, can just test on high word.  */
5199	  op0h = simplify_gen_subreg (word_mode, op0, mode,
5200				      subreg_highpart_offset (word_mode, mode));
5201	  return emit_store_flag (target, code, op0h, op1, word_mode,
5202				  unsignedp, normalizep);
5203	}
5204    }
5205
5206  /* From now on, we won't change CODE, so set ICODE now.  */
5207  icode = setcc_gen_code[(int) code];
5208
5209  /* If this is A < 0 or A >= 0, we can do this by taking the ones
5210     complement of A (for GE) and shifting the sign bit to the low bit.  */
5211  if (op1 == const0_rtx && (code == LT || code == GE)
5212      && GET_MODE_CLASS (mode) == MODE_INT
5213      && (normalizep || STORE_FLAG_VALUE == 1
5214	  || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5215	      && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5216		  == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
5217    {
5218      subtarget = target;
5219
5220      /* If the result is to be wider than OP0, it is best to convert it
5221	 first.  If it is to be narrower, it is *incorrect* to convert it
5222	 first.  */
5223      if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5224	{
5225	  op0 = convert_modes (target_mode, mode, op0, 0);
5226	  mode = target_mode;
5227	}
5228
5229      if (target_mode != mode)
5230	subtarget = 0;
5231
5232      if (code == GE)
5233	op0 = expand_unop (mode, one_cmpl_optab, op0,
5234			   ((STORE_FLAG_VALUE == 1 || normalizep)
5235			    ? 0 : subtarget), 0);
5236
5237      if (STORE_FLAG_VALUE == 1 || normalizep)
5238	/* If we are supposed to produce a 0/1 value, we want to do
5239	   a logical shift from the sign bit to the low-order bit; for
5240	   a -1/0 value, we do an arithmetic shift.  */
5241	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5242			    size_int (GET_MODE_BITSIZE (mode) - 1),
5243			    subtarget, normalizep != -1);
5244
5245      if (mode != target_mode)
5246	op0 = convert_modes (target_mode, mode, op0, 0);
5247
5248      return op0;
5249    }
5250
5251  if (icode != CODE_FOR_nothing)
5252    {
5253      insn_operand_predicate_fn pred;
5254
5255      /* We think we may be able to do this with a scc insn.  Emit the
5256	 comparison and then the scc insn.  */
5257
5258      do_pending_stack_adjust ();
5259      last = get_last_insn ();
5260
5261      comparison
5262	= compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5263      if (CONSTANT_P (comparison))
5264	{
5265	  switch (GET_CODE (comparison))
5266	    {
5267	    case CONST_INT:
5268	      if (comparison == const0_rtx)
5269		return const0_rtx;
5270	      break;
5271
5272#ifdef FLOAT_STORE_FLAG_VALUE
5273	    case CONST_DOUBLE:
5274	      if (comparison == CONST0_RTX (GET_MODE (comparison)))
5275		return const0_rtx;
5276	      break;
5277#endif
5278	    default:
5279	      gcc_unreachable ();
5280	    }
5281
5282	  if (normalizep == 1)
5283	    return const1_rtx;
5284	  if (normalizep == -1)
5285	    return constm1_rtx;
5286	  return const_true_rtx;
5287	}
5288
5289      /* The code of COMPARISON may not match CODE if compare_from_rtx
5290	 decided to swap its operands and reverse the original code.
5291
5292	 We know that compare_from_rtx returns either a CONST_INT or
5293	 a new comparison code, so it is safe to just extract the
5294	 code from COMPARISON.  */
5295      code = GET_CODE (comparison);
5296
5297      /* Get a reference to the target in the proper mode for this insn.  */
5298      compare_mode = insn_data[(int) icode].operand[0].mode;
5299      subtarget = target;
5300      pred = insn_data[(int) icode].operand[0].predicate;
5301      if (optimize || ! (*pred) (subtarget, compare_mode))
5302	subtarget = gen_reg_rtx (compare_mode);
5303
5304      pattern = GEN_FCN (icode) (subtarget);
5305      if (pattern)
5306	{
5307	  emit_insn (pattern);
5308
5309	  /* If we are converting to a wider mode, first convert to
5310	     TARGET_MODE, then normalize.  This produces better combining
5311	     opportunities on machines that have a SIGN_EXTRACT when we are
5312	     testing a single bit.  This mostly benefits the 68k.
5313
5314	     If STORE_FLAG_VALUE does not have the sign bit set when
5315	     interpreted in COMPARE_MODE, we can do this conversion as
5316	     unsigned, which is usually more efficient.  */
5317	  if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
5318	    {
5319	      convert_move (target, subtarget,
5320			    (GET_MODE_BITSIZE (compare_mode)
5321			     <= HOST_BITS_PER_WIDE_INT)
5322			    && 0 == (STORE_FLAG_VALUE
5323				     & ((HOST_WIDE_INT) 1
5324					<< (GET_MODE_BITSIZE (compare_mode) -1))));
5325	      op0 = target;
5326	      compare_mode = target_mode;
5327	    }
5328	  else
5329	    op0 = subtarget;
5330
5331	  /* If we want to keep subexpressions around, don't reuse our
5332	     last target.  */
5333
5334	  if (optimize)
5335	    subtarget = 0;
5336
5337	  /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
5338	     we don't have to do anything.  */
5339	  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5340	    ;
5341	  /* STORE_FLAG_VALUE might be the most negative number, so write
5342	     the comparison this way to avoid a compiler-time warning.  */
5343	  else if (- normalizep == STORE_FLAG_VALUE)
5344	    op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
5345
5346	  /* We don't want to use STORE_FLAG_VALUE < 0 below since this
5347	     makes it hard to use a value of just the sign bit due to
5348	     ANSI integer constant typing rules.  */
5349	  else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
5350		   && (STORE_FLAG_VALUE
5351		       & ((HOST_WIDE_INT) 1
5352			  << (GET_MODE_BITSIZE (compare_mode) - 1))))
5353	    op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5354				size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5355				subtarget, normalizep == 1);
5356	  else
5357	    {
5358	      gcc_assert (STORE_FLAG_VALUE & 1);
5359
5360	      op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5361	      if (normalizep == -1)
5362		op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5363	    }
5364
5365	  /* If we were converting to a smaller mode, do the
5366	     conversion now.  */
5367	  if (target_mode != compare_mode)
5368	    {
5369	      convert_move (target, op0, 0);
5370	      return target;
5371	    }
5372	  else
5373	    return op0;
5374	}
5375    }
5376
5377  delete_insns_since (last);
5378
5379  /* If optimizing, use different pseudo registers for each insn, instead
5380     of reusing the same pseudo.  This leads to better CSE, but slows
5381     down the compiler, since there are more pseudos */
5382  subtarget = (!optimize
5383	       && (target_mode == mode)) ? target : NULL_RTX;
5384
5385  /* If we reached here, we can't do this with a scc insn.  However, there
5386     are some comparisons that can be done directly.  For example, if
5387     this is an equality comparison of integers, we can try to exclusive-or
5388     (or subtract) the two operands and use a recursive call to try the
5389     comparison with zero.  Don't do any of these cases if branches are
5390     very cheap.  */
5391
5392  if (BRANCH_COST > 0
5393      && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5394      && op1 != const0_rtx)
5395    {
5396      tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5397			  OPTAB_WIDEN);
5398
5399      if (tem == 0)
5400	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5401			    OPTAB_WIDEN);
5402      if (tem != 0)
5403	tem = emit_store_flag (target, code, tem, const0_rtx,
5404			       mode, unsignedp, normalizep);
5405      if (tem == 0)
5406	delete_insns_since (last);
5407      return tem;
5408    }
5409
5410  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5411     the constant zero.  Reject all other comparisons at this point.  Only
5412     do LE and GT if branches are expensive since they are expensive on
5413     2-operand machines.  */
5414
5415  if (BRANCH_COST == 0
5416      || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5417      || (code != EQ && code != NE
5418	  && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5419    return 0;
5420
5421  /* See what we need to return.  We can only return a 1, -1, or the
5422     sign bit.  */
5423
5424  if (normalizep == 0)
5425    {
5426      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5427	normalizep = STORE_FLAG_VALUE;
5428
5429      else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5430	       && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5431		   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5432	;
5433      else
5434	return 0;
5435    }
5436
5437  /* Try to put the result of the comparison in the sign bit.  Assume we can't
5438     do the necessary operation below.  */
5439
5440  tem = 0;
5441
5442  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5443     the sign bit set.  */
5444
5445  if (code == LE)
5446    {
5447      /* This is destructive, so SUBTARGET can't be OP0.  */
5448      if (rtx_equal_p (subtarget, op0))
5449	subtarget = 0;
5450
5451      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5452			  OPTAB_WIDEN);
5453      if (tem)
5454	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5455			    OPTAB_WIDEN);
5456    }
5457
5458  /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5459     number of bits in the mode of OP0, minus one.  */
5460
5461  if (code == GT)
5462    {
5463      if (rtx_equal_p (subtarget, op0))
5464	subtarget = 0;
5465
5466      tem = expand_shift (RSHIFT_EXPR, mode, op0,
5467			  size_int (GET_MODE_BITSIZE (mode) - 1),
5468			  subtarget, 0);
5469      tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5470			  OPTAB_WIDEN);
5471    }
5472
5473  if (code == EQ || code == NE)
5474    {
5475      /* For EQ or NE, one way to do the comparison is to apply an operation
5476	 that converts the operand into a positive number if it is nonzero
5477	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5478	 for NE we negate.  This puts the result in the sign bit.  Then we
5479	 normalize with a shift, if needed.
5480
5481	 Two operations that can do the above actions are ABS and FFS, so try
5482	 them.  If that doesn't work, and MODE is smaller than a full word,
5483	 we can use zero-extension to the wider mode (an unsigned conversion)
5484	 as the operation.  */
5485
5486      /* Note that ABS doesn't yield a positive number for INT_MIN, but
5487	 that is compensated by the subsequent overflow when subtracting
5488	 one / negating.  */
5489
5490      if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5491	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5492      else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5493	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5494      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5495	{
5496	  tem = convert_modes (word_mode, mode, op0, 1);
5497	  mode = word_mode;
5498	}
5499
5500      if (tem != 0)
5501	{
5502	  if (code == EQ)
5503	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5504				0, OPTAB_WIDEN);
5505	  else
5506	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5507	}
5508
5509      /* If we couldn't do it that way, for NE we can "or" the two's complement
5510	 of the value with itself.  For EQ, we take the one's complement of
5511	 that "or", which is an extra insn, so we only handle EQ if branches
5512	 are expensive.  */
5513
5514      if (tem == 0 && (code == NE || BRANCH_COST > 1))
5515	{
5516	  if (rtx_equal_p (subtarget, op0))
5517	    subtarget = 0;
5518
5519	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5520	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5521			      OPTAB_WIDEN);
5522
5523	  if (tem && code == EQ)
5524	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5525	}
5526    }
5527
5528  if (tem && normalizep)
5529    tem = expand_shift (RSHIFT_EXPR, mode, tem,
5530			size_int (GET_MODE_BITSIZE (mode) - 1),
5531			subtarget, normalizep == 1);
5532
5533  if (tem)
5534    {
5535      if (GET_MODE (tem) != target_mode)
5536	{
5537	  convert_move (target, tem, 0);
5538	  tem = target;
5539	}
5540      else if (!subtarget)
5541	{
5542	  emit_move_insn (target, tem);
5543	  tem = target;
5544	}
5545    }
5546  else
5547    delete_insns_since (last);
5548
5549  return tem;
5550}
5551
5552/* Like emit_store_flag, but always succeeds.  */
5553
5554rtx
5555emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5556		       enum machine_mode mode, int unsignedp, int normalizep)
5557{
5558  rtx tem, label;
5559
5560  /* First see if emit_store_flag can do the job.  */
5561  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5562  if (tem != 0)
5563    return tem;
5564
5565  if (normalizep == 0)
5566    normalizep = 1;
5567
5568  /* If this failed, we have to do this with set/compare/jump/set code.  */
5569
5570  if (!REG_P (target)
5571      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5572    target = gen_reg_rtx (GET_MODE (target));
5573
5574  emit_move_insn (target, const1_rtx);
5575  label = gen_label_rtx ();
5576  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5577			   NULL_RTX, label);
5578
5579  emit_move_insn (target, const0_rtx);
5580  emit_label (label);
5581
5582  return target;
5583}
5584
5585/* Perform possibly multi-word comparison and conditional jump to LABEL
5586   if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5587   now a thin wrapper around do_compare_rtx_and_jump.  */
5588
5589static void
5590do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5591		 rtx label)
5592{
5593  int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5594  do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5595			   NULL_RTX, NULL_RTX, label);
5596}
5597