1/* Medium-level subroutines: convert bit-field store and extract
2   and shifts, multiplies and divides to rtl instructions.
3   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4   1999, 2000 Free Software Foundation, Inc.
5
6This file is part of GNU CC.
7
8GNU CC is free software; you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
10the Free Software Foundation; either version 2, or (at your option)
11any later version.
12
13GNU CC is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with GNU CC; see the file COPYING.  If not, write to
20the Free Software Foundation, 59 Temple Place - Suite 330,
21Boston, MA 02111-1307, USA.  */
22
23
24#include "config.h"
25#include "system.h"
26#include "toplev.h"
27#include "rtl.h"
28#include "tree.h"
29#include "flags.h"
30#include "insn-flags.h"
31#include "insn-codes.h"
32#include "insn-config.h"
33#include "expr.h"
34#include "real.h"
35#include "recog.h"
36
37static void store_fixed_bit_field	PROTO((rtx, int, int, int, rtx, int));
38static void store_split_bit_field	PROTO((rtx, int, int, rtx, int));
39static rtx extract_fixed_bit_field	PROTO((enum machine_mode, rtx, int,
40					       int, int, rtx, int, int));
41static rtx mask_rtx			PROTO((enum machine_mode, int,
42					       int, int));
43static rtx lshift_value			PROTO((enum machine_mode, rtx,
44					       int, int));
45static rtx extract_split_bit_field	PROTO((rtx, int, int, int, int));
46static void do_cmp_and_jump		PROTO((rtx, rtx, enum rtx_code,
47					       enum machine_mode, rtx));
48
49#define CEIL(x,y) (((x) + (y) - 1) / (y))
50
51/* Non-zero means divides or modulus operations are relatively cheap for
52   powers of two, so don't use branches; emit the operation instead.
53   Usually, this will mean that the MD file will emit non-branch
54   sequences.  */
55
56static int sdiv_pow2_cheap, smod_pow2_cheap;
57
58#ifndef SLOW_UNALIGNED_ACCESS
59#define SLOW_UNALIGNED_ACCESS STRICT_ALIGNMENT
60#endif
61
62/* For compilers that support multiple targets with different word sizes,
63   MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
64   is the H8/300(H) compiler.  */
65
66#ifndef MAX_BITS_PER_WORD
67#define MAX_BITS_PER_WORD BITS_PER_WORD
68#endif
69
70/* Cost of various pieces of RTL.  Note that some of these are indexed by shift count,
71   and some by mode.  */
72static int add_cost, negate_cost, zero_cost;
73static int shift_cost[MAX_BITS_PER_WORD];
74static int shiftadd_cost[MAX_BITS_PER_WORD];
75static int shiftsub_cost[MAX_BITS_PER_WORD];
76static int mul_cost[NUM_MACHINE_MODES];
77static int div_cost[NUM_MACHINE_MODES];
78static int mul_widen_cost[NUM_MACHINE_MODES];
79static int mul_highpart_cost[NUM_MACHINE_MODES];
80
81void
82init_expmed ()
83{
84  char *free_point;
85  /* This is "some random pseudo register" for purposes of calling recog
86     to see what insns exist.  */
87  rtx reg = gen_rtx_REG (word_mode, 10000);
88  rtx shift_insn, shiftadd_insn, shiftsub_insn;
89  int dummy;
90  int m;
91  enum machine_mode mode, wider_mode;
92
93  start_sequence ();
94
95  /* Since we are on the permanent obstack, we must be sure we save this
96     spot AFTER we call start_sequence, since it will reuse the rtl it
97     makes.  */
98  free_point = (char *) oballoc (0);
99
100  reg = gen_rtx (REG, word_mode, 10000);
101
102  zero_cost = rtx_cost (const0_rtx, 0);
103  add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
104
105  shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
106				       gen_rtx_ASHIFT (word_mode, reg,
107						       const0_rtx)));
108
109  shiftadd_insn
110    = emit_insn (gen_rtx_SET (VOIDmode, reg,
111			      gen_rtx_PLUS (word_mode,
112					    gen_rtx_MULT (word_mode,
113							  reg, const0_rtx),
114					    reg)));
115
116  shiftsub_insn
117    = emit_insn (gen_rtx_SET (VOIDmode, reg,
118			      gen_rtx_MINUS (word_mode,
119					     gen_rtx_MULT (word_mode,
120							   reg, const0_rtx),
121					     reg)));
122
123  init_recog ();
124
125  shift_cost[0] = 0;
126  shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
127
128  for (m = 1; m < MAX_BITS_PER_WORD; m++)
129    {
130      shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
131
132      XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
133      if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
134	shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
135
136      XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1)
137	= GEN_INT ((HOST_WIDE_INT) 1 << m);
138      if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
139	shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
140
141      XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1)
142	= GEN_INT ((HOST_WIDE_INT) 1 << m);
143      if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
144	shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
145    }
146
147  negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);
148
149  sdiv_pow2_cheap
150    = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
151       <= 2 * add_cost);
152  smod_pow2_cheap
153    = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
154       <= 2 * add_cost);
155
156  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
157       mode != VOIDmode;
158       mode = GET_MODE_WIDER_MODE (mode))
159    {
160      reg = gen_rtx_REG (mode, 10000);
161      div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
162      mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
163      wider_mode = GET_MODE_WIDER_MODE (mode);
164      if (wider_mode != VOIDmode)
165	{
166	  mul_widen_cost[(int) wider_mode]
167	    = rtx_cost (gen_rtx_MULT (wider_mode,
168				      gen_rtx_ZERO_EXTEND (wider_mode, reg),
169				      gen_rtx_ZERO_EXTEND (wider_mode, reg)),
170			SET);
171	  mul_highpart_cost[(int) mode]
172	    = rtx_cost (gen_rtx_TRUNCATE
173			(mode,
174			 gen_rtx_LSHIFTRT
175			 (wider_mode,
176			  gen_rtx_MULT (wider_mode,
177					gen_rtx_ZERO_EXTEND (wider_mode, reg),
178					gen_rtx_ZERO_EXTEND (wider_mode, reg)),
179			  GEN_INT (GET_MODE_BITSIZE (mode)))),
180			SET);
181	}
182    }
183
184  /* Free the objects we just allocated.  */
185  end_sequence ();
186  obfree (free_point);
187}
188
189/* Return an rtx representing minus the value of X.
190   MODE is the intended mode of the result,
191   useful if X is a CONST_INT.  */
192
193rtx
194negate_rtx (mode, x)
195     enum machine_mode mode;
196     rtx x;
197{
198  rtx result = simplify_unary_operation (NEG, mode, x, mode);
199
200  if (result == 0)
201    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
202
203  return result;
204}
205
206/* Generate code to store value from rtx VALUE
207   into a bit-field within structure STR_RTX
208   containing BITSIZE bits starting at bit BITNUM.
209   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
210   ALIGN is the alignment that STR_RTX is known to have, measured in bytes.
211   TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
212
213/* ??? Note that there are two different ideas here for how
214   to determine the size to count bits within, for a register.
215   One is BITS_PER_WORD, and the other is the size of operand 3
216   of the insv pattern.
217
218   If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
219   else, we use the mode of operand 3.  */
220
221rtx
222store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
223     rtx str_rtx;
224     register int bitsize;
225     int bitnum;
226     enum machine_mode fieldmode;
227     rtx value;
228     int align;
229     int total_size;
230{
231  int unit = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
232  register int offset = bitnum / unit;
233  register int bitpos = bitnum % unit;
234  register rtx op0 = str_rtx;
235#ifdef HAVE_insv
236  int insv_bitsize;
237
238  if (insn_operand_mode[(int) CODE_FOR_insv][3] == VOIDmode)
239    insv_bitsize = GET_MODE_BITSIZE (word_mode);
240  else
241    insv_bitsize = GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_insv][3]);
242#endif
243
244  if (GET_CODE (str_rtx) == MEM && ! MEM_IN_STRUCT_P (str_rtx))
245    abort ();
246
247  /* Discount the part of the structure before the desired byte.
248     We need to know how many bytes are safe to reference after it.  */
249  if (total_size >= 0)
250    total_size -= (bitpos / BIGGEST_ALIGNMENT
251		   * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
252
253  while (GET_CODE (op0) == SUBREG)
254    {
255      /* The following line once was done only if WORDS_BIG_ENDIAN,
256	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
257	 meaningful at a much higher level; when structures are copied
258	 between memory and regs, the higher-numbered regs
259	 always get higher addresses.  */
260      offset += SUBREG_WORD (op0);
261      /* We used to adjust BITPOS here, but now we do the whole adjustment
262	 right after the loop.  */
263      op0 = SUBREG_REG (op0);
264    }
265
266  /* Make sure we are playing with integral modes.  Pun with subregs
267     if we aren't.  */
268  {
269    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
270    if (imode != GET_MODE (op0))
271      {
272	if (GET_CODE (op0) == MEM)
273	  op0 = change_address (op0, imode, NULL_RTX);
274	else if (imode != BLKmode)
275	  op0 = gen_lowpart (imode, op0);
276	else
277	  abort ();
278      }
279  }
280
281  /* If OP0 is a register, BITPOS must count within a word.
282     But as we have it, it counts within whatever size OP0 now has.
283     On a bigendian machine, these are not the same, so convert.  */
284  if (BYTES_BIG_ENDIAN
285      && GET_CODE (op0) != MEM
286      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
287    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
288
289  value = protect_from_queue (value, 0);
290
291  if (flag_force_mem)
292    value = force_not_mem (value);
293
294  /* Note that the adjustment of BITPOS above has no effect on whether
295     BITPOS is 0 in a REG bigger than a word.  */
296  if (GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
297      && (GET_CODE (op0) != MEM
298	  || ! SLOW_UNALIGNED_ACCESS
299	  || (offset * BITS_PER_UNIT % bitsize == 0
300	      && align % GET_MODE_SIZE (fieldmode) == 0))
301      && bitpos == 0 && bitsize == GET_MODE_BITSIZE (fieldmode))
302    {
303      /* Storing in a full-word or multi-word field in a register
304	 can be done with just SUBREG.  */
305      if (GET_MODE (op0) != fieldmode)
306	{
307	  if (GET_CODE (op0) == SUBREG)
308	    {
309	      if (GET_MODE (SUBREG_REG (op0)) == fieldmode
310		  || GET_MODE_CLASS (fieldmode) == MODE_INT
311		  || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
312		op0 = SUBREG_REG (op0);
313	      else
314		/* Else we've got some float mode source being extracted into
315		   a different float mode destination -- this combination of
316		   subregs results in Severe Tire Damage.  */
317		abort ();
318	    }
319	  if (GET_CODE (op0) == REG)
320	    op0 = gen_rtx_SUBREG (fieldmode, op0, offset);
321	  else
322	    op0 = change_address (op0, fieldmode,
323				  plus_constant (XEXP (op0, 0), offset));
324	}
325      emit_move_insn (op0, value);
326      return value;
327    }
328
329  /* Storing an lsb-aligned field in a register
330     can be done with a movestrict instruction.  */
331
332  if (GET_CODE (op0) != MEM
333      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
334      && bitsize == GET_MODE_BITSIZE (fieldmode)
335      && (GET_MODE (op0) == fieldmode
336	  || (movstrict_optab->handlers[(int) fieldmode].insn_code
337	      != CODE_FOR_nothing)))
338    {
339      /* Get appropriate low part of the value being stored.  */
340      if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
341	value = gen_lowpart (fieldmode, value);
342      else if (!(GET_CODE (value) == SYMBOL_REF
343		 || GET_CODE (value) == LABEL_REF
344		 || GET_CODE (value) == CONST))
345	value = convert_to_mode (fieldmode, value, 0);
346
347      if (GET_MODE (op0) == fieldmode)
348	emit_move_insn (op0, value);
349      else
350	{
351	  int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
352	  if (! (*insn_operand_predicate[icode][1]) (value, fieldmode))
353	    value = copy_to_mode_reg (fieldmode, value);
354
355	  if (GET_CODE (op0) == SUBREG)
356	    {
357	      if (GET_MODE (SUBREG_REG (op0)) == fieldmode
358		  || GET_MODE_CLASS (fieldmode) == MODE_INT
359		  || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
360		op0 = SUBREG_REG (op0);
361	      else
362		/* Else we've got some float mode source being extracted into
363		   a different float mode destination -- this combination of
364		   subregs results in Severe Tire Damage.  */
365		abort ();
366	    }
367
368	  emit_insn (GEN_FCN (icode)
369		   (gen_rtx_SUBREG (fieldmode, op0, offset), value));
370	}
371      return value;
372    }
373
374  /* Handle fields bigger than a word.  */
375
376  if (bitsize > BITS_PER_WORD)
377    {
378      /* Here we transfer the words of the field
379	 in the order least significant first.
380	 This is because the most significant word is the one which may
381	 be less than full.
382	 However, only do that if the value is not BLKmode.  */
383
384      int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
385
386      int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
387      int i;
388
389      /* This is the mode we must force value to, so that there will be enough
390	 subwords to extract.  Note that fieldmode will often (always?) be
391	 VOIDmode, because that is what store_field uses to indicate that this
392	 is a bit field, but passing VOIDmode to operand_subword_force will
393	 result in an abort.  */
394      fieldmode = mode_for_size (nwords * BITS_PER_WORD, MODE_INT, 0);
395
396      for (i = 0; i < nwords; i++)
397	{
398	  /* If I is 0, use the low-order word in both field and target;
399	     if I is 1, use the next to lowest word; and so on.  */
400	  int wordnum = (backwards ? nwords - i - 1 : i);
401	  int bit_offset = (backwards
402			    ? MAX (bitsize - (i + 1) * BITS_PER_WORD, 0)
403			    : i * BITS_PER_WORD);
404	  store_bit_field (op0, MIN (BITS_PER_WORD,
405				     bitsize - i * BITS_PER_WORD),
406			   bitnum + bit_offset, word_mode,
407			   operand_subword_force (value, wordnum,
408						  (GET_MODE (value) == VOIDmode
409						   ? fieldmode
410						   : GET_MODE (value))),
411			   align, total_size);
412	}
413      return value;
414    }
415
416  /* From here on we can assume that the field to be stored in is
417     a full-word (whatever type that is), since it is shorter than a word.  */
418
419  /* OFFSET is the number of words or bytes (UNIT says which)
420     from STR_RTX to the first word or byte containing part of the field.  */
421
422  if (GET_CODE (op0) != MEM)
423    {
424      if (offset != 0
425	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
426	{
427	  if (GET_CODE (op0) != REG)
428	    {
429	      /* Since this is a destination (lvalue), we can't copy it to a
430		 pseudo.  We can trivially remove a SUBREG that does not
431		 change the size of the operand.  Such a SUBREG may have been
432		 added above.  Otherwise, abort.  */
433	      if (GET_CODE (op0) == SUBREG
434		  && (GET_MODE_SIZE (GET_MODE (op0))
435		      == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
436		op0 = SUBREG_REG (op0);
437	      else
438		abort ();
439	    }
440	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
441		                op0, offset);
442	}
443      offset = 0;
444    }
445  else
446    {
447      op0 = protect_from_queue (op0, 1);
448    }
449
450  /* If VALUE is a floating-point mode, access it as an integer of the
451     corresponding size.  This can occur on a machine with 64 bit registers
452     that uses SFmode for float.  This can also occur for unaligned float
453     structure fields.  */
454  if (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT)
455    {
456      if (GET_CODE (value) != REG)
457	value = copy_to_reg (value);
458      value = gen_rtx_SUBREG (word_mode, value, 0);
459    }
460
461  /* Now OFFSET is nonzero only if OP0 is memory
462     and is therefore always measured in bytes.  */
463
464#ifdef HAVE_insv
465  if (HAVE_insv
466      && GET_MODE (value) != BLKmode
467      && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
468      /* Ensure insv's size is wide enough for this field.  */
469      && (insv_bitsize >= bitsize)
470      && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
471	    && (bitsize + bitpos > insv_bitsize)))
472    {
473      int xbitpos = bitpos;
474      rtx value1;
475      rtx xop0 = op0;
476      rtx last = get_last_insn ();
477      rtx pat;
478      enum machine_mode maxmode;
479      int save_volatile_ok = volatile_ok;
480
481      maxmode = insn_operand_mode[(int) CODE_FOR_insv][3];
482      if (maxmode == VOIDmode)
483	maxmode = word_mode;
484
485      volatile_ok = 1;
486
487      /* If this machine's insv can only insert into a register, copy OP0
488	 into a register and save it back later.  */
489      /* This used to check flag_force_mem, but that was a serious
490	 de-optimization now that flag_force_mem is enabled by -O2.  */
491      if (GET_CODE (op0) == MEM
492	  && ! ((*insn_operand_predicate[(int) CODE_FOR_insv][0])
493		(op0, VOIDmode)))
494	{
495	  rtx tempreg;
496	  enum machine_mode bestmode;
497
498	  /* Get the mode to use for inserting into this field.  If OP0 is
499	     BLKmode, get the smallest mode consistent with the alignment. If
500	     OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
501	     mode. Otherwise, use the smallest mode containing the field.  */
502
503	  if (GET_MODE (op0) == BLKmode
504	      || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
505	    bestmode
506	      = get_best_mode (bitsize, bitnum, align * BITS_PER_UNIT, maxmode,
507			       MEM_VOLATILE_P (op0));
508	  else
509	    bestmode = GET_MODE (op0);
510
511	  if (bestmode == VOIDmode
512	      || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
513	    goto insv_loses;
514
515	  /* Adjust address to point to the containing unit of that mode.  */
516	  unit = GET_MODE_BITSIZE (bestmode);
517	  /* Compute offset as multiple of this unit, counting in bytes.  */
518	  offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
519	  bitpos = bitnum % unit;
520	  op0 = change_address (op0, bestmode,
521				plus_constant (XEXP (op0, 0), offset));
522
523	  /* Fetch that unit, store the bitfield in it, then store the unit.  */
524	  tempreg = copy_to_reg (op0);
525	  store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
526			   align, total_size);
527	  emit_move_insn (op0, tempreg);
528	  return value;
529	}
530      volatile_ok = save_volatile_ok;
531
532      /* Add OFFSET into OP0's address.  */
533      if (GET_CODE (xop0) == MEM)
534	xop0 = change_address (xop0, byte_mode,
535			       plus_constant (XEXP (xop0, 0), offset));
536
537      /* If xop0 is a register, we need it in MAXMODE
538	 to make it acceptable to the format of insv.  */
539      if (GET_CODE (xop0) == SUBREG)
540	/* We can't just change the mode, because this might clobber op0,
541	   and we will need the original value of op0 if insv fails.  */
542	xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_WORD (xop0));
543      if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
544	xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
545
546      /* On big-endian machines, we count bits from the most significant.
547	 If the bit field insn does not, we must invert.  */
548
549      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
550	xbitpos = unit - bitsize - xbitpos;
551
552      /* We have been counting XBITPOS within UNIT.
553	 Count instead within the size of the register.  */
554      if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
555	xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
556
557      unit = GET_MODE_BITSIZE (maxmode);
558
559      /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
560      value1 = value;
561      if (GET_MODE (value) != maxmode)
562	{
563	  if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
564	    {
565	      /* Optimization: Don't bother really extending VALUE
566		 if it has all the bits we will actually use.  However,
567		 if we must narrow it, be sure we do it correctly.  */
568
569	      if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
570		{
571		  /* Avoid making subreg of a subreg, or of a mem.  */
572		  if (GET_CODE (value1) != REG)
573		    value1 = copy_to_reg (value1);
574		  value1 = gen_rtx_SUBREG (maxmode, value1, 0);
575		}
576	      else
577		value1 = gen_lowpart (maxmode, value1);
578	    }
579	  else if (!CONSTANT_P (value))
580	    /* Parse phase is supposed to make VALUE's data type
581	       match that of the component reference, which is a type
582	       at least as wide as the field; so VALUE should have
583	       a mode that corresponds to that type.  */
584	    abort ();
585	}
586
587      /* If this machine's insv insists on a register,
588	 get VALUE1 into a register.  */
589      if (! ((*insn_operand_predicate[(int) CODE_FOR_insv][3])
590	     (value1, maxmode)))
591	value1 = force_reg (maxmode, value1);
592
593      pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
594      if (pat)
595	emit_insn (pat);
596      else
597        {
598	  delete_insns_since (last);
599	  store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
600	}
601    }
602  else
603    insv_loses:
604#endif
605    /* Insv is not available; store using shifts and boolean ops.  */
606    store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
607  return value;
608}
609
610/* Use shifts and boolean operations to store VALUE
611   into a bit field of width BITSIZE
612   in a memory location specified by OP0 except offset by OFFSET bytes.
613     (OFFSET must be 0 if OP0 is a register.)
614   The field starts at position BITPOS within the byte.
615    (If OP0 is a register, it may be a full word or a narrower mode,
616     but BITPOS still counts within a full word,
617     which is significant on bigendian machines.)
618   STRUCT_ALIGN is the alignment the structure is known to have (in bytes).
619
620   Note that protect_from_queue has already been done on OP0 and VALUE.  */
621
622static void
623store_fixed_bit_field (op0, offset, bitsize, bitpos, value, struct_align)
624     register rtx op0;
625     register int offset, bitsize, bitpos;
626     register rtx value;
627     int struct_align;
628{
629  register enum machine_mode mode;
630  int total_bits = BITS_PER_WORD;
631  rtx subtarget, temp;
632  int all_zero = 0;
633  int all_one = 0;
634
635  if (! SLOW_UNALIGNED_ACCESS)
636    struct_align = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
637
638  /* There is a case not handled here:
639     a structure with a known alignment of just a halfword
640     and a field split across two aligned halfwords within the structure.
641     Or likewise a structure with a known alignment of just a byte
642     and a field split across two bytes.
643     Such cases are not supposed to be able to occur.  */
644
645  if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
646    {
647      if (offset != 0)
648	abort ();
649      /* Special treatment for a bit field split across two registers.  */
650      if (bitsize + bitpos > BITS_PER_WORD)
651	{
652	  store_split_bit_field (op0, bitsize, bitpos,
653				 value, BITS_PER_WORD);
654	  return;
655	}
656    }
657  else
658    {
659      /* Get the proper mode to use for this field.  We want a mode that
660	 includes the entire field.  If such a mode would be larger than
661	 a word, we won't be doing the extraction the normal way.  */
662
663      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
664			    struct_align * BITS_PER_UNIT, word_mode,
665			    GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));
666
667      if (mode == VOIDmode)
668	{
669	  /* The only way this should occur is if the field spans word
670	     boundaries.  */
671	  store_split_bit_field (op0,
672				 bitsize, bitpos + offset * BITS_PER_UNIT,
673				 value, struct_align);
674	  return;
675	}
676
677      total_bits = GET_MODE_BITSIZE (mode);
678
679      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
680	 be in the range 0 to total_bits-1, and put any excess bytes in
681	 OFFSET.  */
682      if (bitpos >= total_bits)
683	{
684	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
685	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
686		     * BITS_PER_UNIT);
687	}
688
689      /* Get ref to an aligned byte, halfword, or word containing the field.
690	 Adjust BITPOS to be position within a word,
691	 and OFFSET to be the offset of that word.
692	 Then alter OP0 to refer to that word.  */
693      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
694      offset -= (offset % (total_bits / BITS_PER_UNIT));
695      op0 = change_address (op0, mode,
696			    plus_constant (XEXP (op0, 0), offset));
697    }
698
699  mode = GET_MODE (op0);
700
701  /* Now MODE is either some integral mode for a MEM as OP0,
702     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
703     The bit field is contained entirely within OP0.
704     BITPOS is the starting bit number within OP0.
705     (OP0's mode may actually be narrower than MODE.)  */
706
707  if (BYTES_BIG_ENDIAN)
708      /* BITPOS is the distance between our msb
709	 and that of the containing datum.
710	 Convert it to the distance from the lsb.  */
711      bitpos = total_bits - bitsize - bitpos;
712
713  /* Now BITPOS is always the distance between our lsb
714     and that of OP0.  */
715
716  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
717     we must first convert its mode to MODE.  */
718
719  if (GET_CODE (value) == CONST_INT)
720    {
721      register HOST_WIDE_INT v = INTVAL (value);
722
723      if (bitsize < HOST_BITS_PER_WIDE_INT)
724	v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
725
726      if (v == 0)
727	all_zero = 1;
728      else if ((bitsize < HOST_BITS_PER_WIDE_INT
729		&& v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
730	       || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
731	all_one = 1;
732
733      value = lshift_value (mode, value, bitpos, bitsize);
734    }
735  else
736    {
737      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
738		      && bitpos + bitsize != GET_MODE_BITSIZE (mode));
739
740      if (GET_MODE (value) != mode)
741	{
742	  if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
743	      && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
744	    value = gen_lowpart (mode, value);
745	  else
746	    value = convert_to_mode (mode, value, 1);
747	}
748
749      if (must_and)
750	value = expand_binop (mode, and_optab, value,
751			      mask_rtx (mode, 0, bitsize, 0),
752			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
753      if (bitpos > 0)
754	value = expand_shift (LSHIFT_EXPR, mode, value,
755			      build_int_2 (bitpos, 0), NULL_RTX, 1);
756    }
757
758  /* Now clear the chosen bits in OP0,
759     except that if VALUE is -1 we need not bother.  */
760
761  subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;
762
763  if (! all_one)
764    {
765      temp = expand_binop (mode, and_optab, op0,
766			   mask_rtx (mode, bitpos, bitsize, 1),
767			   subtarget, 1, OPTAB_LIB_WIDEN);
768      subtarget = temp;
769    }
770  else
771    temp = op0;
772
773  /* Now logical-or VALUE into OP0, unless it is zero.  */
774
775  if (! all_zero)
776    temp = expand_binop (mode, ior_optab, temp, value,
777			 subtarget, 1, OPTAB_LIB_WIDEN);
778  if (op0 != temp)
779    emit_move_insn (op0, temp);
780}
781
782/* Store a bit field that is split across multiple accessible memory objects.
783
784   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
785   BITSIZE is the field width; BITPOS the position of its first bit
786   (within the word).
787   VALUE is the value to store.
788   ALIGN is the known alignment of OP0, measured in bytes.
789   This is also the size of the memory objects to be used.
790
791   This does not yet handle fields wider than BITS_PER_WORD.  */
792
793static void
794store_split_bit_field (op0, bitsize, bitpos, value, align)
795     rtx op0;
796     int bitsize, bitpos;
797     rtx value;
798     int align;
799{
800  int unit;
801  int bitsdone = 0;
802
803  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
804     much at a time.  */
805  if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
806    unit = BITS_PER_WORD;
807  else
808    unit = MIN (align * BITS_PER_UNIT, BITS_PER_WORD);
809
810  /* If VALUE is a constant other than a CONST_INT, get it into a register in
811     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
812     that VALUE might be a floating-point constant.  */
813  if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
814    {
815      rtx word = gen_lowpart_common (word_mode, value);
816
817      if (word && (value != word))
818	value = word;
819      else
820	value = gen_lowpart_common (word_mode,
821				    force_reg (GET_MODE (value) != VOIDmode
822					       ? GET_MODE (value)
823					       : word_mode, value));
824    }
825  else if (GET_CODE (value) == ADDRESSOF)
826    value = copy_to_reg (value);
827
828  while (bitsdone < bitsize)
829    {
830      int thissize;
831      rtx part, word;
832      int thispos;
833      int offset;
834
835      offset = (bitpos + bitsdone) / unit;
836      thispos = (bitpos + bitsdone) % unit;
837
838      /* THISSIZE must not overrun a word boundary.  Otherwise,
839	 store_fixed_bit_field will call us again, and we will mutually
840	 recurse forever.  */
841      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
842      thissize = MIN (thissize, unit - thispos);
843
844      if (BYTES_BIG_ENDIAN)
845	{
846	  int total_bits;
847
848	  /* We must do an endian conversion exactly the same way as it is
849	     done in extract_bit_field, so that the two calls to
850	     extract_fixed_bit_field will have comparable arguments.  */
851	  if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
852	    total_bits = BITS_PER_WORD;
853	  else
854	    total_bits = GET_MODE_BITSIZE (GET_MODE (value));
855
856	  /* Fetch successively less significant portions.  */
857	  if (GET_CODE (value) == CONST_INT)
858	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
859			     >> (bitsize - bitsdone - thissize))
860			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
861	  else
862	    /* The args are chosen so that the last part includes the
863	       lsb.  Give extract_bit_field the value it needs (with
864	       endianness compensation) to fetch the piece we want.
865
866	       ??? We have no idea what the alignment of VALUE is, so
867	       we have to use a guess.  */
868	    part
869	      = extract_fixed_bit_field
870		(word_mode, value, 0, thissize,
871		 total_bits - bitsize + bitsdone, NULL_RTX, 1,
872		 GET_MODE (value) == VOIDmode
873		 ? UNITS_PER_WORD
874		 : (GET_MODE (value) == BLKmode
875		    ? 1
876		    : GET_MODE_ALIGNMENT (GET_MODE (value)) / BITS_PER_UNIT));
877	}
878      else
879	{
880	  /* Fetch successively more significant portions.  */
881	  if (GET_CODE (value) == CONST_INT)
882	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
883			     >> bitsdone)
884			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
885	  else
886	    part
887	      = extract_fixed_bit_field
888		(word_mode, value, 0, thissize, bitsdone, NULL_RTX, 1,
889		 GET_MODE (value) == VOIDmode
890		 ? UNITS_PER_WORD
891		 : (GET_MODE (value) == BLKmode
892		    ? 1
893		    : GET_MODE_ALIGNMENT (GET_MODE (value)) / BITS_PER_UNIT));
894	}
895
896      /* If OP0 is a register, then handle OFFSET here.
897
898	 When handling multiword bitfields, extract_bit_field may pass
899	 down a word_mode SUBREG of a larger REG for a bitfield that actually
900	 crosses a word boundary.  Thus, for a SUBREG, we must find
901	 the current word starting from the base register.  */
902      if (GET_CODE (op0) == SUBREG)
903	{
904	  word = operand_subword_force (SUBREG_REG (op0),
905					SUBREG_WORD (op0) + offset,
906					GET_MODE (SUBREG_REG (op0)));
907	  offset = 0;
908	}
909      else if (GET_CODE (op0) == REG)
910	{
911	  word = operand_subword_force (op0, offset, GET_MODE (op0));
912	  offset = 0;
913	}
914      else
915	word = op0;
916
917      /* OFFSET is in UNITs, and UNIT is in bits.
918         store_fixed_bit_field wants offset in bytes.  */
919      store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT,
920			     thissize, thispos, part, align);
921      bitsdone += thissize;
922    }
923}
924
925/* Generate code to extract a byte-field from STR_RTX
926   containing BITSIZE bits, starting at BITNUM,
927   and put it in TARGET if possible (if TARGET is nonzero).
928   Regardless of TARGET, we return the rtx for where the value is placed.
929   It may be a QUEUED.
930
931   STR_RTX is the structure containing the byte (a REG or MEM).
932   UNSIGNEDP is nonzero if this is an unsigned bit field.
933   MODE is the natural mode of the field value once extracted.
934   TMODE is the mode the caller would like the value to have;
935   but the value may be returned with type MODE instead.
936
937   ALIGN is the alignment that STR_RTX is known to have, measured in bytes.
938   TOTAL_SIZE is the size in bytes of the containing structure,
939   or -1 if varying.
940
941   If a TARGET is specified and we can store in it at no extra cost,
942   we do so, and return TARGET.
943   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
944   if they are equally easy.  */
945
946rtx
947extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
948		   target, mode, tmode, align, total_size)
949     rtx str_rtx;
950     register int bitsize;
951     int bitnum;
952     int unsignedp;
953     rtx target;
954     enum machine_mode mode, tmode;
955     int align;
956     int total_size;
957{
958  int unit = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
959  register int offset = bitnum / unit;
960  register int bitpos = bitnum % unit;
961  register rtx op0 = str_rtx;
962  rtx spec_target = target;
963  rtx spec_target_subreg = 0;
964#ifdef HAVE_extv
965  int extv_bitsize;
966#endif
967#ifdef HAVE_extzv
968  int extzv_bitsize;
969#endif
970
971#ifdef HAVE_extv
972  if (insn_operand_mode[(int) CODE_FOR_extv][0] == VOIDmode)
973    extv_bitsize = GET_MODE_BITSIZE (word_mode);
974  else
975    extv_bitsize = GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extv][0]);
976#endif
977
978#ifdef HAVE_extzv
979  if (insn_operand_mode[(int) CODE_FOR_extzv][0] == VOIDmode)
980    extzv_bitsize = GET_MODE_BITSIZE (word_mode);
981  else
982    extzv_bitsize
983      = GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extzv][0]);
984#endif
985
986  /* Discount the part of the structure before the desired byte.
987     We need to know how many bytes are safe to reference after it.  */
988  if (total_size >= 0)
989    total_size -= (bitpos / BIGGEST_ALIGNMENT
990		   * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
991
992  if (tmode == VOIDmode)
993    tmode = mode;
994  while (GET_CODE (op0) == SUBREG)
995    {
996      int outer_size = GET_MODE_BITSIZE (GET_MODE (op0));
997      int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)));
998
999      offset += SUBREG_WORD (op0);
1000
1001      inner_size = MIN (inner_size, BITS_PER_WORD);
1002
1003      if (BYTES_BIG_ENDIAN && (outer_size < inner_size))
1004	{
1005	  bitpos += inner_size - outer_size;
1006	  if (bitpos > unit)
1007	    {
1008	      offset += (bitpos / unit);
1009	      bitpos %= unit;
1010	    }
1011	}
1012
1013      op0 = SUBREG_REG (op0);
1014    }
1015
1016  /* Make sure we are playing with integral modes.  Pun with subregs
1017     if we aren't.  */
1018  {
1019    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1020    if (imode != GET_MODE (op0))
1021      {
1022	if (GET_CODE (op0) == MEM)
1023	  op0 = change_address (op0, imode, NULL_RTX);
1024	else if (imode != BLKmode)
1025	  op0 = gen_lowpart (imode, op0);
1026	else
1027	  abort ();
1028      }
1029  }
1030
1031  /* ??? We currently assume TARGET is at least as big as BITSIZE.
1032     If that's wrong, the solution is to test for it and set TARGET to 0
1033     if needed.  */
1034
1035  /* If OP0 is a register, BITPOS must count within a word.
1036     But as we have it, it counts within whatever size OP0 now has.
1037     On a bigendian machine, these are not the same, so convert.  */
1038  if (BYTES_BIG_ENDIAN
1039      && GET_CODE (op0) != MEM
1040      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1041    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1042
1043  /* Extracting a full-word or multi-word value
1044     from a structure in a register or aligned memory.
1045     This can be done with just SUBREG.
1046     So too extracting a subword value in
1047     the least significant part of the register.  */
1048
1049  if (((GET_CODE (op0) != MEM
1050	&& TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1051				  GET_MODE_BITSIZE (GET_MODE (op0))))
1052       || (GET_CODE (op0) == MEM
1053	   && (! SLOW_UNALIGNED_ACCESS
1054	       || (offset * BITS_PER_UNIT % bitsize == 0
1055		   && align * BITS_PER_UNIT % bitsize == 0))))
1056      && ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1057	   && bitpos % BITS_PER_WORD == 0)
1058	  || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
1059	      /* ??? The big endian test here is wrong.  This is correct
1060		 if the value is in a register, and if mode_for_size is not
1061		 the same mode as op0.  This causes us to get unnecessarily
1062		 inefficient code from the Thumb port when -mbig-endian.  */
1063	      && (BYTES_BIG_ENDIAN
1064		  ? bitpos + bitsize == BITS_PER_WORD
1065		  : bitpos == 0))))
1066    {
1067      enum machine_mode mode1
1068	= mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0);
1069
1070      if (mode1 != GET_MODE (op0))
1071	{
1072	  if (GET_CODE (op0) == SUBREG)
1073	    {
1074	      if (GET_MODE (SUBREG_REG (op0)) == mode1
1075		  || GET_MODE_CLASS (mode1) == MODE_INT
1076		  || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1077		op0 = SUBREG_REG (op0);
1078	      else
1079		/* Else we've got some float mode source being extracted into
1080		   a different float mode destination -- this combination of
1081		   subregs results in Severe Tire Damage.  */
1082		abort ();
1083	    }
1084	  if (GET_CODE (op0) == REG)
1085	    op0 = gen_rtx_SUBREG (mode1, op0, offset);
1086	  else
1087	    op0 = change_address (op0, mode1,
1088				  plus_constant (XEXP (op0, 0), offset));
1089	}
1090      if (mode1 != mode)
1091	return convert_to_mode (tmode, op0, unsignedp);
1092      return op0;
1093    }
1094
1095  /* Handle fields bigger than a word.  */
1096
1097  if (bitsize > BITS_PER_WORD)
1098    {
1099      /* Here we transfer the words of the field
1100	 in the order least significant first.
1101	 This is because the most significant word is the one which may
1102	 be less than full.  */
1103
1104      int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1105      int i;
1106
1107      if (target == 0 || GET_CODE (target) != REG)
1108	target = gen_reg_rtx (mode);
1109
1110      /* Indicate for flow that the entire target reg is being set.  */
1111      emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1112
1113      for (i = 0; i < nwords; i++)
1114	{
1115	  /* If I is 0, use the low-order word in both field and target;
1116	     if I is 1, use the next to lowest word; and so on.  */
1117	  /* Word number in TARGET to use.  */
1118	  int wordnum = (WORDS_BIG_ENDIAN
1119			 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1120			 : i);
1121	  /* Offset from start of field in OP0.  */
1122	  int bit_offset = (WORDS_BIG_ENDIAN
1123			    ? MAX (0, bitsize - (i + 1) * BITS_PER_WORD)
1124			    : i * BITS_PER_WORD);
1125	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1126	  rtx result_part
1127	    = extract_bit_field (op0, MIN (BITS_PER_WORD,
1128					   bitsize - i * BITS_PER_WORD),
1129				 bitnum + bit_offset,
1130				 1, target_part, mode, word_mode,
1131				 align, total_size);
1132
1133	  if (target_part == 0)
1134	    abort ();
1135
1136	  if (result_part != target_part)
1137	    emit_move_insn (target_part, result_part);
1138	}
1139
1140      if (unsignedp)
1141	{
1142	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
1143	     need to be zero'd out.  */
1144	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1145	    {
1146	      int i,total_words;
1147
1148	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1149	      for (i = nwords; i < total_words; i++)
1150		{
1151		  int wordnum = WORDS_BIG_ENDIAN ? total_words - i - 1 : i;
1152		  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1153		  emit_move_insn (target_part, const0_rtx);
1154		}
1155	    }
1156	  return target;
1157	}
1158
1159      /* Signed bit field: sign-extend with two arithmetic shifts.  */
1160      target = expand_shift (LSHIFT_EXPR, mode, target,
1161			     build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1162			     NULL_RTX, 0);
1163      return expand_shift (RSHIFT_EXPR, mode, target,
1164			   build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1165			   NULL_RTX, 0);
1166    }
1167
1168  /* From here on we know the desired field is smaller than a word
1169     so we can assume it is an integer.  So we can safely extract it as one
1170     size of integer, if necessary, and then truncate or extend
1171     to the size that is wanted.  */
1172
1173  /* OFFSET is the number of words or bytes (UNIT says which)
1174     from STR_RTX to the first word or byte containing part of the field.  */
1175
1176  if (GET_CODE (op0) != MEM)
1177    {
1178      if (offset != 0
1179	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1180	{
1181	  if (GET_CODE (op0) != REG)
1182	    op0 = copy_to_reg (op0);
1183	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1184		                op0, offset);
1185	}
1186      offset = 0;
1187    }
1188  else
1189    {
1190      op0 = protect_from_queue (str_rtx, 1);
1191    }
1192
1193  /* Now OFFSET is nonzero only for memory operands.  */
1194
1195  if (unsignedp)
1196    {
1197#ifdef HAVE_extzv
1198      if (HAVE_extzv
1199	  && (extzv_bitsize >= bitsize)
1200	  && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1201		&& (bitsize + bitpos > extzv_bitsize)))
1202	{
1203	  int xbitpos = bitpos, xoffset = offset;
1204	  rtx bitsize_rtx, bitpos_rtx;
1205	  rtx last = get_last_insn ();
1206	  rtx xop0 = op0;
1207	  rtx xtarget = target;
1208	  rtx xspec_target = spec_target;
1209	  rtx xspec_target_subreg = spec_target_subreg;
1210	  rtx pat;
1211	  enum machine_mode maxmode;
1212
1213	  maxmode = insn_operand_mode[(int) CODE_FOR_extzv][0];
1214	  if (maxmode == VOIDmode)
1215	    maxmode = word_mode;
1216
1217	  if (GET_CODE (xop0) == MEM)
1218	    {
1219	      int save_volatile_ok = volatile_ok;
1220	      volatile_ok = 1;
1221
1222	      /* Is the memory operand acceptable?  */
1223	      if (! ((*insn_operand_predicate[(int) CODE_FOR_extzv][1])
1224		     (xop0, GET_MODE (xop0))))
1225		{
1226		  /* No, load into a reg and extract from there.  */
1227		  enum machine_mode bestmode;
1228
1229		  /* Get the mode to use for inserting into this field.  If
1230		     OP0 is BLKmode, get the smallest mode consistent with the
1231		     alignment. If OP0 is a non-BLKmode object that is no
1232		     wider than MAXMODE, use its mode. Otherwise, use the
1233		     smallest mode containing the field.  */
1234
1235		  if (GET_MODE (xop0) == BLKmode
1236		      || (GET_MODE_SIZE (GET_MODE (op0))
1237			  > GET_MODE_SIZE (maxmode)))
1238		    bestmode = get_best_mode (bitsize, bitnum,
1239					      align * BITS_PER_UNIT, maxmode,
1240					      MEM_VOLATILE_P (xop0));
1241		  else
1242		    bestmode = GET_MODE (xop0);
1243
1244		  if (bestmode == VOIDmode
1245		      || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
1246		    goto extzv_loses;
1247
1248		  /* Compute offset as multiple of this unit,
1249		     counting in bytes.  */
1250		  unit = GET_MODE_BITSIZE (bestmode);
1251		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1252		  xbitpos = bitnum % unit;
1253		  xop0 = change_address (xop0, bestmode,
1254					 plus_constant (XEXP (xop0, 0),
1255							xoffset));
1256		  /* Fetch it to a register in that size.  */
1257		  xop0 = force_reg (bestmode, xop0);
1258
1259		  /* XBITPOS counts within UNIT, which is what is expected.  */
1260		}
1261	      else
1262		/* Get ref to first byte containing part of the field.  */
1263		xop0 = change_address (xop0, byte_mode,
1264				       plus_constant (XEXP (xop0, 0), xoffset));
1265
1266	      volatile_ok = save_volatile_ok;
1267	    }
1268
1269	  /* If op0 is a register, we need it in MAXMODE (which is usually
1270	     SImode). to make it acceptable to the format of extzv.  */
1271	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1272	    goto extzv_loses;
1273	  if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1274	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1275
1276	  /* On big-endian machines, we count bits from the most significant.
1277	     If the bit field insn does not, we must invert.  */
1278	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1279	    xbitpos = unit - bitsize - xbitpos;
1280
1281	  /* Now convert from counting within UNIT to counting in MAXMODE.  */
1282	  if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1283	    xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1284
1285	  unit = GET_MODE_BITSIZE (maxmode);
1286
1287	  if (xtarget == 0
1288	      || (flag_force_mem && GET_CODE (xtarget) == MEM))
1289	    xtarget = xspec_target = gen_reg_rtx (tmode);
1290
1291	  if (GET_MODE (xtarget) != maxmode)
1292	    {
1293	      if (GET_CODE (xtarget) == REG)
1294		{
1295		  int wider = (GET_MODE_SIZE (maxmode)
1296			       > GET_MODE_SIZE (GET_MODE (xtarget)));
1297		  xtarget = gen_lowpart (maxmode, xtarget);
1298		  if (wider)
1299		    xspec_target_subreg = xtarget;
1300		}
1301	      else
1302		xtarget = gen_reg_rtx (maxmode);
1303	    }
1304
1305	  /* If this machine's extzv insists on a register target,
1306	     make sure we have one.  */
1307	  if (! ((*insn_operand_predicate[(int) CODE_FOR_extzv][0])
1308		 (xtarget, maxmode)))
1309	    xtarget = gen_reg_rtx (maxmode);
1310
1311	  bitsize_rtx = GEN_INT (bitsize);
1312	  bitpos_rtx = GEN_INT (xbitpos);
1313
1314	  pat = gen_extzv (protect_from_queue (xtarget, 1),
1315			   xop0, bitsize_rtx, bitpos_rtx);
1316	  if (pat)
1317	    {
1318	      emit_insn (pat);
1319	      target = xtarget;
1320	      spec_target = xspec_target;
1321	      spec_target_subreg = xspec_target_subreg;
1322	    }
1323	  else
1324	    {
1325	      delete_insns_since (last);
1326	      target = extract_fixed_bit_field (tmode, op0, offset, bitsize,
1327						bitpos, target, 1, align);
1328	    }
1329	}
1330      else
1331        extzv_loses:
1332#endif
1333	target = extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1334					  target, 1, align);
1335    }
1336  else
1337    {
1338#ifdef HAVE_extv
1339      if (HAVE_extv
1340	  && (extv_bitsize >= bitsize)
1341	  && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1342		&& (bitsize + bitpos > extv_bitsize)))
1343	{
1344	  int xbitpos = bitpos, xoffset = offset;
1345	  rtx bitsize_rtx, bitpos_rtx;
1346	  rtx last = get_last_insn ();
1347	  rtx xop0 = op0, xtarget = target;
1348	  rtx xspec_target = spec_target;
1349	  rtx xspec_target_subreg = spec_target_subreg;
1350	  rtx pat;
1351	  enum machine_mode maxmode;
1352
1353	  maxmode = insn_operand_mode[(int) CODE_FOR_extv][0];
1354	  if (maxmode == VOIDmode)
1355	    maxmode = word_mode;
1356
1357	  if (GET_CODE (xop0) == MEM)
1358	    {
1359	      /* Is the memory operand acceptable?  */
1360	      if (! ((*insn_operand_predicate[(int) CODE_FOR_extv][1])
1361		     (xop0, GET_MODE (xop0))))
1362		{
1363		  /* No, load into a reg and extract from there.  */
1364		  enum machine_mode bestmode;
1365
1366		  /* Get the mode to use for inserting into this field.  If
1367		     OP0 is BLKmode, get the smallest mode consistent with the
1368		     alignment. If OP0 is a non-BLKmode object that is no
1369		     wider than MAXMODE, use its mode. Otherwise, use the
1370		     smallest mode containing the field.  */
1371
1372		  if (GET_MODE (xop0) == BLKmode
1373		      || (GET_MODE_SIZE (GET_MODE (op0))
1374			  > GET_MODE_SIZE (maxmode)))
1375		    bestmode = get_best_mode (bitsize, bitnum,
1376					      align * BITS_PER_UNIT, maxmode,
1377					      MEM_VOLATILE_P (xop0));
1378		  else
1379		    bestmode = GET_MODE (xop0);
1380
1381		  if (bestmode == VOIDmode
1382		      || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
1383		    goto extv_loses;
1384
1385		  /* Compute offset as multiple of this unit,
1386		     counting in bytes.  */
1387		  unit = GET_MODE_BITSIZE (bestmode);
1388		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1389		  xbitpos = bitnum % unit;
1390		  xop0 = change_address (xop0, bestmode,
1391					 plus_constant (XEXP (xop0, 0),
1392							xoffset));
1393		  /* Fetch it to a register in that size.  */
1394		  xop0 = force_reg (bestmode, xop0);
1395
1396		  /* XBITPOS counts within UNIT, which is what is expected.  */
1397		}
1398	      else
1399		/* Get ref to first byte containing part of the field.  */
1400		xop0 = change_address (xop0, byte_mode,
1401				       plus_constant (XEXP (xop0, 0), xoffset));
1402	    }
1403
1404	  /* If op0 is a register, we need it in MAXMODE (which is usually
1405	     SImode) to make it acceptable to the format of extv.  */
1406	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1407	    goto extv_loses;
1408	  if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1409	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1410
1411	  /* On big-endian machines, we count bits from the most significant.
1412	     If the bit field insn does not, we must invert.  */
1413	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1414	    xbitpos = unit - bitsize - xbitpos;
1415
1416	  /* XBITPOS counts within a size of UNIT.
1417	     Adjust to count within a size of MAXMODE.  */
1418	  if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1419	    xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1420
1421	  unit = GET_MODE_BITSIZE (maxmode);
1422
1423	  if (xtarget == 0
1424	      || (flag_force_mem && GET_CODE (xtarget) == MEM))
1425	    xtarget = xspec_target = gen_reg_rtx (tmode);
1426
1427	  if (GET_MODE (xtarget) != maxmode)
1428	    {
1429	      if (GET_CODE (xtarget) == REG)
1430		{
1431		  int wider = (GET_MODE_SIZE (maxmode)
1432			       > GET_MODE_SIZE (GET_MODE (xtarget)));
1433		  xtarget = gen_lowpart (maxmode, xtarget);
1434		  if (wider)
1435		    xspec_target_subreg = xtarget;
1436		}
1437	      else
1438		xtarget = gen_reg_rtx (maxmode);
1439	    }
1440
1441	  /* If this machine's extv insists on a register target,
1442	     make sure we have one.  */
1443	  if (! ((*insn_operand_predicate[(int) CODE_FOR_extv][0])
1444		 (xtarget, maxmode)))
1445	    xtarget = gen_reg_rtx (maxmode);
1446
1447	  bitsize_rtx = GEN_INT (bitsize);
1448	  bitpos_rtx = GEN_INT (xbitpos);
1449
1450	  pat = gen_extv (protect_from_queue (xtarget, 1),
1451			  xop0, bitsize_rtx, bitpos_rtx);
1452	  if (pat)
1453	    {
1454	      emit_insn (pat);
1455	      target = xtarget;
1456	      spec_target = xspec_target;
1457	      spec_target_subreg = xspec_target_subreg;
1458	    }
1459	  else
1460	    {
1461	      delete_insns_since (last);
1462	      target = extract_fixed_bit_field (tmode, op0, offset, bitsize,
1463						bitpos, target, 0, align);
1464	    }
1465	}
1466      else
1467	extv_loses:
1468#endif
1469	target = extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1470					  target, 0, align);
1471    }
1472  if (target == spec_target)
1473    return target;
1474  if (target == spec_target_subreg)
1475    return spec_target;
1476  if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1477    {
1478      /* If the target mode is floating-point, first convert to the
1479	 integer mode of that size and then access it as a floating-point
1480	 value via a SUBREG.  */
1481      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1482	{
1483	  target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1484						   MODE_INT, 0),
1485				    target, unsignedp);
1486	  if (GET_CODE (target) != REG)
1487	    target = copy_to_reg (target);
1488	  return gen_rtx_SUBREG (tmode, target, 0);
1489	}
1490      else
1491	return convert_to_mode (tmode, target, unsignedp);
1492    }
1493  return target;
1494}
1495
1496/* Extract a bit field using shifts and boolean operations
1497   Returns an rtx to represent the value.
1498   OP0 addresses a register (word) or memory (byte).
1499   BITPOS says which bit within the word or byte the bit field starts in.
1500   OFFSET says how many bytes farther the bit field starts;
1501    it is 0 if OP0 is a register.
1502   BITSIZE says how many bits long the bit field is.
1503    (If OP0 is a register, it may be narrower than a full word,
1504     but BITPOS still counts within a full word,
1505     which is significant on bigendian machines.)
1506
1507   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1508   If TARGET is nonzero, attempts to store the value there
1509   and return TARGET, but this is not guaranteed.
1510   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.
1511
1512   ALIGN is the alignment that STR_RTX is known to have, measured in bytes.  */
1513
1514static rtx
1515extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1516			 target, unsignedp, align)
1517     enum machine_mode tmode;
1518     register rtx op0, target;
1519     register int offset, bitsize, bitpos;
1520     int unsignedp;
1521     int align;
1522{
1523  int total_bits = BITS_PER_WORD;
1524  enum machine_mode mode;
1525
1526  if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
1527    {
1528      /* Special treatment for a bit field split across two registers.  */
1529      if (bitsize + bitpos > BITS_PER_WORD)
1530	return extract_split_bit_field (op0, bitsize, bitpos,
1531					unsignedp, align);
1532    }
1533  else
1534    {
1535      /* Get the proper mode to use for this field.  We want a mode that
1536	 includes the entire field.  If such a mode would be larger than
1537	 a word, we won't be doing the extraction the normal way.  */
1538
1539      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1540			    align * BITS_PER_UNIT, word_mode,
1541			    GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));
1542
1543      if (mode == VOIDmode)
1544	/* The only way this should occur is if the field spans word
1545	   boundaries.  */
1546	return extract_split_bit_field (op0, bitsize,
1547					bitpos + offset * BITS_PER_UNIT,
1548					unsignedp, align);
1549
1550      total_bits = GET_MODE_BITSIZE (mode);
1551
1552      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1553	 be in the range 0 to total_bits-1, and put any excess bytes in
1554	 OFFSET.  */
1555      if (bitpos >= total_bits)
1556	{
1557	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1558	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1559		     * BITS_PER_UNIT);
1560	}
1561
1562      /* Get ref to an aligned byte, halfword, or word containing the field.
1563	 Adjust BITPOS to be position within a word,
1564	 and OFFSET to be the offset of that word.
1565	 Then alter OP0 to refer to that word.  */
1566      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1567      offset -= (offset % (total_bits / BITS_PER_UNIT));
1568      op0 = change_address (op0, mode,
1569			    plus_constant (XEXP (op0, 0), offset));
1570    }
1571
1572  mode = GET_MODE (op0);
1573
1574  if (BYTES_BIG_ENDIAN)
1575    {
1576      /* BITPOS is the distance between our msb and that of OP0.
1577	 Convert it to the distance from the lsb.  */
1578
1579      bitpos = total_bits - bitsize - bitpos;
1580    }
1581
1582  /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1583     We have reduced the big-endian case to the little-endian case.  */
1584
1585  if (unsignedp)
1586    {
1587      if (bitpos)
1588	{
1589	  /* If the field does not already start at the lsb,
1590	     shift it so it does.  */
1591	  tree amount = build_int_2 (bitpos, 0);
1592	  /* Maybe propagate the target for the shift.  */
1593	  /* But not if we will return it--could confuse integrate.c.  */
1594	  rtx subtarget = (target != 0 && GET_CODE (target) == REG
1595			   && !REG_FUNCTION_VALUE_P (target)
1596			   ? target : 0);
1597	  if (tmode != mode) subtarget = 0;
1598	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1599	}
1600      /* Convert the value to the desired mode.  */
1601      if (mode != tmode)
1602	op0 = convert_to_mode (tmode, op0, 1);
1603
1604      /* Unless the msb of the field used to be the msb when we shifted,
1605	 mask out the upper bits.  */
1606
1607      if (GET_MODE_BITSIZE (mode) != bitpos + bitsize
1608#if 0
1609#ifdef SLOW_ZERO_EXTEND
1610	  /* Always generate an `and' if
1611	     we just zero-extended op0 and SLOW_ZERO_EXTEND, since it
1612	     will combine fruitfully with the zero-extend.  */
1613	  || tmode != mode
1614#endif
1615#endif
1616	  )
1617	return expand_binop (GET_MODE (op0), and_optab, op0,
1618			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1619			     target, 1, OPTAB_LIB_WIDEN);
1620      return op0;
1621    }
1622
1623  /* To extract a signed bit-field, first shift its msb to the msb of the word,
1624     then arithmetic-shift its lsb to the lsb of the word.  */
1625  op0 = force_reg (mode, op0);
1626  if (mode != tmode)
1627    target = 0;
1628
1629  /* Find the narrowest integer mode that contains the field.  */
1630
1631  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1632       mode = GET_MODE_WIDER_MODE (mode))
1633    if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1634      {
1635	op0 = convert_to_mode (mode, op0, 0);
1636	break;
1637      }
1638
1639  if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1640    {
1641      tree amount = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1642      /* Maybe propagate the target for the shift.  */
1643      /* But not if we will return the result--could confuse integrate.c.  */
1644      rtx subtarget = (target != 0 && GET_CODE (target) == REG
1645		       && ! REG_FUNCTION_VALUE_P (target)
1646		       ? target : 0);
1647      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1648    }
1649
1650  return expand_shift (RSHIFT_EXPR, mode, op0,
1651		       build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1652		       target, 0);
1653}
1654
1655/* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1656   of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1657   complement of that if COMPLEMENT.  The mask is truncated if
1658   necessary to the width of mode MODE.  The mask is zero-extended if
1659   BITSIZE+BITPOS is too small for MODE.  */
1660
1661static rtx
1662mask_rtx (mode, bitpos, bitsize, complement)
1663     enum machine_mode mode;
1664     int bitpos, bitsize, complement;
1665{
1666  HOST_WIDE_INT masklow, maskhigh;
1667
1668  if (bitpos < HOST_BITS_PER_WIDE_INT)
1669    masklow = (HOST_WIDE_INT) -1 << bitpos;
1670  else
1671    masklow = 0;
1672
1673  if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1674    masklow &= ((unsigned HOST_WIDE_INT) -1
1675		>> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1676
1677  if (bitpos <= HOST_BITS_PER_WIDE_INT)
1678    maskhigh = -1;
1679  else
1680    maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1681
1682  if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1683    maskhigh &= ((unsigned HOST_WIDE_INT) -1
1684		 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1685  else
1686    maskhigh = 0;
1687
1688  if (complement)
1689    {
1690      maskhigh = ~maskhigh;
1691      masklow = ~masklow;
1692    }
1693
1694  return immed_double_const (masklow, maskhigh, mode);
1695}
1696
1697/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1698   VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1699
1700static rtx
1701lshift_value (mode, value, bitpos, bitsize)
1702     enum machine_mode mode;
1703     rtx value;
1704     int bitpos, bitsize;
1705{
1706  unsigned HOST_WIDE_INT v = INTVAL (value);
1707  HOST_WIDE_INT low, high;
1708
1709  if (bitsize < HOST_BITS_PER_WIDE_INT)
1710    v &= ~((HOST_WIDE_INT) -1 << bitsize);
1711
1712  if (bitpos < HOST_BITS_PER_WIDE_INT)
1713    {
1714      low = v << bitpos;
1715      high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1716    }
1717  else
1718    {
1719      low = 0;
1720      high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1721    }
1722
1723  return immed_double_const (low, high, mode);
1724}
1725
1726/* Extract a bit field that is split across two words
1727   and return an RTX for the result.
1728
1729   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1730   BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1731   UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
1732
1733   ALIGN is the known alignment of OP0, measured in bytes.
1734   This is also the size of the memory objects to be used.  */
1735
1736static rtx
1737extract_split_bit_field (op0, bitsize, bitpos, unsignedp, align)
1738     rtx op0;
1739     int bitsize, bitpos, unsignedp, align;
1740{
1741  int unit;
1742  int bitsdone = 0;
1743  rtx result = NULL_RTX;
1744  int first = 1;
1745
1746  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1747     much at a time.  */
1748  if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1749    unit = BITS_PER_WORD;
1750  else
1751    unit = MIN (align * BITS_PER_UNIT, BITS_PER_WORD);
1752
1753  while (bitsdone < bitsize)
1754    {
1755      int thissize;
1756      rtx part, word;
1757      int thispos;
1758      int offset;
1759
1760      offset = (bitpos + bitsdone) / unit;
1761      thispos = (bitpos + bitsdone) % unit;
1762
1763      /* THISSIZE must not overrun a word boundary.  Otherwise,
1764	 extract_fixed_bit_field will call us again, and we will mutually
1765	 recurse forever.  */
1766      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1767      thissize = MIN (thissize, unit - thispos);
1768
1769      /* If OP0 is a register, then handle OFFSET here.
1770
1771	 When handling multiword bitfields, extract_bit_field may pass
1772	 down a word_mode SUBREG of a larger REG for a bitfield that actually
1773	 crosses a word boundary.  Thus, for a SUBREG, we must find
1774	 the current word starting from the base register.  */
1775      if (GET_CODE (op0) == SUBREG)
1776	{
1777	  word = operand_subword_force (SUBREG_REG (op0),
1778					SUBREG_WORD (op0) + offset,
1779					GET_MODE (SUBREG_REG (op0)));
1780	  offset = 0;
1781	}
1782      else if (GET_CODE (op0) == REG)
1783	{
1784	  word = operand_subword_force (op0, offset, GET_MODE (op0));
1785	  offset = 0;
1786	}
1787      else
1788	word = op0;
1789
1790      /* Extract the parts in bit-counting order,
1791	 whose meaning is determined by BYTES_PER_UNIT.
1792	 OFFSET is in UNITs, and UNIT is in bits.
1793	 extract_fixed_bit_field wants offset in bytes.  */
1794      part = extract_fixed_bit_field (word_mode, word,
1795				      offset * unit / BITS_PER_UNIT,
1796				      thissize, thispos, 0, 1, align);
1797      bitsdone += thissize;
1798
1799      /* Shift this part into place for the result.  */
1800      if (BYTES_BIG_ENDIAN)
1801	{
1802	  if (bitsize != bitsdone)
1803	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
1804				 build_int_2 (bitsize - bitsdone, 0), 0, 1);
1805	}
1806      else
1807	{
1808	  if (bitsdone != thissize)
1809	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
1810				 build_int_2 (bitsdone - thissize, 0), 0, 1);
1811	}
1812
1813      if (first)
1814	result = part;
1815      else
1816	/* Combine the parts with bitwise or.  This works
1817	   because we extracted each part as an unsigned bit field.  */
1818	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1819			       OPTAB_LIB_WIDEN);
1820
1821      first = 0;
1822    }
1823
1824  /* Unsigned bit field: we are done.  */
1825  if (unsignedp)
1826    return result;
1827  /* Signed bit field: sign-extend with two arithmetic shifts.  */
1828  result = expand_shift (LSHIFT_EXPR, word_mode, result,
1829			 build_int_2 (BITS_PER_WORD - bitsize, 0),
1830			 NULL_RTX, 0);
1831  return expand_shift (RSHIFT_EXPR, word_mode, result,
1832		       build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1833}
1834
1835/* Add INC into TARGET.  */
1836
1837void
1838expand_inc (target, inc)
1839     rtx target, inc;
1840{
1841  rtx value = expand_binop (GET_MODE (target), add_optab,
1842			    target, inc,
1843			    target, 0, OPTAB_LIB_WIDEN);
1844  if (value != target)
1845    emit_move_insn (target, value);
1846}
1847
1848/* Subtract DEC from TARGET.  */
1849
1850void
1851expand_dec (target, dec)
1852     rtx target, dec;
1853{
1854  rtx value = expand_binop (GET_MODE (target), sub_optab,
1855			    target, dec,
1856			    target, 0, OPTAB_LIB_WIDEN);
1857  if (value != target)
1858    emit_move_insn (target, value);
1859}
1860
1861/* Output a shift instruction for expression code CODE,
1862   with SHIFTED being the rtx for the value to shift,
1863   and AMOUNT the tree for the amount to shift by.
1864   Store the result in the rtx TARGET, if that is convenient.
1865   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1866   Return the rtx for where the value is.  */
1867
1868rtx
1869expand_shift (code, mode, shifted, amount, target, unsignedp)
1870     enum tree_code code;
1871     register enum machine_mode mode;
1872     rtx shifted;
1873     tree amount;
1874     register rtx target;
1875     int unsignedp;
1876{
1877  register rtx op1, temp = 0;
1878  register int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1879  register int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1880  int try;
1881
1882  /* Previously detected shift-counts computed by NEGATE_EXPR
1883     and shifted in the other direction; but that does not work
1884     on all machines.  */
1885
1886  op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1887
1888#ifdef SHIFT_COUNT_TRUNCATED
1889  if (SHIFT_COUNT_TRUNCATED)
1890    {
1891      if (GET_CODE (op1) == CONST_INT
1892          && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
1893	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
1894        op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
1895		       % GET_MODE_BITSIZE (mode));
1896      else if (GET_CODE (op1) == SUBREG
1897	       && SUBREG_WORD (op1) == 0)
1898	op1 = SUBREG_REG (op1);
1899    }
1900#endif
1901
1902  if (op1 == const0_rtx)
1903    return shifted;
1904
1905  for (try = 0; temp == 0 && try < 3; try++)
1906    {
1907      enum optab_methods methods;
1908
1909      if (try == 0)
1910	methods = OPTAB_DIRECT;
1911      else if (try == 1)
1912	methods = OPTAB_WIDEN;
1913      else
1914	methods = OPTAB_LIB_WIDEN;
1915
1916      if (rotate)
1917	{
1918	  /* Widening does not work for rotation.  */
1919	  if (methods == OPTAB_WIDEN)
1920	    continue;
1921	  else if (methods == OPTAB_LIB_WIDEN)
1922	    {
1923	      /* If we have been unable to open-code this by a rotation,
1924		 do it as the IOR of two shifts.  I.e., to rotate A
1925		 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
1926		 where C is the bitsize of A.
1927
1928		 It is theoretically possible that the target machine might
1929		 not be able to perform either shift and hence we would
1930		 be making two libcalls rather than just the one for the
1931		 shift (similarly if IOR could not be done).  We will allow
1932		 this extremely unlikely lossage to avoid complicating the
1933		 code below.  */
1934
1935	      rtx subtarget = target == shifted ? 0 : target;
1936	      rtx temp1;
1937	      tree type = TREE_TYPE (amount);
1938	      tree new_amount = make_tree (type, op1);
1939	      tree other_amount
1940		= fold (build (MINUS_EXPR, type,
1941			       convert (type,
1942					build_int_2 (GET_MODE_BITSIZE (mode),
1943						     0)),
1944			       amount));
1945
1946	      shifted = force_reg (mode, shifted);
1947
1948	      temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
1949				   mode, shifted, new_amount, subtarget, 1);
1950	      temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
1951				    mode, shifted, other_amount, 0, 1);
1952	      return expand_binop (mode, ior_optab, temp, temp1, target,
1953				   unsignedp, methods);
1954	    }
1955
1956	  temp = expand_binop (mode,
1957			       left ? rotl_optab : rotr_optab,
1958			       shifted, op1, target, unsignedp, methods);
1959
1960	  /* If we don't have the rotate, but we are rotating by a constant
1961	     that is in range, try a rotate in the opposite direction.  */
1962
1963	  if (temp == 0 && GET_CODE (op1) == CONST_INT
1964	      && INTVAL (op1) > 0 && INTVAL (op1) < GET_MODE_BITSIZE (mode))
1965	    temp = expand_binop (mode,
1966				 left ? rotr_optab : rotl_optab,
1967				 shifted,
1968				 GEN_INT (GET_MODE_BITSIZE (mode)
1969					  - INTVAL (op1)),
1970				 target, unsignedp, methods);
1971	}
1972      else if (unsignedp)
1973	temp = expand_binop (mode,
1974			     left ? ashl_optab : lshr_optab,
1975			     shifted, op1, target, unsignedp, methods);
1976
1977      /* Do arithmetic shifts.
1978	 Also, if we are going to widen the operand, we can just as well
1979	 use an arithmetic right-shift instead of a logical one.  */
1980      if (temp == 0 && ! rotate
1981	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
1982	{
1983	  enum optab_methods methods1 = methods;
1984
1985	  /* If trying to widen a log shift to an arithmetic shift,
1986	     don't accept an arithmetic shift of the same size.  */
1987	  if (unsignedp)
1988	    methods1 = OPTAB_MUST_WIDEN;
1989
1990	  /* Arithmetic shift */
1991
1992	  temp = expand_binop (mode,
1993			       left ? ashl_optab : ashr_optab,
1994			       shifted, op1, target, unsignedp, methods1);
1995	}
1996
1997      /* We used to try extzv here for logical right shifts, but that was
1998	 only useful for one machine, the VAX, and caused poor code
1999	 generation there for lshrdi3, so the code was deleted and a
2000	 define_expand for lshrsi3 was added to vax.md.  */
2001    }
2002
2003  if (temp == 0)
2004    abort ();
2005  return temp;
2006}
2007
2008enum alg_code { alg_zero, alg_m, alg_shift,
2009		  alg_add_t_m2, alg_sub_t_m2,
2010		  alg_add_factor, alg_sub_factor,
2011		  alg_add_t2_m, alg_sub_t2_m,
2012		  alg_add, alg_subtract, alg_factor, alg_shiftop };
2013
2014/* This structure records a sequence of operations.
2015   `ops' is the number of operations recorded.
2016   `cost' is their total cost.
2017   The operations are stored in `op' and the corresponding
2018   logarithms of the integer coefficients in `log'.
2019
2020   These are the operations:
2021   alg_zero		total := 0;
2022   alg_m		total := multiplicand;
2023   alg_shift		total := total * coeff
2024   alg_add_t_m2		total := total + multiplicand * coeff;
2025   alg_sub_t_m2		total := total - multiplicand * coeff;
2026   alg_add_factor	total := total * coeff + total;
2027   alg_sub_factor	total := total * coeff - total;
2028   alg_add_t2_m		total := total * coeff + multiplicand;
2029   alg_sub_t2_m		total := total * coeff - multiplicand;
2030
2031   The first operand must be either alg_zero or alg_m.  */
2032
2033struct algorithm
2034{
2035  short cost;
2036  short ops;
2037  /* The size of the OP and LOG fields are not directly related to the
2038     word size, but the worst-case algorithms will be if we have few
2039     consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2040     In that case we will generate shift-by-2, add, shift-by-2, add,...,
2041     in total wordsize operations.  */
2042  enum alg_code op[MAX_BITS_PER_WORD];
2043  char log[MAX_BITS_PER_WORD];
2044};
2045
2046static void synth_mult			PROTO((struct algorithm *,
2047					       unsigned HOST_WIDE_INT,
2048					       int));
2049static unsigned HOST_WIDE_INT choose_multiplier PROTO((unsigned HOST_WIDE_INT,
2050						       int, int,
2051						       unsigned HOST_WIDE_INT *,
2052						       int *, int *));
2053static unsigned HOST_WIDE_INT invert_mod2n	PROTO((unsigned HOST_WIDE_INT,
2054						       int));
2055/* Compute and return the best algorithm for multiplying by T.
2056   The algorithm must cost less than cost_limit
2057   If retval.cost >= COST_LIMIT, no algorithm was found and all
2058   other field of the returned struct are undefined.  */
2059
2060static void
2061synth_mult (alg_out, t, cost_limit)
2062     struct algorithm *alg_out;
2063     unsigned HOST_WIDE_INT t;
2064     int cost_limit;
2065{
2066  int m;
2067  struct algorithm *alg_in, *best_alg;
2068  int cost;
2069  unsigned HOST_WIDE_INT q;
2070
2071  /* Indicate that no algorithm is yet found.  If no algorithm
2072     is found, this value will be returned and indicate failure.  */
2073  alg_out->cost = cost_limit;
2074
2075  if (cost_limit <= 0)
2076    return;
2077
2078  /* t == 1 can be done in zero cost.  */
2079  if (t == 1)
2080    {
2081      alg_out->ops = 1;
2082      alg_out->cost = 0;
2083      alg_out->op[0] = alg_m;
2084      return;
2085    }
2086
2087  /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2088     fail now.  */
2089  if (t == 0)
2090    {
2091      if (zero_cost >= cost_limit)
2092	return;
2093      else
2094	{
2095	  alg_out->ops = 1;
2096	  alg_out->cost = zero_cost;
2097	  alg_out->op[0] = alg_zero;
2098	  return;
2099	}
2100    }
2101
2102  /* We'll be needing a couple extra algorithm structures now.  */
2103
2104  alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
2105  best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
2106
2107  /* If we have a group of zero bits at the low-order part of T, try
2108     multiplying by the remaining bits and then doing a shift.  */
2109
2110  if ((t & 1) == 0)
2111    {
2112      m = floor_log2 (t & -t);	/* m = number of low zero bits */
2113      q = t >> m;
2114      cost = shift_cost[m];
2115      synth_mult (alg_in, q, cost_limit - cost);
2116
2117      cost += alg_in->cost;
2118      if (cost < cost_limit)
2119	{
2120	  struct algorithm *x;
2121	  x = alg_in, alg_in = best_alg, best_alg = x;
2122	  best_alg->log[best_alg->ops] = m;
2123	  best_alg->op[best_alg->ops] = alg_shift;
2124	  cost_limit = cost;
2125	}
2126    }
2127
2128  /* If we have an odd number, add or subtract one.  */
2129  if ((t & 1) != 0)
2130    {
2131      unsigned HOST_WIDE_INT w;
2132
2133      for (w = 1; (w & t) != 0; w <<= 1)
2134	;
2135      /* If T was -1, then W will be zero after the loop.  This is another
2136	 case where T ends with ...111.  Handling this with (T + 1) and
2137	 subtract 1 produces slightly better code and results in algorithm
2138	 selection much faster than treating it like the ...0111 case
2139	 below.  */
2140      if (w == 0
2141	  || (w > 2
2142	      /* Reject the case where t is 3.
2143		 Thus we prefer addition in that case.  */
2144	      && t != 3))
2145	{
2146	  /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2147
2148	  cost = add_cost;
2149	  synth_mult (alg_in, t + 1, cost_limit - cost);
2150
2151	  cost += alg_in->cost;
2152	  if (cost < cost_limit)
2153	    {
2154	      struct algorithm *x;
2155	      x = alg_in, alg_in = best_alg, best_alg = x;
2156	      best_alg->log[best_alg->ops] = 0;
2157	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2158	      cost_limit = cost;
2159	    }
2160	}
2161      else
2162	{
2163	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2164
2165	  cost = add_cost;
2166	  synth_mult (alg_in, t - 1, cost_limit - cost);
2167
2168	  cost += alg_in->cost;
2169	  if (cost < cost_limit)
2170	    {
2171	      struct algorithm *x;
2172	      x = alg_in, alg_in = best_alg, best_alg = x;
2173	      best_alg->log[best_alg->ops] = 0;
2174	      best_alg->op[best_alg->ops] = alg_add_t_m2;
2175	      cost_limit = cost;
2176	    }
2177	}
2178    }
2179
2180  /* Look for factors of t of the form
2181     t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2182     If we find such a factor, we can multiply by t using an algorithm that
2183     multiplies by q, shift the result by m and add/subtract it to itself.
2184
2185     We search for large factors first and loop down, even if large factors
2186     are less probable than small; if we find a large factor we will find a
2187     good sequence quickly, and therefore be able to prune (by decreasing
2188     COST_LIMIT) the search.  */
2189
2190  for (m = floor_log2 (t - 1); m >= 2; m--)
2191    {
2192      unsigned HOST_WIDE_INT d;
2193
2194      d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2195      if (t % d == 0 && t > d)
2196	{
2197	  cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2198	  synth_mult (alg_in, t / d, cost_limit - cost);
2199
2200	  cost += alg_in->cost;
2201	  if (cost < cost_limit)
2202	    {
2203	      struct algorithm *x;
2204	      x = alg_in, alg_in = best_alg, best_alg = x;
2205	      best_alg->log[best_alg->ops] = m;
2206	      best_alg->op[best_alg->ops] = alg_add_factor;
2207	      cost_limit = cost;
2208	    }
2209	  /* Other factors will have been taken care of in the recursion.  */
2210	  break;
2211	}
2212
2213      d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2214      if (t % d == 0 && t > d)
2215	{
2216	  cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2217	  synth_mult (alg_in, t / d, cost_limit - cost);
2218
2219	  cost += alg_in->cost;
2220	  if (cost < cost_limit)
2221	    {
2222	      struct algorithm *x;
2223	      x = alg_in, alg_in = best_alg, best_alg = x;
2224	      best_alg->log[best_alg->ops] = m;
2225	      best_alg->op[best_alg->ops] = alg_sub_factor;
2226	      cost_limit = cost;
2227	    }
2228	  break;
2229	}
2230    }
2231
2232  /* Try shift-and-add (load effective address) instructions,
2233     i.e. do a*3, a*5, a*9.  */
2234  if ((t & 1) != 0)
2235    {
2236      q = t - 1;
2237      q = q & -q;
2238      m = exact_log2 (q);
2239      if (m >= 0)
2240	{
2241	  cost = shiftadd_cost[m];
2242	  synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2243
2244	  cost += alg_in->cost;
2245	  if (cost < cost_limit)
2246	    {
2247	      struct algorithm *x;
2248	      x = alg_in, alg_in = best_alg, best_alg = x;
2249	      best_alg->log[best_alg->ops] = m;
2250	      best_alg->op[best_alg->ops] = alg_add_t2_m;
2251	      cost_limit = cost;
2252	    }
2253	}
2254
2255      q = t + 1;
2256      q = q & -q;
2257      m = exact_log2 (q);
2258      if (m >= 0)
2259	{
2260	  cost = shiftsub_cost[m];
2261	  synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2262
2263	  cost += alg_in->cost;
2264	  if (cost < cost_limit)
2265	    {
2266	      struct algorithm *x;
2267	      x = alg_in, alg_in = best_alg, best_alg = x;
2268	      best_alg->log[best_alg->ops] = m;
2269	      best_alg->op[best_alg->ops] = alg_sub_t2_m;
2270	      cost_limit = cost;
2271	    }
2272	}
2273    }
2274
2275  /* If cost_limit has not decreased since we stored it in alg_out->cost,
2276     we have not found any algorithm.  */
2277  if (cost_limit == alg_out->cost)
2278    return;
2279
2280  /* If we are getting a too long sequence for `struct algorithm'
2281     to record, make this search fail.  */
2282  if (best_alg->ops == MAX_BITS_PER_WORD)
2283    return;
2284
2285  /* Copy the algorithm from temporary space to the space at alg_out.
2286     We avoid using structure assignment because the majority of
2287     best_alg is normally undefined, and this is a critical function.  */
2288  alg_out->ops = best_alg->ops + 1;
2289  alg_out->cost = cost_limit;
2290  bcopy ((char *) best_alg->op, (char *) alg_out->op,
2291	 alg_out->ops * sizeof *alg_out->op);
2292  bcopy ((char *) best_alg->log, (char *) alg_out->log,
2293	 alg_out->ops * sizeof *alg_out->log);
2294}
2295
2296/* Perform a multiplication and return an rtx for the result.
2297   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2298   TARGET is a suggestion for where to store the result (an rtx).
2299
2300   We check specially for a constant integer as OP1.
2301   If you want this check for OP0 as well, then before calling
2302   you should swap the two operands if OP0 would be constant.  */
2303
2304rtx
2305expand_mult (mode, op0, op1, target, unsignedp)
2306     enum machine_mode mode;
2307     register rtx op0, op1, target;
2308     int unsignedp;
2309{
2310  rtx const_op1 = op1;
2311
2312  /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2313     less than or equal in size to `unsigned int' this doesn't matter.
2314     If the mode is larger than `unsigned int', then synth_mult works only
2315     if the constant value exactly fits in an `unsigned int' without any
2316     truncation.  This means that multiplying by negative values does
2317     not work; results are off by 2^32 on a 32 bit machine.  */
2318
2319  /* If we are multiplying in DImode, it may still be a win
2320     to try to work with shifts and adds.  */
2321  if (GET_CODE (op1) == CONST_DOUBLE
2322      && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2323      && HOST_BITS_PER_INT >= BITS_PER_WORD
2324      && CONST_DOUBLE_HIGH (op1) == 0)
2325    const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2326  else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2327	   && GET_CODE (op1) == CONST_INT
2328	   && INTVAL (op1) < 0)
2329    const_op1 = 0;
2330
2331  /* We used to test optimize here, on the grounds that it's better to
2332     produce a smaller program when -O is not used.
2333     But this causes such a terrible slowdown sometimes
2334     that it seems better to use synth_mult always.  */
2335
2336  if (const_op1 && GET_CODE (const_op1) == CONST_INT)
2337    {
2338      struct algorithm alg;
2339      struct algorithm alg2;
2340      HOST_WIDE_INT val = INTVAL (op1);
2341      HOST_WIDE_INT val_so_far;
2342      rtx insn;
2343      int mult_cost;
2344      enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2345
2346      /* Try to do the computation three ways: multiply by the negative of OP1
2347	 and then negate, do the multiplication directly, or do multiplication
2348	 by OP1 - 1.  */
2349
2350      mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2351      mult_cost = MIN (12 * add_cost, mult_cost);
2352
2353      synth_mult (&alg, val, mult_cost);
2354
2355      /* This works only if the inverted value actually fits in an
2356	 `unsigned int' */
2357      if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2358	{
2359	  synth_mult (&alg2, - val,
2360		      (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2361	  if (alg2.cost + negate_cost < alg.cost)
2362	    alg = alg2, variant = negate_variant;
2363	}
2364
2365      /* This proves very useful for division-by-constant.  */
2366      synth_mult (&alg2, val - 1,
2367		  (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2368      if (alg2.cost + add_cost < alg.cost)
2369	alg = alg2, variant = add_variant;
2370
2371      if (alg.cost < mult_cost)
2372	{
2373	  /* We found something cheaper than a multiply insn.  */
2374	  int opno;
2375	  rtx accum, tem;
2376
2377	  op0 = protect_from_queue (op0, 0);
2378
2379	  /* Avoid referencing memory over and over.
2380	     For speed, but also for correctness when mem is volatile.  */
2381	  if (GET_CODE (op0) == MEM)
2382	    op0 = force_reg (mode, op0);
2383
2384	  /* ACCUM starts out either as OP0 or as a zero, depending on
2385	     the first operation.  */
2386
2387	  if (alg.op[0] == alg_zero)
2388	    {
2389	      accum = copy_to_mode_reg (mode, const0_rtx);
2390	      val_so_far = 0;
2391	    }
2392	  else if (alg.op[0] == alg_m)
2393	    {
2394	      accum = copy_to_mode_reg (mode, op0);
2395	      val_so_far = 1;
2396	    }
2397	  else
2398	    abort ();
2399
2400	  for (opno = 1; opno < alg.ops; opno++)
2401	    {
2402	      int log = alg.log[opno];
2403	      int preserve = preserve_subexpressions_p ();
2404	      rtx shift_subtarget = preserve ? 0 : accum;
2405	      rtx add_target
2406		= (opno == alg.ops - 1 && target != 0 && variant != add_variant
2407		   && ! preserve)
2408		  ? target : 0;
2409	      rtx accum_target = preserve ? 0 : accum;
2410
2411	      switch (alg.op[opno])
2412		{
2413		case alg_shift:
2414		  accum = expand_shift (LSHIFT_EXPR, mode, accum,
2415					build_int_2 (log, 0), NULL_RTX, 0);
2416		  val_so_far <<= log;
2417		  break;
2418
2419		case alg_add_t_m2:
2420		  tem = expand_shift (LSHIFT_EXPR, mode, op0,
2421				      build_int_2 (log, 0), NULL_RTX, 0);
2422		  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2423					 add_target ? add_target : accum_target);
2424		  val_so_far += (HOST_WIDE_INT) 1 << log;
2425		  break;
2426
2427		case alg_sub_t_m2:
2428		  tem = expand_shift (LSHIFT_EXPR, mode, op0,
2429				      build_int_2 (log, 0), NULL_RTX, 0);
2430		  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2431					 add_target ? add_target : accum_target);
2432		  val_so_far -= (HOST_WIDE_INT) 1 << log;
2433		  break;
2434
2435		case alg_add_t2_m:
2436		  accum = expand_shift (LSHIFT_EXPR, mode, accum,
2437					build_int_2 (log, 0), shift_subtarget,
2438					0);
2439		  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2440					 add_target ? add_target : accum_target);
2441		  val_so_far = (val_so_far << log) + 1;
2442		  break;
2443
2444		case alg_sub_t2_m:
2445		  accum = expand_shift (LSHIFT_EXPR, mode, accum,
2446					build_int_2 (log, 0), shift_subtarget,
2447					0);
2448		  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2449					 add_target ? add_target : accum_target);
2450		  val_so_far = (val_so_far << log) - 1;
2451		  break;
2452
2453		case alg_add_factor:
2454		  tem = expand_shift (LSHIFT_EXPR, mode, accum,
2455				      build_int_2 (log, 0), NULL_RTX, 0);
2456		  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2457					 add_target ? add_target : accum_target);
2458		  val_so_far += val_so_far << log;
2459		  break;
2460
2461		case alg_sub_factor:
2462		  tem = expand_shift (LSHIFT_EXPR, mode, accum,
2463				      build_int_2 (log, 0), NULL_RTX, 0);
2464		  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2465					 (add_target ? add_target
2466					  : preserve ? 0 : tem));
2467		  val_so_far = (val_so_far << log) - val_so_far;
2468		  break;
2469
2470		default:
2471		  abort ();;
2472		}
2473
2474	      /* Write a REG_EQUAL note on the last insn so that we can cse
2475		 multiplication sequences.  */
2476
2477	      insn = get_last_insn ();
2478	      set_unique_reg_note (insn,
2479	      			   REG_EQUAL,
2480				   gen_rtx_MULT (mode, op0,
2481				   	         GEN_INT (val_so_far)));
2482	    }
2483
2484	  if (variant == negate_variant)
2485	    {
2486	      val_so_far = - val_so_far;
2487	      accum = expand_unop (mode, neg_optab, accum, target, 0);
2488	    }
2489	  else if (variant == add_variant)
2490	    {
2491	      val_so_far = val_so_far + 1;
2492	      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2493	    }
2494
2495	  if (val != val_so_far)
2496	    abort ();
2497
2498	  return accum;
2499	}
2500    }
2501
2502  /* This used to use umul_optab if unsigned, but for non-widening multiply
2503     there is no difference between signed and unsigned.  */
2504  op0 = expand_binop (mode, smul_optab,
2505		      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2506  if (op0 == 0)
2507    abort ();
2508  return op0;
2509}
2510
2511/* Return the smallest n such that 2**n >= X.  */
2512
2513int
2514ceil_log2 (x)
2515     unsigned HOST_WIDE_INT x;
2516{
2517  return floor_log2 (x - 1) + 1;
2518}
2519
2520/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2521   replace division by D, and put the least significant N bits of the result
2522   in *MULTIPLIER_PTR and return the most significant bit.
2523
2524   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2525   needed precision is in PRECISION (should be <= N).
2526
2527   PRECISION should be as small as possible so this function can choose
2528   multiplier more freely.
2529
2530   The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2531   is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2532
2533   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2534   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2535
2536static
2537unsigned HOST_WIDE_INT
2538choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr)
2539     unsigned HOST_WIDE_INT d;
2540     int n;
2541     int precision;
2542     unsigned HOST_WIDE_INT *multiplier_ptr;
2543     int *post_shift_ptr;
2544     int *lgup_ptr;
2545{
2546  unsigned HOST_WIDE_INT mhigh_hi, mhigh_lo;
2547  unsigned HOST_WIDE_INT mlow_hi, mlow_lo;
2548  int lgup, post_shift;
2549  int pow, pow2;
2550  unsigned HOST_WIDE_INT nh, nl, dummy1, dummy2;
2551
2552  /* lgup = ceil(log2(divisor)); */
2553  lgup = ceil_log2 (d);
2554
2555  if (lgup > n)
2556    abort ();
2557
2558  pow = n + lgup;
2559  pow2 = n + lgup - precision;
2560
2561  if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2562    {
2563      /* We could handle this with some effort, but this case is much better
2564	 handled directly with a scc insn, so rely on caller using that.  */
2565      abort ();
2566    }
2567
2568  /* mlow = 2^(N + lgup)/d */
2569 if (pow >= HOST_BITS_PER_WIDE_INT)
2570    {
2571      nh = (unsigned HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2572      nl = 0;
2573    }
2574  else
2575    {
2576      nh = 0;
2577      nl = (unsigned HOST_WIDE_INT) 1 << pow;
2578    }
2579  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2580			&mlow_lo, &mlow_hi, &dummy1, &dummy2);
2581
2582  /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2583  if (pow2 >= HOST_BITS_PER_WIDE_INT)
2584    nh |= (unsigned HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2585  else
2586    nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2587  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2588			&mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2589
2590  if (mhigh_hi && nh - d >= d)
2591    abort ();
2592  if (mhigh_hi > 1 || mlow_hi > 1)
2593    abort ();
2594  /* assert that mlow < mhigh.  */
2595  if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2596    abort();
2597
2598  /* If precision == N, then mlow, mhigh exceed 2^N
2599     (but they do not exceed 2^(N+1)).  */
2600
2601  /* Reduce to lowest terms */
2602  for (post_shift = lgup; post_shift > 0; post_shift--)
2603    {
2604      unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2605      unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2606      if (ml_lo >= mh_lo)
2607	break;
2608
2609      mlow_hi = 0;
2610      mlow_lo = ml_lo;
2611      mhigh_hi = 0;
2612      mhigh_lo = mh_lo;
2613    }
2614
2615  *post_shift_ptr = post_shift;
2616  *lgup_ptr = lgup;
2617  if (n < HOST_BITS_PER_WIDE_INT)
2618    {
2619      unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2620      *multiplier_ptr = mhigh_lo & mask;
2621      return mhigh_lo >= mask;
2622    }
2623  else
2624    {
2625      *multiplier_ptr = mhigh_lo;
2626      return mhigh_hi;
2627    }
2628}
2629
2630/* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2631   congruent to 1 (mod 2**N).  */
2632
2633static unsigned HOST_WIDE_INT
2634invert_mod2n (x, n)
2635     unsigned HOST_WIDE_INT x;
2636     int n;
2637{
2638  /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
2639
2640  /* The algorithm notes that the choice y = x satisfies
2641     x*y == 1 mod 2^3, since x is assumed odd.
2642     Each iteration doubles the number of bits of significance in y.  */
2643
2644  unsigned HOST_WIDE_INT mask;
2645  unsigned HOST_WIDE_INT y = x;
2646  int nbit = 3;
2647
2648  mask = (n == HOST_BITS_PER_WIDE_INT
2649	  ? ~(unsigned HOST_WIDE_INT) 0
2650	  : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2651
2652  while (nbit < n)
2653    {
2654      y = y * (2 - x*y) & mask;		/* Modulo 2^N */
2655      nbit *= 2;
2656    }
2657  return y;
2658}
2659
2660/* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2661   flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2662   product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2663   to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2664   become signed.
2665
2666   The result is put in TARGET if that is convenient.
2667
2668   MODE is the mode of operation.  */
2669
2670rtx
2671expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp)
2672     enum machine_mode mode;
2673     register rtx adj_operand, op0, op1, target;
2674     int unsignedp;
2675{
2676  rtx tem;
2677  enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2678
2679  tem = expand_shift (RSHIFT_EXPR, mode, op0,
2680		      build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2681		      NULL_RTX, 0);
2682  tem = expand_and (tem, op1, NULL_RTX);
2683  adj_operand
2684    = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2685		     adj_operand);
2686
2687  tem = expand_shift (RSHIFT_EXPR, mode, op1,
2688		      build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2689		      NULL_RTX, 0);
2690  tem = expand_and (tem, op0, NULL_RTX);
2691  target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2692			  target);
2693
2694  return target;
2695}
2696
2697/* Emit code to multiply OP0 and CNST1, putting the high half of the result
2698   in TARGET if that is convenient, and return where the result is.  If the
2699   operation can not be performed, 0 is returned.
2700
2701   MODE is the mode of operation and result.
2702
2703   UNSIGNEDP nonzero means unsigned multiply.
2704
2705   MAX_COST is the total allowed cost for the expanded RTL.  */
2706
2707rtx
2708expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
2709     enum machine_mode mode;
2710     register rtx op0, target;
2711     unsigned HOST_WIDE_INT cnst1;
2712     int unsignedp;
2713     int max_cost;
2714{
2715  enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2716  optab mul_highpart_optab;
2717  optab moptab;
2718  rtx tem;
2719  int size = GET_MODE_BITSIZE (mode);
2720  rtx op1, wide_op1;
2721
2722  /* We can't support modes wider than HOST_BITS_PER_INT.  */
2723  if (size > HOST_BITS_PER_WIDE_INT)
2724    abort ();
2725
2726  op1 = GEN_INT (cnst1);
2727
2728  if (GET_MODE_BITSIZE (wider_mode) <= HOST_BITS_PER_INT)
2729    wide_op1 = op1;
2730  else
2731    wide_op1
2732      = immed_double_const (cnst1,
2733			    (unsignedp
2734			     ? (HOST_WIDE_INT) 0
2735			     : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2736			    wider_mode);
2737
2738  /* expand_mult handles constant multiplication of word_mode
2739     or narrower.  It does a poor job for large modes.  */
2740  if (size < BITS_PER_WORD
2741      && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2742    {
2743      /* We have to do this, since expand_binop doesn't do conversion for
2744	 multiply.  Maybe change expand_binop to handle widening multiply?  */
2745      op0 = convert_to_mode (wider_mode, op0, unsignedp);
2746
2747      tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, unsignedp);
2748      tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2749			  build_int_2 (size, 0), NULL_RTX, 1);
2750      return convert_modes (mode, wider_mode, tem, unsignedp);
2751    }
2752
2753  if (target == 0)
2754    target = gen_reg_rtx (mode);
2755
2756  /* Firstly, try using a multiplication insn that only generates the needed
2757     high part of the product, and in the sign flavor of unsignedp.  */
2758  if (mul_highpart_cost[(int) mode] < max_cost)
2759    {
2760      mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2761      target = expand_binop (mode, mul_highpart_optab,
2762			     op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
2763      if (target)
2764	return target;
2765    }
2766
2767  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2768     Need to adjust the result after the multiplication.  */
2769  if (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost < max_cost)
2770    {
2771      mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2772      target = expand_binop (mode, mul_highpart_optab,
2773			     op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
2774      if (target)
2775	/* We used the wrong signedness.  Adjust the result.  */
2776	return expand_mult_highpart_adjust (mode, target, op0,
2777					    op1, target, unsignedp);
2778    }
2779
2780  /* Try widening multiplication.  */
2781  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2782  if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2783      && mul_widen_cost[(int) wider_mode] < max_cost)
2784    {
2785      op1 = force_reg (mode, op1);
2786      goto try;
2787    }
2788
2789  /* Try widening the mode and perform a non-widening multiplication.  */
2790  moptab = smul_optab;
2791  if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2792      && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2793    {
2794      op1 = wide_op1;
2795      goto try;
2796    }
2797
2798  /* Try widening multiplication of opposite signedness, and adjust.  */
2799  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2800  if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2801      && (mul_widen_cost[(int) wider_mode]
2802	  + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2803    {
2804      rtx regop1 = force_reg (mode, op1);
2805      tem = expand_binop (wider_mode, moptab, op0, regop1,
2806			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2807      if (tem != 0)
2808	{
2809	  /* Extract the high half of the just generated product.  */
2810	  tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2811			      build_int_2 (size, 0), NULL_RTX, 1);
2812	  tem = convert_modes (mode, wider_mode, tem, unsignedp);
2813	  /* We used the wrong signedness.  Adjust the result.  */
2814	  return expand_mult_highpart_adjust (mode, tem, op0, op1,
2815					      target, unsignedp);
2816	}
2817    }
2818
2819  return 0;
2820
2821 try:
2822  /* Pass NULL_RTX as target since TARGET has wrong mode.  */
2823  tem = expand_binop (wider_mode, moptab, op0, op1,
2824		      NULL_RTX, unsignedp, OPTAB_WIDEN);
2825  if (tem == 0)
2826    return 0;
2827
2828  /* Extract the high half of the just generated product.  */
2829  if (mode == word_mode)
2830    {
2831      return gen_highpart (mode, tem);
2832    }
2833  else
2834    {
2835      tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2836			  build_int_2 (size, 0), NULL_RTX, 1);
2837      return convert_modes (mode, wider_mode, tem, unsignedp);
2838    }
2839}
2840
2841/* Emit the code to divide OP0 by OP1, putting the result in TARGET
2842   if that is convenient, and returning where the result is.
2843   You may request either the quotient or the remainder as the result;
2844   specify REM_FLAG nonzero to get the remainder.
2845
2846   CODE is the expression code for which kind of division this is;
2847   it controls how rounding is done.  MODE is the machine mode to use.
2848   UNSIGNEDP nonzero means do unsigned division.  */
2849
2850/* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2851   and then correct it by or'ing in missing high bits
2852   if result of ANDI is nonzero.
2853   For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2854   This could optimize to a bfexts instruction.
2855   But C doesn't use these operations, so their optimizations are
2856   left for later.  */
2857/* ??? For modulo, we don't actually need the highpart of the first product,
2858   the low part will do nicely.  And for small divisors, the second multiply
2859   can also be a low-part only multiply or even be completely left out.
2860   E.g. to calculate the remainder of a division by 3 with a 32 bit
2861   multiply, multiply with 0x55555556 and extract the upper two bits;
2862   the result is exact for inputs up to 0x1fffffff.
2863   The input range can be reduced by using cross-sum rules.
2864   For odd divisors >= 3, the following table gives right shift counts
2865   so that if an number is shifted by an integer multiple of the given
2866   amount, the remainder stays the same:
2867   2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
2868   14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
2869   0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
2870   20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
2871   0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
2872
2873   Cross-sum rules for even numbers can be derived by leaving as many bits
2874   to the right alone as the divisor has zeros to the right.
2875   E.g. if x is an unsigned 32 bit number:
2876   (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
2877   */
2878
2879#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
2880
2881rtx
2882expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
2883     int rem_flag;
2884     enum tree_code code;
2885     enum machine_mode mode;
2886     register rtx op0, op1, target;
2887     int unsignedp;
2888{
2889  enum machine_mode compute_mode;
2890  register rtx tquotient;
2891  rtx quotient = 0, remainder = 0;
2892  rtx last;
2893  int size;
2894  rtx insn, set;
2895  optab optab1, optab2;
2896  int op1_is_constant, op1_is_pow2;
2897  int max_cost, extra_cost;
2898  static HOST_WIDE_INT last_div_const = 0;
2899
2900  op1_is_constant = GET_CODE (op1) == CONST_INT;
2901  op1_is_pow2 = (op1_is_constant
2902		 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
2903		      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
2904
2905  /*
2906     This is the structure of expand_divmod:
2907
2908     First comes code to fix up the operands so we can perform the operations
2909     correctly and efficiently.
2910
2911     Second comes a switch statement with code specific for each rounding mode.
2912     For some special operands this code emits all RTL for the desired
2913     operation, for other cases, it generates only a quotient and stores it in
2914     QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
2915     to indicate that it has not done anything.
2916
2917     Last comes code that finishes the operation.  If QUOTIENT is set and
2918     REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
2919     QUOTIENT is not set, it is computed using trunc rounding.
2920
2921     We try to generate special code for division and remainder when OP1 is a
2922     constant.  If |OP1| = 2**n we can use shifts and some other fast
2923     operations.  For other values of OP1, we compute a carefully selected
2924     fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
2925     by m.
2926
2927     In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2928     half of the product.  Different strategies for generating the product are
2929     implemented in expand_mult_highpart.
2930
2931     If what we actually want is the remainder, we generate that by another
2932     by-constant multiplication and a subtraction.  */
2933
2934  /* We shouldn't be called with OP1 == const1_rtx, but some of the
2935     code below will malfunction if we are, so check here and handle
2936     the special case if so.  */
2937  if (op1 == const1_rtx)
2938    return rem_flag ? const0_rtx : op0;
2939
2940  if (target
2941      /* Don't use the function value register as a target
2942	 since we have to read it as well as write it,
2943	 and function-inlining gets confused by this.  */
2944      && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
2945	  /* Don't clobber an operand while doing a multi-step calculation.  */
2946	  || ((rem_flag || op1_is_constant)
2947	      && (reg_mentioned_p (target, op0)
2948		  || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
2949	  || reg_mentioned_p (target, op1)
2950	  || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
2951    target = 0;
2952
2953  /* Get the mode in which to perform this computation.  Normally it will
2954     be MODE, but sometimes we can't do the desired operation in MODE.
2955     If so, pick a wider mode in which we can do the operation.  Convert
2956     to that mode at the start to avoid repeated conversions.
2957
2958     First see what operations we need.  These depend on the expression
2959     we are evaluating.  (We assume that divxx3 insns exist under the
2960     same conditions that modxx3 insns and that these insns don't normally
2961     fail.  If these assumptions are not correct, we may generate less
2962     efficient code in some cases.)
2963
2964     Then see if we find a mode in which we can open-code that operation
2965     (either a division, modulus, or shift).  Finally, check for the smallest
2966     mode for which we can do the operation with a library call.  */
2967
2968  /* We might want to refine this now that we have division-by-constant
2969     optimization.  Since expand_mult_highpart tries so many variants, it is
2970     not straightforward to generalize this.  Maybe we should make an array
2971     of possible modes in init_expmed?  Save this for GCC 2.7.  */
2972
2973  optab1 = (op1_is_pow2 ? (unsignedp ? lshr_optab : ashr_optab)
2974	    : (unsignedp ? udiv_optab : sdiv_optab));
2975  optab2 = (op1_is_pow2 ? optab1 : (unsignedp ? udivmod_optab : sdivmod_optab));
2976
2977  for (compute_mode = mode; compute_mode != VOIDmode;
2978       compute_mode = GET_MODE_WIDER_MODE (compute_mode))
2979    if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
2980	|| optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
2981      break;
2982
2983  if (compute_mode == VOIDmode)
2984    for (compute_mode = mode; compute_mode != VOIDmode;
2985	 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
2986      if (optab1->handlers[(int) compute_mode].libfunc
2987	  || optab2->handlers[(int) compute_mode].libfunc)
2988	break;
2989
2990  /* If we still couldn't find a mode, use MODE, but we'll probably abort
2991     in expand_binop.  */
2992  if (compute_mode == VOIDmode)
2993    compute_mode = mode;
2994
2995  if (target && GET_MODE (target) == compute_mode)
2996    tquotient = target;
2997  else
2998    tquotient = gen_reg_rtx (compute_mode);
2999
3000  size = GET_MODE_BITSIZE (compute_mode);
3001#if 0
3002  /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3003     (mode), and thereby get better code when OP1 is a constant.  Do that
3004     later.  It will require going over all usages of SIZE below.  */
3005  size = GET_MODE_BITSIZE (mode);
3006#endif
3007
3008  /* Only deduct something for a REM if the last divide done was
3009     for a different constant.   Then set the constant of the last
3010     divide.  */
3011  max_cost = div_cost[(int) compute_mode]
3012    - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3013		      && INTVAL (op1) == last_div_const)
3014       ? mul_cost[(int) compute_mode] + add_cost : 0);
3015
3016  last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3017
3018  /* Now convert to the best mode to use.  */
3019  if (compute_mode != mode)
3020    {
3021      op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3022      op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3023
3024      /* convert_modes may have placed op1 into a register, so we
3025	 must recompute the following.  */
3026      op1_is_constant = GET_CODE (op1) == CONST_INT;
3027      op1_is_pow2 = (op1_is_constant
3028		     && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3029			  || (! unsignedp
3030			      && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3031    }
3032
3033  /* If one of the operands is a volatile MEM, copy it into a register.  */
3034
3035  if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
3036    op0 = force_reg (compute_mode, op0);
3037  if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
3038    op1 = force_reg (compute_mode, op1);
3039
3040  /* If we need the remainder or if OP1 is constant, we need to
3041     put OP0 in a register in case it has any queued subexpressions.  */
3042  if (rem_flag || op1_is_constant)
3043    op0 = force_reg (compute_mode, op0);
3044
3045  last = get_last_insn ();
3046
3047  /* Promote floor rounding to trunc rounding for unsigned operations.  */
3048  if (unsignedp)
3049    {
3050      if (code == FLOOR_DIV_EXPR)
3051	code = TRUNC_DIV_EXPR;
3052      if (code == FLOOR_MOD_EXPR)
3053	code = TRUNC_MOD_EXPR;
3054      if (code == EXACT_DIV_EXPR && op1_is_pow2)
3055	code = TRUNC_DIV_EXPR;
3056    }
3057
3058  if (op1 != const0_rtx)
3059    switch (code)
3060      {
3061      case TRUNC_MOD_EXPR:
3062      case TRUNC_DIV_EXPR:
3063	if (op1_is_constant)
3064	  {
3065	    if (unsignedp)
3066	      {
3067		unsigned HOST_WIDE_INT mh, ml;
3068		int pre_shift, post_shift;
3069		int dummy;
3070		unsigned HOST_WIDE_INT d = INTVAL (op1);
3071
3072		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3073		  {
3074		    pre_shift = floor_log2 (d);
3075		    if (rem_flag)
3076		      {
3077			remainder
3078			  = expand_binop (compute_mode, and_optab, op0,
3079					  GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3080					  remainder, 1,
3081					  OPTAB_LIB_WIDEN);
3082			if (remainder)
3083			  return gen_lowpart (mode, remainder);
3084		      }
3085		    quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3086					     build_int_2 (pre_shift, 0),
3087					     tquotient, 1);
3088		  }
3089		else if (size <= HOST_BITS_PER_WIDE_INT)
3090		  {
3091		    if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3092		      {
3093			/* Most significant bit of divisor is set; emit an scc
3094			   insn.  */
3095			quotient = emit_store_flag (tquotient, GEU, op0, op1,
3096						    compute_mode, 1, 1);
3097			if (quotient == 0)
3098			  goto fail1;
3099		      }
3100		    else
3101		      {
3102			/* Find a suitable multiplier and right shift count
3103			   instead of multiplying with D.  */
3104
3105			mh = choose_multiplier (d, size, size,
3106						&ml, &post_shift, &dummy);
3107
3108			/* If the suggested multiplier is more than SIZE bits,
3109			   we can do better for even divisors, using an
3110			   initial right shift.  */
3111			if (mh != 0 && (d & 1) == 0)
3112			  {
3113			    pre_shift = floor_log2 (d & -d);
3114			    mh = choose_multiplier (d >> pre_shift, size,
3115						    size - pre_shift,
3116						    &ml, &post_shift, &dummy);
3117			    if (mh)
3118			      abort ();
3119			  }
3120			else
3121			  pre_shift = 0;
3122
3123			if (mh != 0)
3124			  {
3125			    rtx t1, t2, t3, t4;
3126
3127			    extra_cost = (shift_cost[post_shift - 1]
3128					  + shift_cost[1] + 2 * add_cost);
3129			    t1 = expand_mult_highpart (compute_mode, op0, ml,
3130						       NULL_RTX, 1,
3131						       max_cost - extra_cost);
3132			    if (t1 == 0)
3133			      goto fail1;
3134			    t2 = force_operand (gen_rtx_MINUS (compute_mode,
3135							       op0, t1),
3136						NULL_RTX);
3137			    t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3138					       build_int_2 (1, 0), NULL_RTX,1);
3139			    t4 = force_operand (gen_rtx_PLUS (compute_mode,
3140							      t1, t3),
3141						NULL_RTX);
3142			    quotient
3143			      = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3144					      build_int_2 (post_shift - 1, 0),
3145					      tquotient, 1);
3146			  }
3147			else
3148			  {
3149			    rtx t1, t2;
3150
3151			    t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3152					       build_int_2 (pre_shift, 0),
3153					       NULL_RTX, 1);
3154			    extra_cost = (shift_cost[pre_shift]
3155					  + shift_cost[post_shift]);
3156			    t2 = expand_mult_highpart (compute_mode, t1, ml,
3157						       NULL_RTX, 1,
3158						       max_cost - extra_cost);
3159			    if (t2 == 0)
3160			      goto fail1;
3161			    quotient
3162			      = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3163					      build_int_2 (post_shift, 0),
3164					      tquotient, 1);
3165			  }
3166		      }
3167		  }
3168		else		/* Too wide mode to use tricky code */
3169		  break;
3170
3171		insn = get_last_insn ();
3172		if (insn != last
3173		    && (set = single_set (insn)) != 0
3174		    && SET_DEST (set) == quotient)
3175		  set_unique_reg_note (insn,
3176		  		       REG_EQUAL,
3177				       gen_rtx_UDIV (compute_mode, op0, op1));
3178	      }
3179	    else		/* TRUNC_DIV, signed */
3180	      {
3181		unsigned HOST_WIDE_INT ml;
3182		int lgup, post_shift;
3183		HOST_WIDE_INT d = INTVAL (op1);
3184		unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3185
3186		/* n rem d = n rem -d */
3187		if (rem_flag && d < 0)
3188		  {
3189		    d = abs_d;
3190		    op1 = GEN_INT (abs_d);
3191		  }
3192
3193		if (d == 1)
3194		  quotient = op0;
3195		else if (d == -1)
3196		  quotient = expand_unop (compute_mode, neg_optab, op0,
3197					  tquotient, 0);
3198		else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3199		  {
3200		    /* This case is not handled correctly below.  */
3201		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
3202						compute_mode, 1, 1);
3203		    if (quotient == 0)
3204		      goto fail1;
3205		  }
3206		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3207			 && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap))
3208		  ;
3209		else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3210		  {
3211		    lgup = floor_log2 (abs_d);
3212		    if (abs_d != 2 && BRANCH_COST < 3)
3213		      {
3214			rtx label = gen_label_rtx ();
3215			rtx t1;
3216
3217			t1 = copy_to_mode_reg (compute_mode, op0);
3218			do_cmp_and_jump (t1, const0_rtx, GE,
3219					 compute_mode, label);
3220			expand_inc (t1, GEN_INT (abs_d - 1));
3221			emit_label (label);
3222			quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3223						 build_int_2 (lgup, 0),
3224						 tquotient, 0);
3225		      }
3226		    else
3227		      {
3228			rtx t1, t2, t3;
3229			t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3230					   build_int_2 (size - 1, 0),
3231					   NULL_RTX, 0);
3232			t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3233					   build_int_2 (size - lgup, 0),
3234					   NULL_RTX, 1);
3235			t3 = force_operand (gen_rtx_PLUS (compute_mode,
3236							  op0, t2),
3237					    NULL_RTX);
3238			quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3239						 build_int_2 (lgup, 0),
3240						 tquotient, 0);
3241		      }
3242
3243		    /* We have computed OP0 / abs(OP1).  If OP1 is negative, negate
3244		       the quotient.  */
3245		    if (d < 0)
3246		      {
3247			insn = get_last_insn ();
3248			if (insn != last
3249			    && (set = single_set (insn)) != 0
3250			    && SET_DEST (set) == quotient
3251			    && abs_d < ((unsigned HOST_WIDE_INT) 1
3252					<< (HOST_BITS_PER_WIDE_INT - 1)))
3253			  set_unique_reg_note (insn,
3254			  		       REG_EQUAL,
3255					       gen_rtx_DIV (compute_mode,
3256							    op0,
3257							    GEN_INT (abs_d)));
3258
3259			quotient = expand_unop (compute_mode, neg_optab,
3260						quotient, quotient, 0);
3261		      }
3262		  }
3263		else if (size <= HOST_BITS_PER_WIDE_INT)
3264		  {
3265		    choose_multiplier (abs_d, size, size - 1,
3266				       &ml, &post_shift, &lgup);
3267		    if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3268		      {
3269			rtx t1, t2, t3;
3270
3271			extra_cost = (shift_cost[post_shift]
3272				      + shift_cost[size - 1] + add_cost);
3273			t1 = expand_mult_highpart (compute_mode, op0, ml,
3274						   NULL_RTX, 0,
3275						   max_cost - extra_cost);
3276			if (t1 == 0)
3277			  goto fail1;
3278			t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3279					   build_int_2 (post_shift, 0), NULL_RTX, 0);
3280			t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3281					   build_int_2 (size - 1, 0), NULL_RTX, 0);
3282			if (d < 0)
3283			  quotient = force_operand (gen_rtx_MINUS (compute_mode, t3, t2),
3284						    tquotient);
3285			else
3286			  quotient = force_operand (gen_rtx_MINUS (compute_mode, t2, t3),
3287						    tquotient);
3288		      }
3289		    else
3290		      {
3291			rtx t1, t2, t3, t4;
3292
3293			ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3294			extra_cost = (shift_cost[post_shift]
3295				      + shift_cost[size - 1] + 2 * add_cost);
3296			t1 = expand_mult_highpart (compute_mode, op0, ml,
3297						   NULL_RTX, 0,
3298						   max_cost - extra_cost);
3299			if (t1 == 0)
3300			  goto fail1;
3301			t2 = force_operand (gen_rtx_PLUS (compute_mode, t1, op0),
3302					    NULL_RTX);
3303			t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3304					   build_int_2 (post_shift, 0), NULL_RTX, 0);
3305			t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3306					   build_int_2 (size - 1, 0), NULL_RTX, 0);
3307			if (d < 0)
3308			  quotient = force_operand (gen_rtx_MINUS (compute_mode, t4, t3),
3309						    tquotient);
3310			else
3311			  quotient = force_operand (gen_rtx_MINUS (compute_mode, t3, t4),
3312						    tquotient);
3313		      }
3314		  }
3315		else		/* Too wide mode to use tricky code */
3316		  break;
3317
3318		insn = get_last_insn ();
3319		if (insn != last
3320		    && (set = single_set (insn)) != 0
3321		    && SET_DEST (set) == quotient)
3322		  set_unique_reg_note (insn,
3323		  		       REG_EQUAL,
3324				       gen_rtx_DIV (compute_mode, op0, op1));
3325	      }
3326	    break;
3327	  }
3328      fail1:
3329	delete_insns_since (last);
3330	break;
3331
3332      case FLOOR_DIV_EXPR:
3333      case FLOOR_MOD_EXPR:
3334      /* We will come here only for signed operations.  */
3335	if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3336	  {
3337	    unsigned HOST_WIDE_INT mh, ml;
3338	    int pre_shift, lgup, post_shift;
3339	    HOST_WIDE_INT d = INTVAL (op1);
3340
3341	    if (d > 0)
3342	      {
3343		/* We could just as easily deal with negative constants here,
3344		   but it does not seem worth the trouble for GCC 2.6.  */
3345		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3346		  {
3347		    pre_shift = floor_log2 (d);
3348		    if (rem_flag)
3349		      {
3350			remainder = expand_binop (compute_mode, and_optab, op0,
3351						  GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3352						  remainder, 0, OPTAB_LIB_WIDEN);
3353			if (remainder)
3354			  return gen_lowpart (mode, remainder);
3355		      }
3356		    quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3357					     build_int_2 (pre_shift, 0),
3358					     tquotient, 0);
3359		  }
3360		else
3361		  {
3362		    rtx t1, t2, t3, t4;
3363
3364		    mh = choose_multiplier (d, size, size - 1,
3365					    &ml, &post_shift, &lgup);
3366		    if (mh)
3367		      abort ();
3368
3369		    t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3370				       build_int_2 (size - 1, 0), NULL_RTX, 0);
3371		    t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3372				       NULL_RTX, 0, OPTAB_WIDEN);
3373		    extra_cost = (shift_cost[post_shift]
3374				  + shift_cost[size - 1] + 2 * add_cost);
3375		    t3 = expand_mult_highpart (compute_mode, t2, ml,
3376					       NULL_RTX, 1,
3377					       max_cost - extra_cost);
3378		    if (t3 != 0)
3379		      {
3380			t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3381					   build_int_2 (post_shift, 0),
3382					   NULL_RTX, 1);
3383			quotient = expand_binop (compute_mode, xor_optab,
3384						 t4, t1, tquotient, 0,
3385						 OPTAB_WIDEN);
3386		      }
3387		  }
3388	      }
3389	    else
3390	      {
3391		rtx nsign, t1, t2, t3, t4;
3392		t1 = force_operand (gen_rtx_PLUS (compute_mode,
3393						  op0, constm1_rtx), NULL_RTX);
3394		t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3395				   0, OPTAB_WIDEN);
3396		nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3397				      build_int_2 (size - 1, 0), NULL_RTX, 0);
3398		t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3399				    NULL_RTX);
3400		t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3401				    NULL_RTX, 0);
3402		if (t4)
3403		  {
3404		    rtx t5;
3405		    t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3406				      NULL_RTX, 0);
3407		    quotient = force_operand (gen_rtx_PLUS (compute_mode,
3408							    t4, t5),
3409					      tquotient);
3410		  }
3411	      }
3412	  }
3413
3414	if (quotient != 0)
3415	  break;
3416	delete_insns_since (last);
3417
3418	/* Try using an instruction that produces both the quotient and
3419	   remainder, using truncation.  We can easily compensate the quotient
3420	   or remainder to get floor rounding, once we have the remainder.
3421	   Notice that we compute also the final remainder value here,
3422	   and return the result right away.  */
3423	if (target == 0 || GET_MODE (target) != compute_mode)
3424	  target = gen_reg_rtx (compute_mode);
3425
3426	if (rem_flag)
3427	  {
3428	    remainder
3429	      = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3430	    quotient = gen_reg_rtx (compute_mode);
3431	  }
3432	else
3433	  {
3434	    quotient
3435	      = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3436	    remainder = gen_reg_rtx (compute_mode);
3437	  }
3438
3439	if (expand_twoval_binop (sdivmod_optab, op0, op1,
3440				 quotient, remainder, 0))
3441	  {
3442	    /* This could be computed with a branch-less sequence.
3443	       Save that for later.  */
3444	    rtx tem;
3445	    rtx label = gen_label_rtx ();
3446	    do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3447	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
3448				NULL_RTX, 0, OPTAB_WIDEN);
3449	    do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3450	    expand_dec (quotient, const1_rtx);
3451	    expand_inc (remainder, op1);
3452	    emit_label (label);
3453	    return gen_lowpart (mode, rem_flag ? remainder : quotient);
3454	  }
3455
3456	/* No luck with division elimination or divmod.  Have to do it
3457	   by conditionally adjusting op0 *and* the result.  */
3458	{
3459	  rtx label1, label2, label3, label4, label5;
3460	  rtx adjusted_op0;
3461	  rtx tem;
3462
3463	  quotient = gen_reg_rtx (compute_mode);
3464	  adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3465	  label1 = gen_label_rtx ();
3466	  label2 = gen_label_rtx ();
3467	  label3 = gen_label_rtx ();
3468	  label4 = gen_label_rtx ();
3469	  label5 = gen_label_rtx ();
3470	  do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3471	  do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3472	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3473			      quotient, 0, OPTAB_LIB_WIDEN);
3474	  if (tem != quotient)
3475	    emit_move_insn (quotient, tem);
3476	  emit_jump_insn (gen_jump (label5));
3477	  emit_barrier ();
3478	  emit_label (label1);
3479	  expand_inc (adjusted_op0, const1_rtx);
3480	  emit_jump_insn (gen_jump (label4));
3481	  emit_barrier ();
3482	  emit_label (label2);
3483	  do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3484	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3485			      quotient, 0, OPTAB_LIB_WIDEN);
3486	  if (tem != quotient)
3487	    emit_move_insn (quotient, tem);
3488	  emit_jump_insn (gen_jump (label5));
3489	  emit_barrier ();
3490	  emit_label (label3);
3491	  expand_dec (adjusted_op0, const1_rtx);
3492	  emit_label (label4);
3493	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3494			      quotient, 0, OPTAB_LIB_WIDEN);
3495	  if (tem != quotient)
3496	    emit_move_insn (quotient, tem);
3497	  expand_dec (quotient, const1_rtx);
3498	  emit_label (label5);
3499	}
3500	break;
3501
3502      case CEIL_DIV_EXPR:
3503      case CEIL_MOD_EXPR:
3504	if (unsignedp)
3505	  {
3506	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3507	      {
3508		rtx t1, t2, t3;
3509		unsigned HOST_WIDE_INT d = INTVAL (op1);
3510		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3511				   build_int_2 (floor_log2 (d), 0),
3512				   tquotient, 1);
3513		t2 = expand_binop (compute_mode, and_optab, op0,
3514				   GEN_INT (d - 1),
3515				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3516		t3 = gen_reg_rtx (compute_mode);
3517		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3518				      compute_mode, 1, 1);
3519		if (t3 == 0)
3520		  {
3521		    rtx lab;
3522		    lab = gen_label_rtx ();
3523		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3524		    expand_inc (t1, const1_rtx);
3525		    emit_label (lab);
3526		    quotient = t1;
3527		  }
3528		else
3529		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
3530							  t1, t3),
3531					    tquotient);
3532		break;
3533	      }
3534
3535	    /* Try using an instruction that produces both the quotient and
3536	       remainder, using truncation.  We can easily compensate the
3537	       quotient or remainder to get ceiling rounding, once we have the
3538	       remainder.  Notice that we compute also the final remainder
3539	       value here, and return the result right away.  */
3540	    if (target == 0 || GET_MODE (target) != compute_mode)
3541	      target = gen_reg_rtx (compute_mode);
3542
3543	    if (rem_flag)
3544	      {
3545		remainder = (GET_CODE (target) == REG
3546			     ? target : gen_reg_rtx (compute_mode));
3547		quotient = gen_reg_rtx (compute_mode);
3548	      }
3549	    else
3550	      {
3551		quotient = (GET_CODE (target) == REG
3552			    ? target : gen_reg_rtx (compute_mode));
3553		remainder = gen_reg_rtx (compute_mode);
3554	      }
3555
3556	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3557				     remainder, 1))
3558	      {
3559		/* This could be computed with a branch-less sequence.
3560		   Save that for later.  */
3561		rtx label = gen_label_rtx ();
3562		do_cmp_and_jump (remainder, const0_rtx, EQ,
3563				 compute_mode, label);
3564		expand_inc (quotient, const1_rtx);
3565		expand_dec (remainder, op1);
3566		emit_label (label);
3567		return gen_lowpart (mode, rem_flag ? remainder : quotient);
3568	      }
3569
3570	    /* No luck with division elimination or divmod.  Have to do it
3571	       by conditionally adjusting op0 *and* the result.  */
3572	    {
3573	      rtx label1, label2;
3574	      rtx adjusted_op0, tem;
3575
3576	      quotient = gen_reg_rtx (compute_mode);
3577	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3578	      label1 = gen_label_rtx ();
3579	      label2 = gen_label_rtx ();
3580	      do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3581			       compute_mode, label1);
3582	      emit_move_insn  (quotient, const0_rtx);
3583	      emit_jump_insn (gen_jump (label2));
3584	      emit_barrier ();
3585	      emit_label (label1);
3586	      expand_dec (adjusted_op0, const1_rtx);
3587	      tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3588				  quotient, 1, OPTAB_LIB_WIDEN);
3589	      if (tem != quotient)
3590		emit_move_insn (quotient, tem);
3591	      expand_inc (quotient, const1_rtx);
3592	      emit_label (label2);
3593	    }
3594	  }
3595	else /* signed */
3596	  {
3597	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3598		&& INTVAL (op1) >= 0)
3599	      {
3600		/* This is extremely similar to the code for the unsigned case
3601		   above.  For 2.7 we should merge these variants, but for
3602		   2.6.1 I don't want to touch the code for unsigned since that
3603		   get used in C.  The signed case will only be used by other
3604		   languages (Ada).  */
3605
3606		rtx t1, t2, t3;
3607		unsigned HOST_WIDE_INT d = INTVAL (op1);
3608		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3609				   build_int_2 (floor_log2 (d), 0),
3610				   tquotient, 0);
3611		t2 = expand_binop (compute_mode, and_optab, op0,
3612				   GEN_INT (d - 1),
3613				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3614		t3 = gen_reg_rtx (compute_mode);
3615		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3616				      compute_mode, 1, 1);
3617		if (t3 == 0)
3618		  {
3619		    rtx lab;
3620		    lab = gen_label_rtx ();
3621		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3622		    expand_inc (t1, const1_rtx);
3623		    emit_label (lab);
3624		    quotient = t1;
3625		  }
3626		else
3627		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
3628							  t1, t3),
3629					    tquotient);
3630		break;
3631	      }
3632
3633	    /* Try using an instruction that produces both the quotient and
3634	       remainder, using truncation.  We can easily compensate the
3635	       quotient or remainder to get ceiling rounding, once we have the
3636	       remainder.  Notice that we compute also the final remainder
3637	       value here, and return the result right away.  */
3638	    if (target == 0 || GET_MODE (target) != compute_mode)
3639	      target = gen_reg_rtx (compute_mode);
3640	    if (rem_flag)
3641	      {
3642		remainder= (GET_CODE (target) == REG
3643			    ? target : gen_reg_rtx (compute_mode));
3644		quotient = gen_reg_rtx (compute_mode);
3645	      }
3646	    else
3647	      {
3648		quotient = (GET_CODE (target) == REG
3649			    ? target : gen_reg_rtx (compute_mode));
3650		remainder = gen_reg_rtx (compute_mode);
3651	      }
3652
3653	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3654				     remainder, 0))
3655	      {
3656		/* This could be computed with a branch-less sequence.
3657		   Save that for later.  */
3658		rtx tem;
3659		rtx label = gen_label_rtx ();
3660		do_cmp_and_jump (remainder, const0_rtx, EQ,
3661				 compute_mode, label);
3662		tem = expand_binop (compute_mode, xor_optab, op0, op1,
3663				    NULL_RTX, 0, OPTAB_WIDEN);
3664		do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
3665		expand_inc (quotient, const1_rtx);
3666		expand_dec (remainder, op1);
3667		emit_label (label);
3668		return gen_lowpart (mode, rem_flag ? remainder : quotient);
3669	      }
3670
3671	    /* No luck with division elimination or divmod.  Have to do it
3672	       by conditionally adjusting op0 *and* the result.  */
3673	    {
3674	      rtx label1, label2, label3, label4, label5;
3675	      rtx adjusted_op0;
3676	      rtx tem;
3677
3678	      quotient = gen_reg_rtx (compute_mode);
3679	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3680	      label1 = gen_label_rtx ();
3681	      label2 = gen_label_rtx ();
3682	      label3 = gen_label_rtx ();
3683	      label4 = gen_label_rtx ();
3684	      label5 = gen_label_rtx ();
3685	      do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3686	      do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
3687			       compute_mode, label1);
3688	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3689				  quotient, 0, OPTAB_LIB_WIDEN);
3690	      if (tem != quotient)
3691		emit_move_insn (quotient, tem);
3692	      emit_jump_insn (gen_jump (label5));
3693	      emit_barrier ();
3694	      emit_label (label1);
3695	      expand_dec (adjusted_op0, const1_rtx);
3696	      emit_jump_insn (gen_jump (label4));
3697	      emit_barrier ();
3698	      emit_label (label2);
3699	      do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
3700			       compute_mode, label3);
3701	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3702				  quotient, 0, OPTAB_LIB_WIDEN);
3703	      if (tem != quotient)
3704		emit_move_insn (quotient, tem);
3705	      emit_jump_insn (gen_jump (label5));
3706	      emit_barrier ();
3707	      emit_label (label3);
3708	      expand_inc (adjusted_op0, const1_rtx);
3709	      emit_label (label4);
3710	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3711				  quotient, 0, OPTAB_LIB_WIDEN);
3712	      if (tem != quotient)
3713		emit_move_insn (quotient, tem);
3714	      expand_inc (quotient, const1_rtx);
3715	      emit_label (label5);
3716	    }
3717	  }
3718	break;
3719
3720      case EXACT_DIV_EXPR:
3721	if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3722	  {
3723	    HOST_WIDE_INT d = INTVAL (op1);
3724	    unsigned HOST_WIDE_INT ml;
3725	    int post_shift;
3726	    rtx t1;
3727
3728	    post_shift = floor_log2 (d & -d);
3729	    ml = invert_mod2n (d >> post_shift, size);
3730	    t1 = expand_mult (compute_mode, op0, GEN_INT (ml), NULL_RTX,
3731			      unsignedp);
3732	    quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3733				     build_int_2 (post_shift, 0),
3734				     NULL_RTX, unsignedp);
3735
3736	    insn = get_last_insn ();
3737	    set_unique_reg_note (insn,
3738	    			 REG_EQUAL,
3739				 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
3740						 compute_mode,
3741						 op0, op1));
3742	  }
3743	break;
3744
3745      case ROUND_DIV_EXPR:
3746      case ROUND_MOD_EXPR:
3747	if (unsignedp)
3748	  {
3749	    rtx tem;
3750	    rtx label;
3751	    label = gen_label_rtx ();
3752	    quotient = gen_reg_rtx (compute_mode);
3753	    remainder = gen_reg_rtx (compute_mode);
3754	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3755	      {
3756		rtx tem;
3757		quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3758					 quotient, 1, OPTAB_LIB_WIDEN);
3759		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3760		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3761					  remainder, 1, OPTAB_LIB_WIDEN);
3762	      }
3763	    tem = plus_constant (op1, -1);
3764	    tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3765				build_int_2 (1, 0), NULL_RTX, 1);
3766	    do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
3767	    expand_inc (quotient, const1_rtx);
3768	    expand_dec (remainder, op1);
3769	    emit_label (label);
3770	  }
3771	else
3772	  {
3773	    rtx abs_rem, abs_op1, tem, mask;
3774	    rtx label;
3775	    label = gen_label_rtx ();
3776	    quotient = gen_reg_rtx (compute_mode);
3777	    remainder = gen_reg_rtx (compute_mode);
3778	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3779	      {
3780		rtx tem;
3781		quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3782					 quotient, 0, OPTAB_LIB_WIDEN);
3783		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3784		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3785					  remainder, 0, OPTAB_LIB_WIDEN);
3786	      }
3787	    abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 0);
3788	    abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 0);
3789	    tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3790				build_int_2 (1, 0), NULL_RTX, 1);
3791	    do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
3792	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
3793				NULL_RTX, 0, OPTAB_WIDEN);
3794	    mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3795				build_int_2 (size - 1, 0), NULL_RTX, 0);
3796	    tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3797				NULL_RTX, 0, OPTAB_WIDEN);
3798	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
3799				NULL_RTX, 0, OPTAB_WIDEN);
3800	    expand_inc (quotient, tem);
3801	    tem = expand_binop (compute_mode, xor_optab, mask, op1,
3802				NULL_RTX, 0, OPTAB_WIDEN);
3803	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
3804				NULL_RTX, 0, OPTAB_WIDEN);
3805	    expand_dec (remainder, tem);
3806	    emit_label (label);
3807	  }
3808	return gen_lowpart (mode, rem_flag ? remainder : quotient);
3809
3810      default:
3811	abort ();
3812      }
3813
3814  if (quotient == 0)
3815    {
3816      if (target && GET_MODE (target) != compute_mode)
3817	target = 0;
3818
3819      if (rem_flag)
3820	{
3821	  /* Try to produce the remainder without producing the quotient.
3822	     If we seem to have a divmod patten that does not require widening,
3823	     don't try windening here.  We should really have an WIDEN argument
3824	     to expand_twoval_binop, since what we'd really like to do here is
3825	     1) try a mod insn in compute_mode
3826	     2) try a divmod insn in compute_mode
3827	     3) try a div insn in compute_mode and multiply-subtract to get
3828	        remainder
3829	     4) try the same things with widening allowed.  */
3830	  remainder
3831	    = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3832				 op0, op1, target,
3833				 unsignedp,
3834				 ((optab2->handlers[(int) compute_mode].insn_code
3835				   != CODE_FOR_nothing)
3836				  ? OPTAB_DIRECT : OPTAB_WIDEN));
3837	  if (remainder == 0)
3838	    {
3839	      /* No luck there.  Can we do remainder and divide at once
3840		 without a library call?  */
3841	      remainder = gen_reg_rtx (compute_mode);
3842	      if (! expand_twoval_binop ((unsignedp
3843					  ? udivmod_optab
3844					  : sdivmod_optab),
3845					 op0, op1,
3846					 NULL_RTX, remainder, unsignedp))
3847		remainder = 0;
3848	    }
3849
3850	  if (remainder)
3851	    return gen_lowpart (mode, remainder);
3852	}
3853
3854      /* Produce the quotient.  Try a quotient insn, but not a library call.
3855	 If we have a divmod in this mode, use it in preference to widening
3856	 the div (for this test we assume it will not fail). Note that optab2
3857	 is set to the one of the two optabs that the call below will use.  */
3858      quotient
3859	= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3860			     op0, op1, rem_flag ? NULL_RTX : target,
3861			     unsignedp,
3862			     ((optab2->handlers[(int) compute_mode].insn_code
3863			       != CODE_FOR_nothing)
3864			      ? OPTAB_DIRECT : OPTAB_WIDEN));
3865
3866      if (quotient == 0)
3867	{
3868	  /* No luck there.  Try a quotient-and-remainder insn,
3869	     keeping the quotient alone.  */
3870	  quotient = gen_reg_rtx (compute_mode);
3871	  if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3872				     op0, op1,
3873				     quotient, NULL_RTX, unsignedp))
3874	    {
3875	      quotient = 0;
3876	      if (! rem_flag)
3877		/* Still no luck.  If we are not computing the remainder,
3878		   use a library call for the quotient.  */
3879		quotient = sign_expand_binop (compute_mode,
3880					      udiv_optab, sdiv_optab,
3881					      op0, op1, target,
3882					      unsignedp, OPTAB_LIB_WIDEN);
3883	    }
3884	}
3885    }
3886
3887  if (rem_flag)
3888    {
3889      if (target && GET_MODE (target) != compute_mode)
3890	target = 0;
3891
3892      if (quotient == 0)
3893	/* No divide instruction either.  Use library for remainder.  */
3894	remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3895				       op0, op1, target,
3896				       unsignedp, OPTAB_LIB_WIDEN);
3897      else
3898	{
3899	  /* We divided.  Now finish doing X - Y * (X / Y).  */
3900	  remainder = expand_mult (compute_mode, quotient, op1,
3901				   NULL_RTX, unsignedp);
3902	  remainder = expand_binop (compute_mode, sub_optab, op0,
3903				    remainder, target, unsignedp,
3904				    OPTAB_LIB_WIDEN);
3905	}
3906    }
3907
3908  return gen_lowpart (mode, rem_flag ? remainder : quotient);
3909}
3910
3911/* Return a tree node with data type TYPE, describing the value of X.
3912   Usually this is an RTL_EXPR, if there is no obvious better choice.
3913   X may be an expression, however we only support those expressions
3914   generated by loop.c.   */
3915
3916tree
3917make_tree (type, x)
3918     tree type;
3919     rtx x;
3920{
3921  tree t;
3922
3923  switch (GET_CODE (x))
3924    {
3925    case CONST_INT:
3926      t = build_int_2 (INTVAL (x),
3927		       (TREE_UNSIGNED (type)
3928			&& (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
3929		       || INTVAL (x) >= 0 ? 0 : -1);
3930      TREE_TYPE (t) = type;
3931      return t;
3932
3933    case CONST_DOUBLE:
3934      if (GET_MODE (x) == VOIDmode)
3935	{
3936	  t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
3937	  TREE_TYPE (t) = type;
3938	}
3939      else
3940	{
3941	  REAL_VALUE_TYPE d;
3942
3943	  REAL_VALUE_FROM_CONST_DOUBLE (d, x);
3944	  t = build_real (type, d);
3945	}
3946
3947      return t;
3948
3949    case PLUS:
3950      return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
3951			  make_tree (type, XEXP (x, 1))));
3952
3953    case MINUS:
3954      return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
3955			  make_tree (type, XEXP (x, 1))));
3956
3957    case NEG:
3958      return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
3959
3960    case MULT:
3961      return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
3962			  make_tree (type, XEXP (x, 1))));
3963
3964    case ASHIFT:
3965      return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
3966			  make_tree (type, XEXP (x, 1))));
3967
3968    case LSHIFTRT:
3969      return fold (convert (type,
3970			    build (RSHIFT_EXPR, unsigned_type (type),
3971				   make_tree (unsigned_type (type),
3972					      XEXP (x, 0)),
3973				   make_tree (type, XEXP (x, 1)))));
3974
3975    case ASHIFTRT:
3976      return fold (convert (type,
3977			    build (RSHIFT_EXPR, signed_type (type),
3978				   make_tree (signed_type (type), XEXP (x, 0)),
3979				   make_tree (type, XEXP (x, 1)))));
3980
3981    case DIV:
3982      if (TREE_CODE (type) != REAL_TYPE)
3983	t = signed_type (type);
3984      else
3985	t = type;
3986
3987      return fold (convert (type,
3988			    build (TRUNC_DIV_EXPR, t,
3989				   make_tree (t, XEXP (x, 0)),
3990				   make_tree (t, XEXP (x, 1)))));
3991    case UDIV:
3992      t = unsigned_type (type);
3993      return fold (convert (type,
3994			    build (TRUNC_DIV_EXPR, t,
3995				   make_tree (t, XEXP (x, 0)),
3996				   make_tree (t, XEXP (x, 1)))));
3997   default:
3998      t = make_node (RTL_EXPR);
3999      TREE_TYPE (t) = type;
4000      RTL_EXPR_RTL (t) = x;
4001      /* There are no insns to be output
4002	 when this rtl_expr is used.  */
4003      RTL_EXPR_SEQUENCE (t) = 0;
4004      return t;
4005    }
4006}
4007
4008/* Return an rtx representing the value of X * MULT + ADD.
4009   TARGET is a suggestion for where to store the result (an rtx).
4010   MODE is the machine mode for the computation.
4011   X and MULT must have mode MODE.  ADD may have a different mode.
4012   So can X (defaults to same as MODE).
4013   UNSIGNEDP is non-zero to do unsigned multiplication.
4014   This may emit insns.  */
4015
4016rtx
4017expand_mult_add (x, target, mult, add, mode, unsignedp)
4018     rtx x, target, mult, add;
4019     enum machine_mode mode;
4020     int unsignedp;
4021{
4022  tree type = type_for_mode (mode, unsignedp);
4023  tree add_type = (GET_MODE (add) == VOIDmode
4024		   ? type : type_for_mode (GET_MODE (add), unsignedp));
4025  tree result =  fold (build (PLUS_EXPR, type,
4026			      fold (build (MULT_EXPR, type,
4027					   make_tree (type, x),
4028					   make_tree (type, mult))),
4029			      make_tree (add_type, add)));
4030
4031  return expand_expr (result, target, VOIDmode, 0);
4032}
4033
4034/* Compute the logical-and of OP0 and OP1, storing it in TARGET
4035   and returning TARGET.
4036
4037   If TARGET is 0, a pseudo-register or constant is returned.  */
4038
4039rtx
4040expand_and (op0, op1, target)
4041     rtx op0, op1, target;
4042{
4043  enum machine_mode mode = VOIDmode;
4044  rtx tem;
4045
4046  if (GET_MODE (op0) != VOIDmode)
4047    mode = GET_MODE (op0);
4048  else if (GET_MODE (op1) != VOIDmode)
4049    mode = GET_MODE (op1);
4050
4051  if (mode != VOIDmode)
4052    tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4053  else if (GET_CODE (op0) == CONST_INT && GET_CODE (op1) == CONST_INT)
4054    tem = GEN_INT (INTVAL (op0) & INTVAL (op1));
4055  else
4056    abort ();
4057
4058  if (target == 0)
4059    target = tem;
4060  else if (tem != target)
4061    emit_move_insn (target, tem);
4062  return target;
4063}
4064
4065/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4066   and storing in TARGET.  Normally return TARGET.
4067   Return 0 if that cannot be done.
4068
4069   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4070   it is VOIDmode, they cannot both be CONST_INT.
4071
4072   UNSIGNEDP is for the case where we have to widen the operands
4073   to perform the operation.  It says to use zero-extension.
4074
4075   NORMALIZEP is 1 if we should convert the result to be either zero
4076   or one.  Normalize is -1 if we should convert the result to be
4077   either zero or -1.  If NORMALIZEP is zero, the result will be left
4078   "raw" out of the scc insn.  */
4079
4080rtx
4081emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
4082     rtx target;
4083     enum rtx_code code;
4084     rtx op0, op1;
4085     enum machine_mode mode;
4086     int unsignedp;
4087     int normalizep;
4088{
4089  rtx subtarget;
4090  enum insn_code icode;
4091  enum machine_mode compare_mode;
4092  enum machine_mode target_mode = GET_MODE (target);
4093  rtx tem;
4094  rtx last = get_last_insn ();
4095  rtx pattern, comparison;
4096
4097  /* If one operand is constant, make it the second one.  Only do this
4098     if the other operand is not constant as well.  */
4099
4100  if ((CONSTANT_P (op0) && ! CONSTANT_P (op1))
4101      || (GET_CODE (op0) == CONST_INT && GET_CODE (op1) != CONST_INT))
4102    {
4103      tem = op0;
4104      op0 = op1;
4105      op1 = tem;
4106      code = swap_condition (code);
4107    }
4108
4109  if (mode == VOIDmode)
4110    mode = GET_MODE (op0);
4111
4112  /* For some comparisons with 1 and -1, we can convert this to
4113     comparisons with zero.  This will often produce more opportunities for
4114     store-flag insns.  */
4115
4116  switch (code)
4117    {
4118    case LT:
4119      if (op1 == const1_rtx)
4120	op1 = const0_rtx, code = LE;
4121      break;
4122    case LE:
4123      if (op1 == constm1_rtx)
4124	op1 = const0_rtx, code = LT;
4125      break;
4126    case GE:
4127      if (op1 == const1_rtx)
4128	op1 = const0_rtx, code = GT;
4129      break;
4130    case GT:
4131      if (op1 == constm1_rtx)
4132	op1 = const0_rtx, code = GE;
4133      break;
4134    case GEU:
4135      if (op1 == const1_rtx)
4136	op1 = const0_rtx, code = NE;
4137      break;
4138    case LTU:
4139      if (op1 == const1_rtx)
4140	op1 = const0_rtx, code = EQ;
4141      break;
4142    default:
4143      break;
4144    }
4145
4146  /* From now on, we won't change CODE, so set ICODE now.  */
4147  icode = setcc_gen_code[(int) code];
4148
4149  /* If this is A < 0 or A >= 0, we can do this by taking the ones
4150     complement of A (for GE) and shifting the sign bit to the low bit.  */
4151  if (op1 == const0_rtx && (code == LT || code == GE)
4152      && GET_MODE_CLASS (mode) == MODE_INT
4153      && (normalizep || STORE_FLAG_VALUE == 1
4154	  || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4155	      && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4156		  == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4157    {
4158      subtarget = target;
4159
4160      /* If the result is to be wider than OP0, it is best to convert it
4161	 first.  If it is to be narrower, it is *incorrect* to convert it
4162	 first.  */
4163      if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4164	{
4165	  op0 = protect_from_queue (op0, 0);
4166	  op0 = convert_modes (target_mode, mode, op0, 0);
4167	  mode = target_mode;
4168	}
4169
4170      if (target_mode != mode)
4171	subtarget = 0;
4172
4173      if (code == GE)
4174	op0 = expand_unop (mode, one_cmpl_optab, op0,
4175			   ((STORE_FLAG_VALUE == 1 || normalizep)
4176			    ? 0 : subtarget), 0);
4177
4178      if (STORE_FLAG_VALUE == 1 || normalizep)
4179	/* If we are supposed to produce a 0/1 value, we want to do
4180	   a logical shift from the sign bit to the low-order bit; for
4181	   a -1/0 value, we do an arithmetic shift.  */
4182	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4183			    size_int (GET_MODE_BITSIZE (mode) - 1),
4184			    subtarget, normalizep != -1);
4185
4186      if (mode != target_mode)
4187	op0 = convert_modes (target_mode, mode, op0, 0);
4188
4189      return op0;
4190    }
4191
4192  if (icode != CODE_FOR_nothing)
4193    {
4194      /* We think we may be able to do this with a scc insn.  Emit the
4195	 comparison and then the scc insn.
4196
4197	 compare_from_rtx may call emit_queue, which would be deleted below
4198	 if the scc insn fails.  So call it ourselves before setting LAST.
4199	 Likewise for do_pending_stack_adjust.  */
4200
4201      emit_queue ();
4202      do_pending_stack_adjust ();
4203      last = get_last_insn ();
4204
4205      comparison
4206	= compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
4207      if (GET_CODE (comparison) == CONST_INT)
4208	return (comparison == const0_rtx ? const0_rtx
4209		: normalizep == 1 ? const1_rtx
4210		: normalizep == -1 ? constm1_rtx
4211		: const_true_rtx);
4212
4213      /* If the code of COMPARISON doesn't match CODE, something is
4214	 wrong; we can no longer be sure that we have the operation.
4215	 We could handle this case, but it should not happen.  */
4216
4217      if (GET_CODE (comparison) != code)
4218	abort ();
4219
4220      /* Get a reference to the target in the proper mode for this insn.  */
4221      compare_mode = insn_operand_mode[(int) icode][0];
4222      subtarget = target;
4223      if (preserve_subexpressions_p ()
4224	  || ! (*insn_operand_predicate[(int) icode][0]) (subtarget, compare_mode))
4225	subtarget = gen_reg_rtx (compare_mode);
4226
4227      pattern = GEN_FCN (icode) (subtarget);
4228      if (pattern)
4229	{
4230	  emit_insn (pattern);
4231
4232	  /* If we are converting to a wider mode, first convert to
4233	     TARGET_MODE, then normalize.  This produces better combining
4234	     opportunities on machines that have a SIGN_EXTRACT when we are
4235	     testing a single bit.  This mostly benefits the 68k.
4236
4237	     If STORE_FLAG_VALUE does not have the sign bit set when
4238	     interpreted in COMPARE_MODE, we can do this conversion as
4239	     unsigned, which is usually more efficient.  */
4240	  if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4241	    {
4242	      convert_move (target, subtarget,
4243			    (GET_MODE_BITSIZE (compare_mode)
4244			     <= HOST_BITS_PER_WIDE_INT)
4245			    && 0 == (STORE_FLAG_VALUE
4246				     & ((HOST_WIDE_INT) 1
4247					<< (GET_MODE_BITSIZE (compare_mode) -1))));
4248	      op0 = target;
4249	      compare_mode = target_mode;
4250	    }
4251	  else
4252	    op0 = subtarget;
4253
4254	  /* If we want to keep subexpressions around, don't reuse our
4255	     last target.  */
4256
4257	  if (preserve_subexpressions_p ())
4258	    subtarget = 0;
4259
4260	  /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4261	     we don't have to do anything.  */
4262	  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4263	    ;
4264	  else if (normalizep == - STORE_FLAG_VALUE)
4265	    op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4266
4267	  /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4268	     makes it hard to use a value of just the sign bit due to
4269	     ANSI integer constant typing rules.  */
4270	  else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4271		   && (STORE_FLAG_VALUE
4272		       & ((HOST_WIDE_INT) 1
4273			  << (GET_MODE_BITSIZE (compare_mode) - 1))))
4274	    op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4275				size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4276				subtarget, normalizep == 1);
4277	  else if (STORE_FLAG_VALUE & 1)
4278	    {
4279	      op0 = expand_and (op0, const1_rtx, subtarget);
4280	      if (normalizep == -1)
4281		op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4282	    }
4283	  else
4284	    abort ();
4285
4286	  /* If we were converting to a smaller mode, do the
4287	     conversion now.  */
4288	  if (target_mode != compare_mode)
4289	    {
4290	      convert_move (target, op0, 0);
4291	      return target;
4292	    }
4293	  else
4294	    return op0;
4295	}
4296    }
4297
4298  delete_insns_since (last);
4299
4300  /* If expensive optimizations, use different pseudo registers for each
4301     insn, instead of reusing the same pseudo.  This leads to better CSE,
4302     but slows down the compiler, since there are more pseudos */
4303  subtarget = (!flag_expensive_optimizations
4304	       && (target_mode == mode)) ? target : NULL_RTX;
4305
4306  /* If we reached here, we can't do this with a scc insn.  However, there
4307     are some comparisons that can be done directly.  For example, if
4308     this is an equality comparison of integers, we can try to exclusive-or
4309     (or subtract) the two operands and use a recursive call to try the
4310     comparison with zero.  Don't do any of these cases if branches are
4311     very cheap.  */
4312
4313  if (BRANCH_COST > 0
4314      && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4315      && op1 != const0_rtx)
4316    {
4317      tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4318			  OPTAB_WIDEN);
4319
4320      if (tem == 0)
4321	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4322			    OPTAB_WIDEN);
4323      if (tem != 0)
4324	tem = emit_store_flag (target, code, tem, const0_rtx,
4325			       mode, unsignedp, normalizep);
4326      if (tem == 0)
4327	delete_insns_since (last);
4328      return tem;
4329    }
4330
4331  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4332     the constant zero.  Reject all other comparisons at this point.  Only
4333     do LE and GT if branches are expensive since they are expensive on
4334     2-operand machines.  */
4335
4336  if (BRANCH_COST == 0
4337      || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4338      || (code != EQ && code != NE
4339	  && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4340    return 0;
4341
4342  /* See what we need to return.  We can only return a 1, -1, or the
4343     sign bit.  */
4344
4345  if (normalizep == 0)
4346    {
4347      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4348	normalizep = STORE_FLAG_VALUE;
4349
4350      else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4351	       && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4352		   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4353	;
4354      else
4355	return 0;
4356    }
4357
4358  /* Try to put the result of the comparison in the sign bit.  Assume we can't
4359     do the necessary operation below.  */
4360
4361  tem = 0;
4362
4363  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
4364     the sign bit set.  */
4365
4366  if (code == LE)
4367    {
4368      /* This is destructive, so SUBTARGET can't be OP0.  */
4369      if (rtx_equal_p (subtarget, op0))
4370	subtarget = 0;
4371
4372      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4373			  OPTAB_WIDEN);
4374      if (tem)
4375	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4376			    OPTAB_WIDEN);
4377    }
4378
4379  /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4380     number of bits in the mode of OP0, minus one.  */
4381
4382  if (code == GT)
4383    {
4384      if (rtx_equal_p (subtarget, op0))
4385	subtarget = 0;
4386
4387      tem = expand_shift (RSHIFT_EXPR, mode, op0,
4388			  size_int (GET_MODE_BITSIZE (mode) - 1),
4389			  subtarget, 0);
4390      tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4391			  OPTAB_WIDEN);
4392    }
4393
4394  if (code == EQ || code == NE)
4395    {
4396      /* For EQ or NE, one way to do the comparison is to apply an operation
4397	 that converts the operand into a positive number if it is non-zero
4398	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
4399	 for NE we negate.  This puts the result in the sign bit.  Then we
4400	 normalize with a shift, if needed.
4401
4402	 Two operations that can do the above actions are ABS and FFS, so try
4403	 them.  If that doesn't work, and MODE is smaller than a full word,
4404	 we can use zero-extension to the wider mode (an unsigned conversion)
4405	 as the operation.  */
4406
4407      if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4408	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4409      else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4410	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4411      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4412	{
4413	  op0 = protect_from_queue (op0, 0);
4414	  tem = convert_modes (word_mode, mode, op0, 1);
4415	  mode = word_mode;
4416	}
4417
4418      if (tem != 0)
4419	{
4420	  if (code == EQ)
4421	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4422				0, OPTAB_WIDEN);
4423	  else
4424	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4425	}
4426
4427      /* If we couldn't do it that way, for NE we can "or" the two's complement
4428	 of the value with itself.  For EQ, we take the one's complement of
4429	 that "or", which is an extra insn, so we only handle EQ if branches
4430	 are expensive.  */
4431
4432      if (tem == 0 && (code == NE || BRANCH_COST > 1))
4433	{
4434	  if (rtx_equal_p (subtarget, op0))
4435	    subtarget = 0;
4436
4437	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4438	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4439			      OPTAB_WIDEN);
4440
4441	  if (tem && code == EQ)
4442	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4443	}
4444    }
4445
4446  if (tem && normalizep)
4447    tem = expand_shift (RSHIFT_EXPR, mode, tem,
4448			size_int (GET_MODE_BITSIZE (mode) - 1),
4449			subtarget, normalizep == 1);
4450
4451  if (tem)
4452    {
4453      if (GET_MODE (tem) != target_mode)
4454	{
4455	  convert_move (target, tem, 0);
4456	  tem = target;
4457	}
4458      else if (!subtarget)
4459	{
4460	  emit_move_insn (target, tem);
4461	  tem = target;
4462	}
4463    }
4464  else
4465    delete_insns_since (last);
4466
4467  return tem;
4468}
4469
4470/* Like emit_store_flag, but always succeeds.  */
4471
4472rtx
4473emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep)
4474     rtx target;
4475     enum rtx_code code;
4476     rtx op0, op1;
4477     enum machine_mode mode;
4478     int unsignedp;
4479     int normalizep;
4480{
4481  rtx tem, label;
4482
4483  /* First see if emit_store_flag can do the job.  */
4484  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4485  if (tem != 0)
4486    return tem;
4487
4488  if (normalizep == 0)
4489    normalizep = 1;
4490
4491  /* If this failed, we have to do this with set/compare/jump/set code.  */
4492
4493  if (GET_CODE (target) != REG
4494      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
4495    target = gen_reg_rtx (GET_MODE (target));
4496
4497  emit_move_insn (target, const1_rtx);
4498  tem = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
4499  if (GET_CODE (tem) == CONST_INT)
4500    return tem;
4501
4502  label = gen_label_rtx ();
4503  if (bcc_gen_fctn[(int) code] == 0)
4504    abort ();
4505
4506  emit_jump_insn ((*bcc_gen_fctn[(int) code]) (label));
4507  emit_move_insn (target, const0_rtx);
4508  emit_label (label);
4509
4510  return target;
4511}
4512
4513/* Perform possibly multi-word comparison and conditional jump to LABEL
4514   if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
4515
4516   The algorithm is based on the code in expr.c:do_jump.
4517
4518   Note that this does not perform a general comparison.  Only variants
4519   generated within expmed.c are correctly handled, others abort (but could
4520   be handled if needed).  */
4521
4522static void
4523do_cmp_and_jump (arg1, arg2, op, mode, label)
4524     rtx arg1, arg2, label;
4525    enum rtx_code op;
4526    enum machine_mode mode;
4527{
4528  /* If this mode is an integer too wide to compare properly,
4529     compare word by word.  Rely on cse to optimize constant cases.  */
4530
4531  if (GET_MODE_CLASS (mode) == MODE_INT && !can_compare_p (mode))
4532    {
4533      rtx label2 = gen_label_rtx ();
4534
4535      switch (op)
4536	{
4537	case LTU:
4538	  do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
4539	  break;
4540
4541	case LEU:
4542	  do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
4543	  break;
4544
4545	case LT:
4546	  do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
4547	  break;
4548
4549	case GT:
4550	  do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
4551	  break;
4552
4553	case GE:
4554	  do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
4555	  break;
4556
4557	  /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
4558	     that's the only equality operations we do */
4559	case EQ:
4560	  if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4561	    abort();
4562	  do_jump_by_parts_equality_rtx (arg1, label2, label);
4563	  break;
4564
4565	case NE:
4566	  if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4567	    abort();
4568	  do_jump_by_parts_equality_rtx (arg1, label, label2);
4569	  break;
4570
4571	default:
4572	  abort();
4573	}
4574
4575      emit_label (label2);
4576    }
4577  else
4578    {
4579      emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, 0, label);
4580    }
4581}
4582