1/* GIMPLE store merging and byte swapping passes.
2   Copyright (C) 2009-2020 Free Software Foundation, Inc.
3   Contributed by ARM Ltd.
4
5   This file is part of GCC.
6
7   GCC is free software; you can redistribute it and/or modify it
8   under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 3, or (at your option)
10   any later version.
11
12   GCC is distributed in the hope that it will be useful, but
13   WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with GCC; see the file COPYING3.  If not see
19   <http://www.gnu.org/licenses/>.  */
20
21/* The purpose of the store merging pass is to combine multiple memory stores
22   of constant values, values loaded from memory, bitwise operations on those,
23   or bit-field values, to consecutive locations, into fewer wider stores.
24
   For example, if we have a sequence performing four byte stores to
26   consecutive memory locations:
27   [p     ] := imm1;
28   [p + 1B] := imm2;
29   [p + 2B] := imm3;
30   [p + 3B] := imm4;
31   we can transform this into a single 4-byte store if the target supports it:
32   [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
33
34   Or:
35   [p     ] := [q     ];
36   [p + 1B] := [q + 1B];
37   [p + 2B] := [q + 2B];
38   [p + 3B] := [q + 3B];
   if there is no overlap, this can be transformed into a single 4-byte
   load followed by a single 4-byte store.
41
42   Or:
43   [p     ] := [q     ] ^ imm1;
44   [p + 1B] := [q + 1B] ^ imm2;
45   [p + 2B] := [q + 2B] ^ imm3;
46   [p + 3B] := [q + 3B] ^ imm4;
   if there is no overlap, this can be transformed into a single 4-byte
   load, XORed with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
49
50   Or:
51   [p:1 ] := imm;
52   [p:31] := val & 0x7FFFFFFF;
53   we can transform this into a single 4-byte store if the target supports it:
54   [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
55
56   The algorithm is applied to each basic block in three phases:
57
58   1) Scan through the basic block and record assignments to destinations
59   that can be expressed as a store to memory of a certain size at a certain
60   bit offset from base expressions we can handle.  For bit-fields we also
61   record the surrounding bit region, i.e. bits that could be stored in
62   a read-modify-write operation when storing the bit-field.  Record store
63   chains to different bases in a hash_map (m_stores) and make sure to
64   terminate such chains when appropriate (for example when the stored
65   values get used subsequently).
66   These stores can be a result of structure element initializers, array stores
67   etc.  A store_immediate_info object is recorded for every such store.
68   Record as many such assignments to a single base as possible until a
69   statement that interferes with the store sequence is encountered.
   Each store has up to 2 operands, which can be either a constant, a memory
71   load or an SSA name, from which the value to be stored can be computed.
72   At most one of the operands can be a constant.  The operands are recorded
73   in store_operand_info struct.
74
75   2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
76   store_immediate_info objects) and coalesce contiguous stores into
77   merged_store_group objects.  For bit-field stores, we don't need to
78   require the stores to be contiguous, just their surrounding bit regions
79   have to be contiguous.  If the expression being stored is different
80   between adjacent stores, such as one store storing a constant and
   the following one storing a value loaded from memory, or if the loaded memory
82   objects are not adjacent, a new merged_store_group is created as well.
83
84   For example, given the stores:
85   [p     ] := 0;
86   [p + 1B] := 1;
87   [p + 3B] := 0;
88   [p + 4B] := 1;
89   [p + 5B] := 0;
90   [p + 6B] := 0;
91   This phase would produce two merged_store_group objects, one recording the
92   two bytes stored in the memory region [p : p + 1] and another
93   recording the four bytes stored in the memory region [p + 3 : p + 6].
94
95   3) The merged_store_group objects produced in phase 2) are processed
96   to generate the sequence of wider stores that set the contiguous memory
   regions to the sequence of bytes that correspond to them.  This may emit
98   multiple stores per store group to handle contiguous stores that are not
99   of a size that is a power of 2.  For example it can try to emit a 40-bit
100   store as a 32-bit store followed by an 8-bit store.
   We try to emit stores as wide as we can while respecting STRICT_ALIGNMENT
102   or TARGET_SLOW_UNALIGNED_ACCESS settings.
103
104   Note on endianness and example:
105   Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
106   [p     ] := 0x1234;
107   [p + 2B] := 0x5678;
108   [p + 4B] := 0xab;
109   [p + 5B] := 0xcd;
110
111   The memory layout for little-endian (LE) and big-endian (BE) must be:
112  p |LE|BE|
113  ---------
114  0 |34|12|
115  1 |12|34|
116  2 |78|56|
117  3 |56|78|
118  4 |ab|ab|
119  5 |cd|cd|
120
121  To merge these into a single 48-bit merged value 'val' in phase 2)
122  on little-endian we insert stores to higher (consecutive) bitpositions
123  into the most significant bits of the merged value.
124  The final merged value would be: 0xcdab56781234
125
126  For big-endian we insert stores to higher bitpositions into the least
127  significant bits of the merged value.
128  The final merged value would be: 0x12345678abcd
129
130  Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131  followed by a 16-bit store.  Again, we must consider endianness when
132  breaking down the 48-bit value 'val' computed above.
133  For little endian we emit:
134  [p]      (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135  [p + 4B] (16-bit) := 0xcdab;    // (val & 0xffff00000000) >> 32;
136
137  Whereas for big-endian we emit:
138  [p]      (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139  [p + 4B] (16-bit) := 0xabcd;     //  val & 0x00000000ffff;  */
140
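/* As an illustrative sketch (not part of the pass itself), the kind of source
   the three phases above can merge might look like the hypothetical function
   below: four adjacent one-byte constant stores that can be replaced by a
   single 4-byte store of 0x04030201 on a little-endian target (0x01020304 on
   a big-endian one), provided the target supports such a store:

     struct S { unsigned char a, b, c, d; };

     void
     init_s (struct S *p)
     {
       p->a = 1;
       p->b = 2;
       p->c = 3;
       p->d = 4;
     }

   The names S, init_s and the field layout are made up for illustration.  */
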
141#include "config.h"
142#include "system.h"
143#include "coretypes.h"
144#include "backend.h"
145#include "tree.h"
146#include "gimple.h"
147#include "builtins.h"
148#include "fold-const.h"
149#include "tree-pass.h"
150#include "ssa.h"
151#include "gimple-pretty-print.h"
152#include "alias.h"
153#include "fold-const.h"
154#include "print-tree.h"
155#include "tree-hash-traits.h"
156#include "gimple-iterator.h"
157#include "gimplify.h"
158#include "gimple-fold.h"
159#include "stor-layout.h"
160#include "timevar.h"
161#include "cfganal.h"
162#include "cfgcleanup.h"
163#include "tree-cfg.h"
164#include "except.h"
165#include "tree-eh.h"
166#include "target.h"
167#include "gimplify-me.h"
168#include "rtl.h"
169#include "expr.h"	/* For get_bit_range.  */
170#include "optabs-tree.h"
171#include "dbgcnt.h"
172#include "selftest.h"
173
174/* The maximum size (in bits) of the stores this pass should generate.  */
175#define MAX_STORE_BITSIZE (BITS_PER_WORD)
176#define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
177
178/* Limit to bound the number of aliasing checks for loads with the same
179   vuse as the corresponding store.  */
180#define MAX_STORE_ALIAS_CHECKS 64
181
182namespace {
183
184struct bswap_stat
185{
186  /* Number of hand-written 16-bit nop / bswaps found.  */
187  int found_16bit;
188
189  /* Number of hand-written 32-bit nop / bswaps found.  */
190  int found_32bit;
191
192  /* Number of hand-written 64-bit nop / bswaps found.  */
193  int found_64bit;
194} nop_stats, bswap_stats;
195
196/* A symbolic number structure is used to detect byte permutation and selection
197   patterns of a source.  To achieve that, its field N contains an artificial
   number consisting of BITS_PER_MARKER sized markers tracking where each
   byte comes from in the source:
200
201   0	   - target byte has the value 0
   FF	   - target byte has an unknown value (e.g. due to sign extension)
203   1..size - marker value is the byte index in the source (0 for lsb).
204
   To detect permutations on memory sources (arrays and structures), the
   following information is also associated with a symbolic number:
207   - a base address BASE_ADDR and an OFFSET giving the address of the source;
208   - a range which gives the difference between the highest and lowest accessed
209     memory location to make such a symbolic number;
210   - the address SRC of the source element of lowest address as a convenience
211     to easily get BASE_ADDR + offset + lowest bytepos;
   - the number of expressions N_OPS bitwise ORed together to represent
     an approximate cost of the computation.
214
215   Note 1: the range is different from size as size reflects the size of the
216   type of the current expression.  For instance, for an array char a[],
217   (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
218   (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
219   time a range of 1.
220
221   Note 2: for non-memory sources, range holds the same value as size.
222
223   Note 3: SRC points to the SSA_NAME in case of non-memory source.  */
224
225struct symbolic_number {
226  uint64_t n;
227  tree type;
228  tree base_addr;
229  tree offset;
230  poly_int64_pod bytepos;
231  tree src;
232  tree alias_set;
233  tree vuse;
234  unsigned HOST_WIDE_INT range;
235  int n_ops;
236};
237
238#define BITS_PER_MARKER 8
239#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
240#define MARKER_BYTE_UNKNOWN MARKER_MASK
241#define HEAD_MARKER(n, size) \
242  ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
243
244/* The number which the find_bswap_or_nop_1 result should match in
245   order to have a nop.  The number is masked according to the size of
246   the symbolic number before using it.  */
247#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
248  (uint64_t)0x08070605 << 32 | 0x04030201)
249
250/* The number which the find_bswap_or_nop_1 result should match in
251   order to have a byte swap.  The number is masked according to the
252   size of the symbolic number before using it.  */
253#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
254  (uint64_t)0x01020304 << 32 | 0x05060708)
255
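/* Spelled out (a worked illustration, not used by the code): CMPNOP is
   0x0807060504030201, i.e. marker value K in byte position K - 1, which is
   the identity permutation, and CMPXCHG is 0x0102030405060708, the fully
   reversed permutation.  Symbolic numbers narrower than 8 bytes are compared
   against a masked or shifted version of these constants.  */
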
256/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
257   number N.  Return false if the requested operation is not permitted
258   on a symbolic number.  */
259
260inline bool
261do_shift_rotate (enum tree_code code,
262		 struct symbolic_number *n,
263		 int count)
264{
265  int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
266  uint64_t head_marker;
267
268  if (count < 0
269      || count >= TYPE_PRECISION (n->type)
270      || count % BITS_PER_UNIT != 0)
271    return false;
272  count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
273
274  /* Zero out the extra bits of N in order to avoid them being shifted
275     into the significant bits.  */
276  if (size < 64 / BITS_PER_MARKER)
277    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
278
279  switch (code)
280    {
281    case LSHIFT_EXPR:
282      n->n <<= count;
283      break;
284    case RSHIFT_EXPR:
285      head_marker = HEAD_MARKER (n->n, size);
286      n->n >>= count;
287      /* Arithmetic shift of signed type: result is dependent on the value.  */
288      if (!TYPE_UNSIGNED (n->type) && head_marker)
289	for (i = 0; i < count / BITS_PER_MARKER; i++)
290	  n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
291		  << ((size - 1 - i) * BITS_PER_MARKER);
292      break;
293    case LROTATE_EXPR:
294      n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
295      break;
296    case RROTATE_EXPR:
297      n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
298      break;
299    default:
300      return false;
301    }
302  /* Zero unused bits for size.  */
303  if (size < 64 / BITS_PER_MARKER)
304    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
305  return true;
306}
307
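/* For instance (an illustrative trace, not part of the pass): shifting the
   4-byte symbolic number 0x04030201 left by 8 bits with LSHIFT_EXPR yields
   0x03020100 after masking to four markers: the least significant marker
   becomes 0, i.e. "this byte is known to be zero", and the marker of source
   byte 4 is shifted out.  */
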
308/* Perform sanity checking for the symbolic number N and the gimple
309   statement STMT.  */
310
311inline bool
312verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
313{
314  tree lhs_type;
315
316  lhs_type = gimple_expr_type (stmt);
317
318  if (TREE_CODE (lhs_type) != INTEGER_TYPE
319      && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
320    return false;
321
322  if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
323    return false;
324
325  return true;
326}
327
328/* Initialize the symbolic number N for the bswap pass from the base element
329   SRC manipulated by the bitwise OR expression.  */
330
331bool
332init_symbolic_number (struct symbolic_number *n, tree src)
333{
334  int size;
335
336  if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
337    return false;
338
339  n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
340  n->src = src;
341
  /* Set up the symbolic number N by setting each byte to a value between 1 and
     the byte size of rhs1.  The highest order byte is set to that size and the
     lowest order byte to 1.  */
345  n->type = TREE_TYPE (src);
346  size = TYPE_PRECISION (n->type);
347  if (size % BITS_PER_UNIT != 0)
348    return false;
349  size /= BITS_PER_UNIT;
350  if (size > 64 / BITS_PER_MARKER)
351    return false;
352  n->range = size;
353  n->n = CMPNOP;
354  n->n_ops = 1;
355
356  if (size < 64 / BITS_PER_MARKER)
357    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
358
359  return true;
360}
361
/* Check if STMT might be a byte swap or a nop from a memory source and return
363   the answer. If so, REF is that memory source and the base of the memory area
364   accessed and the offset of the access from that base are recorded in N.  */
365
366bool
367find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
368{
369  /* Leaf node is an array or component ref. Memorize its base and
370     offset from base to compare to other such leaf node.  */
371  poly_int64 bitsize, bitpos, bytepos;
372  machine_mode mode;
373  int unsignedp, reversep, volatilep;
374  tree offset, base_addr;
375
376  /* Not prepared to handle PDP endian.  */
377  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
378    return false;
379
380  if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
381    return false;
382
383  base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
384				   &unsignedp, &reversep, &volatilep);
385
386  if (TREE_CODE (base_addr) == TARGET_MEM_REF)
387    /* Do not rewrite TARGET_MEM_REF.  */
388    return false;
389  else if (TREE_CODE (base_addr) == MEM_REF)
390    {
391      poly_offset_int bit_offset = 0;
392      tree off = TREE_OPERAND (base_addr, 1);
393
394      if (!integer_zerop (off))
395	{
396	  poly_offset_int boff = mem_ref_offset (base_addr);
397	  boff <<= LOG2_BITS_PER_UNIT;
398	  bit_offset += boff;
399	}
400
401      base_addr = TREE_OPERAND (base_addr, 0);
402
403      /* Avoid returning a negative bitpos as this may wreak havoc later.  */
404      if (maybe_lt (bit_offset, 0))
405	{
406	  tree byte_offset = wide_int_to_tree
407	    (sizetype, bits_to_bytes_round_down (bit_offset));
408	  bit_offset = num_trailing_bits (bit_offset);
409	  if (offset)
410	    offset = size_binop (PLUS_EXPR, offset, byte_offset);
411	  else
412	    offset = byte_offset;
413	}
414
415      bitpos += bit_offset.force_shwi ();
416    }
417  else
418    base_addr = build_fold_addr_expr (base_addr);
419
420  if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
421    return false;
422  if (!multiple_p (bitsize, BITS_PER_UNIT))
423    return false;
424  if (reversep)
425    return false;
426
427  if (!init_symbolic_number (n, ref))
428    return false;
429  n->base_addr = base_addr;
430  n->offset = offset;
431  n->bytepos = bytepos;
432  n->alias_set = reference_alias_ptr_type (ref);
433  n->vuse = gimple_vuse (stmt);
434  return true;
435}
436
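/* As an illustrative example (hypothetical, not taken from real sources): for
   a load of the form MEM[(unsigned int *)p + 4B] the function records
   BASE_ADDR = p, OFFSET = NULL_TREE and BYTEPOS = 4 in N, initializes the
   markers as for any other 4-byte integral source, and remembers the alias
   set and VUSE of the load for use by the later replacement.  */
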
/* Compute the symbolic number N representing the result of a bitwise OR of the
   two symbolic numbers N1 and N2 whose source statements are respectively
439   SOURCE_STMT1 and SOURCE_STMT2.  */
440
441gimple *
442perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
443			gimple *source_stmt2, struct symbolic_number *n2,
444			struct symbolic_number *n)
445{
446  int i, size;
447  uint64_t mask;
448  gimple *source_stmt;
449  struct symbolic_number *n_start;
450
451  tree rhs1 = gimple_assign_rhs1 (source_stmt1);
452  if (TREE_CODE (rhs1) == BIT_FIELD_REF
453      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
454    rhs1 = TREE_OPERAND (rhs1, 0);
455  tree rhs2 = gimple_assign_rhs1 (source_stmt2);
456  if (TREE_CODE (rhs2) == BIT_FIELD_REF
457      && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
458    rhs2 = TREE_OPERAND (rhs2, 0);
459
  /* Sources are different, cancel bswap if they are not memory locations with
461     the same base (array, structure, ...).  */
462  if (rhs1 != rhs2)
463    {
464      uint64_t inc;
465      HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
466      struct symbolic_number *toinc_n_ptr, *n_end;
467      basic_block bb1, bb2;
468
469      if (!n1->base_addr || !n2->base_addr
470	  || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
471	return NULL;
472
473      if (!n1->offset != !n2->offset
474	  || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
475	return NULL;
476
477      start1 = 0;
478      if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
479	return NULL;
480
481      if (start1 < start2)
482	{
483	  n_start = n1;
484	  start_sub = start2 - start1;
485	}
486      else
487	{
488	  n_start = n2;
489	  start_sub = start1 - start2;
490	}
491
492      bb1 = gimple_bb (source_stmt1);
493      bb2 = gimple_bb (source_stmt2);
494      if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
495	source_stmt = source_stmt1;
496      else
497	source_stmt = source_stmt2;
498
499      /* Find the highest address at which a load is performed and
500	 compute related info.  */
501      end1 = start1 + (n1->range - 1);
502      end2 = start2 + (n2->range - 1);
503      if (end1 < end2)
504	{
505	  end = end2;
506	  end_sub = end2 - end1;
507	}
508      else
509	{
510	  end = end1;
511	  end_sub = end1 - end2;
512	}
513      n_end = (end2 > end1) ? n2 : n1;
514
515      /* Find symbolic number whose lsb is the most significant.  */
516      if (BYTES_BIG_ENDIAN)
517	toinc_n_ptr = (n_end == n1) ? n2 : n1;
518      else
519	toinc_n_ptr = (n_start == n1) ? n2 : n1;
520
521      n->range = end - MIN (start1, start2) + 1;
522
523      /* Check that the range of memory covered can be represented by
524	 a symbolic number.  */
525      if (n->range > 64 / BITS_PER_MARKER)
526	return NULL;
527
528      /* Reinterpret byte marks in symbolic number holding the value of
529	 bigger weight according to target endianness.  */
530      inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
531      size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
532      for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
533	{
534	  unsigned marker
535	    = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
536	  if (marker && marker != MARKER_BYTE_UNKNOWN)
537	    toinc_n_ptr->n += inc;
538	}
539    }
540  else
541    {
542      n->range = n1->range;
543      n_start = n1;
544      source_stmt = source_stmt1;
545    }
546
547  if (!n1->alias_set
548      || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
549    n->alias_set = n1->alias_set;
550  else
551    n->alias_set = ptr_type_node;
552  n->vuse = n_start->vuse;
553  n->base_addr = n_start->base_addr;
554  n->offset = n_start->offset;
555  n->src = n_start->src;
556  n->bytepos = n_start->bytepos;
557  n->type = n_start->type;
558  size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
559
560  for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
561    {
562      uint64_t masked1, masked2;
563
564      masked1 = n1->n & mask;
565      masked2 = n2->n & mask;
566      if (masked1 && masked2 && masked1 != masked2)
567	return NULL;
568    }
569  n->n = n1->n | n2->n;
570  n->n_ops = n1->n_ops + n2->n_ops;
571
572  return source_stmt;
573}
574
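/* A small worked example (illustrative only): on a little-endian target,
   merging the symbolic number of a[0] (markers 0x01, bytepos 0) with that of
   a[1] << 8 (markers 0x0100, bytepos 1), for some "unsigned char a[]", gives
   a combined number 0x0201 with a range of 2.  That matches CMPNOP masked to
   two bytes, i.e. the bitwise OR implements a plain 16-bit native-endian
   load.  */
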
575/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
   the operation given by the rhs of STMT on the result.  If the operation
   could be successfully executed, the function returns a gimple stmt whose
   rhs's first tree is the expression of the source operand, and NULL
   otherwise.  */
580
581gimple *
582find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
583{
584  enum tree_code code;
585  tree rhs1, rhs2 = NULL;
586  gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
587  enum gimple_rhs_class rhs_class;
588
589  if (!limit || !is_gimple_assign (stmt))
590    return NULL;
591
592  rhs1 = gimple_assign_rhs1 (stmt);
593
594  if (find_bswap_or_nop_load (stmt, rhs1, n))
595    return stmt;
596
597  /* Handle BIT_FIELD_REF.  */
598  if (TREE_CODE (rhs1) == BIT_FIELD_REF
599      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
600    {
601      unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
602      unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
603      if (bitpos % BITS_PER_UNIT == 0
604	  && bitsize % BITS_PER_UNIT == 0
605	  && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
606	{
607	  /* Handle big-endian bit numbering in BIT_FIELD_REF.  */
608	  if (BYTES_BIG_ENDIAN)
609	    bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
610
611	  /* Shift.  */
612	  if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
613	    return NULL;
614
615	  /* Mask.  */
616	  uint64_t mask = 0;
617	  uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
618	  for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
619	       i++, tmp <<= BITS_PER_UNIT)
620	    mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
621	  n->n &= mask;
622
623	  /* Convert.  */
624	  n->type = TREE_TYPE (rhs1);
625	  if (!n->base_addr)
626	    n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
627
628	  return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
629	}
630
631      return NULL;
632    }
633
634  if (TREE_CODE (rhs1) != SSA_NAME)
635    return NULL;
636
637  code = gimple_assign_rhs_code (stmt);
638  rhs_class = gimple_assign_rhs_class (stmt);
639  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
640
641  if (rhs_class == GIMPLE_BINARY_RHS)
642    rhs2 = gimple_assign_rhs2 (stmt);
643
644  /* Handle unary rhs and binary rhs with integer constants as second
645     operand.  */
646
647  if (rhs_class == GIMPLE_UNARY_RHS
648      || (rhs_class == GIMPLE_BINARY_RHS
649	  && TREE_CODE (rhs2) == INTEGER_CST))
650    {
651      if (code != BIT_AND_EXPR
652	  && code != LSHIFT_EXPR
653	  && code != RSHIFT_EXPR
654	  && code != LROTATE_EXPR
655	  && code != RROTATE_EXPR
656	  && !CONVERT_EXPR_CODE_P (code))
657	return NULL;
658
659      source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
660
661      /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
662	 we have to initialize the symbolic number.  */
663      if (!source_stmt1)
664	{
665	  if (gimple_assign_load_p (stmt)
666	      || !init_symbolic_number (n, rhs1))
667	    return NULL;
668	  source_stmt1 = stmt;
669	}
670
671      switch (code)
672	{
673	case BIT_AND_EXPR:
674	  {
675	    int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
676	    uint64_t val = int_cst_value (rhs2), mask = 0;
677	    uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
678
679	    /* Only constants masking full bytes are allowed.  */
680	    for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
681	      if ((val & tmp) != 0 && (val & tmp) != tmp)
682		return NULL;
683	      else if (val & tmp)
684		mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
685
686	    n->n &= mask;
687	  }
688	  break;
689	case LSHIFT_EXPR:
690	case RSHIFT_EXPR:
691	case LROTATE_EXPR:
692	case RROTATE_EXPR:
693	  if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
694	    return NULL;
695	  break;
696	CASE_CONVERT:
697	  {
698	    int i, type_size, old_type_size;
699	    tree type;
700
701	    type = gimple_expr_type (stmt);
702	    type_size = TYPE_PRECISION (type);
703	    if (type_size % BITS_PER_UNIT != 0)
704	      return NULL;
705	    type_size /= BITS_PER_UNIT;
706	    if (type_size > 64 / BITS_PER_MARKER)
707	      return NULL;
708
709	    /* Sign extension: result is dependent on the value.  */
710	    old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
711	    if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
712		&& HEAD_MARKER (n->n, old_type_size))
713	      for (i = 0; i < type_size - old_type_size; i++)
714		n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
715			<< ((type_size - 1 - i) * BITS_PER_MARKER);
716
717	    if (type_size < 64 / BITS_PER_MARKER)
718	      {
719		/* If STMT casts to a smaller type mask out the bits not
720		   belonging to the target type.  */
721		n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
722	      }
723	    n->type = type;
724	    if (!n->base_addr)
725	      n->range = type_size;
726	  }
727	  break;
728	default:
729	  return NULL;
730	};
731      return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
732    }
733
734  /* Handle binary rhs.  */
735
736  if (rhs_class == GIMPLE_BINARY_RHS)
737    {
738      struct symbolic_number n1, n2;
739      gimple *source_stmt, *source_stmt2;
740
741      if (code != BIT_IOR_EXPR)
742	return NULL;
743
744      if (TREE_CODE (rhs2) != SSA_NAME)
745	return NULL;
746
747      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
748
749      switch (code)
750	{
751	case BIT_IOR_EXPR:
752	  source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
753
754	  if (!source_stmt1)
755	    return NULL;
756
757	  source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
758
759	  if (!source_stmt2)
760	    return NULL;
761
762	  if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
763	    return NULL;
764
765	  if (n1.vuse != n2.vuse)
766	    return NULL;
767
768	  source_stmt
769	    = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
770
771	  if (!source_stmt)
772	    return NULL;
773
774	  if (!verify_symbolic_number_p (n, stmt))
775	    return NULL;
776
777	  break;
778	default:
779	  return NULL;
780	}
781      return source_stmt;
782    }
783  return NULL;
784}
785
786/* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
787   *CMPXCHG, *CMPNOP and adjust *N.  */
788
789void
790find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
791			    uint64_t *cmpnop)
792{
793  unsigned rsize;
794  uint64_t tmpn, mask;
795
796  /* The number which the find_bswap_or_nop_1 result should match in order
797     to have a full byte swap.  The number is shifted to the right
798     according to the size of the symbolic number before using it.  */
799  *cmpxchg = CMPXCHG;
800  *cmpnop = CMPNOP;
801
802  /* Find real size of result (highest non-zero byte).  */
803  if (n->base_addr)
804    for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
805  else
806    rsize = n->range;
807
808  /* Zero out the bits corresponding to untouched bytes in original gimple
809     expression.  */
810  if (n->range < (int) sizeof (int64_t))
811    {
812      mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
813      *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
814      *cmpnop &= mask;
815    }
816
817  /* Zero out the bits corresponding to unused bytes in the result of the
818     gimple expression.  */
819  if (rsize < n->range)
820    {
821      if (BYTES_BIG_ENDIAN)
822	{
823	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
824	  *cmpxchg &= mask;
825	  if (n->range - rsize == sizeof (int64_t))
826	    *cmpnop = 0;
827	  else
828	    *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
829	}
830      else
831	{
832	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
833	  if (n->range - rsize == sizeof (int64_t))
834	    *cmpxchg = 0;
835	  else
836	    *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
837	  *cmpnop &= mask;
838	}
839      n->range = rsize;
840    }
841
842  n->range *= BITS_PER_UNIT;
843}
844
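/* For example (an illustrative trace): for a 4-byte symbolic number covering
   all four bytes of a memory source, this sets *CMPXCHG to 0x01020304,
   *CMPNOP to 0x04030201 and N->range to 32 (bits), which is what the callers
   then compare N->n against.  */
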
/* Check if STMT completes a bswap implementation or a read in a given
   endianness consisting of ORs, SHIFTs and ANDs and set *BSWAP
   accordingly.  It also sets N to represent the kind of operations
   performed: the size of the resulting expression and whether it works on
   a memory source, and if so its alias set and vuse.  Finally, the
   function returns a stmt whose rhs's first tree is the source
   expression.  */
852
853gimple *
854find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
855{
  /* The last parameter determines the search depth limit.  It usually
857     correlates directly to the number n of bytes to be touched.  We
858     increase that number by 2 * (log2(n) + 1) here in order to also
859     cover signed -> unsigned conversions of the src operand as can be seen
860     in libgcc, and for initial shift/and operation of the src operand.  */
861  int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
862  limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
863  gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
864
865  if (!ins_stmt)
866    return NULL;
867
868  uint64_t cmpxchg, cmpnop;
869  find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);
870
  /* A complete byte swap should make the symbolic number start with
     the largest digit in the highest order byte.  An unchanged symbolic
     number indicates a read with the same endianness as the target
     architecture.  */
874  if (n->n == cmpnop)
875    *bswap = false;
876  else if (n->n == cmpxchg)
877    *bswap = true;
878  else
879    return NULL;
880
  /* Useless bit manipulation performed by the code; not worth replacing.  */
882  if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
883    return NULL;
884
885  return ins_stmt;
886}
887
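/* As an illustration (a hypothetical user function, not part of GCC), the kind
   of source pattern this detection is aimed at is a hand-written byte swap
   built from shifts, masks and ORs:

     unsigned int
     manual_bswap32 (unsigned int x)
     {
       return ((x & 0x000000ffu) << 24)
              | ((x & 0x0000ff00u) << 8)
              | ((x & 0x00ff0000u) >> 8)
              | ((x & 0xff000000u) >> 24);
     }

   For the outermost BIT_IOR_EXPR of such a function find_bswap_or_nop sets
   *BSWAP to true, so that the pass can replace the whole computation by a
   __builtin_bswap32 call when the target provides one.  */
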
888const pass_data pass_data_optimize_bswap =
889{
890  GIMPLE_PASS, /* type */
891  "bswap", /* name */
892  OPTGROUP_NONE, /* optinfo_flags */
893  TV_NONE, /* tv_id */
894  PROP_ssa, /* properties_required */
895  0, /* properties_provided */
896  0, /* properties_destroyed */
897  0, /* todo_flags_start */
898  0, /* todo_flags_finish */
899};
900
901class pass_optimize_bswap : public gimple_opt_pass
902{
903public:
904  pass_optimize_bswap (gcc::context *ctxt)
905    : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
906  {}
907
908  /* opt_pass methods: */
909  virtual bool gate (function *)
910    {
911      return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
912    }
913
914  virtual unsigned int execute (function *);
915
916}; // class pass_optimize_bswap
917
/* Perform the bswap optimization: replace the expression computed in the rhs
   of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
   bswap, load or load + bswap expression.
   Which of these alternatives replaces the rhs is given by N->base_addr (non
   null if a load is needed) and BSWAP.  The type, VUSE and alias set of the
   load to perform are also given in N while the builtin bswap to invoke is
   given in FNDECL.  Finally, if a load is involved, INS_STMT refers to one of
   the load statements involved to construct the rhs in gsi_stmt (GSI) and
   N->range gives the size of the rhs expression for maintaining some
   statistics.

   Note that if the replacement involves a load and if gsi_stmt (GSI) is
   non-NULL, that stmt is moved just after INS_STMT to do the load with the
   same VUSE which can lead to gsi_stmt (GSI) changing basic block.  */
932
933tree
934bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
935	       tree bswap_type, tree load_type, struct symbolic_number *n,
936	       bool bswap)
937{
938  tree src, tmp, tgt = NULL_TREE;
939  gimple *bswap_stmt;
940
941  gimple *cur_stmt = gsi_stmt (gsi);
942  src = n->src;
943  if (cur_stmt)
944    tgt = gimple_assign_lhs (cur_stmt);
945
946  /* Need to load the value from memory first.  */
947  if (n->base_addr)
948    {
949      gimple_stmt_iterator gsi_ins = gsi;
950      if (ins_stmt)
951	gsi_ins = gsi_for_stmt (ins_stmt);
952      tree addr_expr, addr_tmp, val_expr, val_tmp;
953      tree load_offset_ptr, aligned_load_type;
954      gimple *load_stmt;
955      unsigned align = get_object_alignment (src);
956      poly_int64 load_offset = 0;
957
958      if (cur_stmt)
959	{
960	  basic_block ins_bb = gimple_bb (ins_stmt);
961	  basic_block cur_bb = gimple_bb (cur_stmt);
962	  if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
963	    return NULL_TREE;
964
	  /* Move cur_stmt just before one of the loads of the original
966	     to ensure it has the same VUSE.  See PR61517 for what could
967	     go wrong.  */
968	  if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
969	    reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
970	  gsi_move_before (&gsi, &gsi_ins);
971	  gsi = gsi_for_stmt (cur_stmt);
972	}
973      else
974	gsi = gsi_ins;
975
976      /* Compute address to load from and cast according to the size
977	 of the load.  */
978      addr_expr = build_fold_addr_expr (src);
979      if (is_gimple_mem_ref_addr (addr_expr))
980	addr_tmp = unshare_expr (addr_expr);
981      else
982	{
983	  addr_tmp = unshare_expr (n->base_addr);
984	  if (!is_gimple_mem_ref_addr (addr_tmp))
985	    addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
986						   is_gimple_mem_ref_addr,
987						   NULL_TREE, true,
988						   GSI_SAME_STMT);
989	  load_offset = n->bytepos;
990	  if (n->offset)
991	    {
992	      tree off
993		= force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
994					    true, NULL_TREE, true,
995					    GSI_SAME_STMT);
996	      gimple *stmt
997		= gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
998				       POINTER_PLUS_EXPR, addr_tmp, off);
999	      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1000	      addr_tmp = gimple_assign_lhs (stmt);
1001	    }
1002	}
1003
1004      /* Perform the load.  */
1005      aligned_load_type = load_type;
1006      if (align < TYPE_ALIGN (load_type))
1007	aligned_load_type = build_aligned_type (load_type, align);
1008      load_offset_ptr = build_int_cst (n->alias_set, load_offset);
1009      val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
1010			      load_offset_ptr);
1011
1012      if (!bswap)
1013	{
1014	  if (n->range == 16)
1015	    nop_stats.found_16bit++;
1016	  else if (n->range == 32)
1017	    nop_stats.found_32bit++;
1018	  else
1019	    {
1020	      gcc_assert (n->range == 64);
1021	      nop_stats.found_64bit++;
1022	    }
1023
1024	  /* Convert the result of load if necessary.  */
1025	  if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
1026	    {
1027	      val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
1028					    "load_dst");
1029	      load_stmt = gimple_build_assign (val_tmp, val_expr);
1030	      gimple_set_vuse (load_stmt, n->vuse);
1031	      gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1032	      gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
1033	      update_stmt (cur_stmt);
1034	    }
1035	  else if (cur_stmt)
1036	    {
1037	      gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
1038	      gimple_set_vuse (cur_stmt, n->vuse);
1039	      update_stmt (cur_stmt);
1040	    }
1041	  else
1042	    {
1043	      tgt = make_ssa_name (load_type);
1044	      cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
1045	      gimple_set_vuse (cur_stmt, n->vuse);
1046	      gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
1047	    }
1048
1049	  if (dump_file)
1050	    {
1051	      fprintf (dump_file,
1052		       "%d bit load in target endianness found at: ",
1053		       (int) n->range);
1054	      print_gimple_stmt (dump_file, cur_stmt, 0);
1055	    }
1056	  return tgt;
1057	}
1058      else
1059	{
1060	  val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
1061	  load_stmt = gimple_build_assign (val_tmp, val_expr);
1062	  gimple_set_vuse (load_stmt, n->vuse);
1063	  gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1064	}
1065      src = val_tmp;
1066    }
1067  else if (!bswap)
1068    {
1069      gimple *g = NULL;
1070      if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
1071	{
1072	  if (!is_gimple_val (src))
1073	    return NULL_TREE;
1074	  g = gimple_build_assign (tgt, NOP_EXPR, src);
1075	}
1076      else if (cur_stmt)
1077	g = gimple_build_assign (tgt, src);
1078      else
1079	tgt = src;
1080      if (n->range == 16)
1081	nop_stats.found_16bit++;
1082      else if (n->range == 32)
1083	nop_stats.found_32bit++;
1084      else
1085	{
1086	  gcc_assert (n->range == 64);
1087	  nop_stats.found_64bit++;
1088	}
1089      if (dump_file)
1090	{
1091	  fprintf (dump_file,
1092		   "%d bit reshuffle in target endianness found at: ",
1093		   (int) n->range);
1094	  if (cur_stmt)
1095	    print_gimple_stmt (dump_file, cur_stmt, 0);
1096	  else
1097	    {
1098	      print_generic_expr (dump_file, tgt, TDF_NONE);
1099	      fprintf (dump_file, "\n");
1100	    }
1101	}
1102      if (cur_stmt)
1103	gsi_replace (&gsi, g, true);
1104      return tgt;
1105    }
1106  else if (TREE_CODE (src) == BIT_FIELD_REF)
1107    src = TREE_OPERAND (src, 0);
1108
1109  if (n->range == 16)
1110    bswap_stats.found_16bit++;
1111  else if (n->range == 32)
1112    bswap_stats.found_32bit++;
1113  else
1114    {
1115      gcc_assert (n->range == 64);
1116      bswap_stats.found_64bit++;
1117    }
1118
1119  tmp = src;
1120
1121  /* Convert the src expression if necessary.  */
1122  if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
1123    {
1124      gimple *convert_stmt;
1125
1126      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
1127      convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
1128      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1129    }
1130
  /* Canonical form for a 16-bit bswap is a rotate expression.  Only 16-bit
     values are handled this way, because rotating a 2N-bit value by N bits is
     in general not equivalent to a bswap.  Consider for instance
     0x01020304 r>> 16, which gives 0x03040102, while a bswap of that value
     is 0x04030201.  */
1135  if (bswap && n->range == 16)
1136    {
1137      tree count = build_int_cst (NULL, BITS_PER_UNIT);
1138      src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
1139      bswap_stmt = gimple_build_assign (NULL, src);
1140    }
1141  else
1142    bswap_stmt = gimple_build_call (fndecl, 1, tmp);
1143
1144  if (tgt == NULL_TREE)
1145    tgt = make_ssa_name (bswap_type);
1146  tmp = tgt;
1147
1148  /* Convert the result if necessary.  */
1149  if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
1150    {
1151      gimple *convert_stmt;
1152
1153      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1154      convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
1155      gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
1156    }
1157
1158  gimple_set_lhs (bswap_stmt, tmp);
1159
1160  if (dump_file)
1161    {
1162      fprintf (dump_file, "%d bit bswap implementation found at: ",
1163	       (int) n->range);
1164      if (cur_stmt)
1165	print_gimple_stmt (dump_file, cur_stmt, 0);
1166      else
1167	{
1168	  print_generic_expr (dump_file, tgt, TDF_NONE);
1169	  fprintf (dump_file, "\n");
1170	}
1171    }
1172
1173  if (cur_stmt)
1174    {
1175      gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
1176      gsi_remove (&gsi, true);
1177    }
1178  else
1179    gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
1180  return tgt;
1181}
1182
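/* Another illustrative source pattern (hypothetical, not part of GCC) is a
   byte-wise read in a fixed endianness:

     unsigned int
     read_le32 (const unsigned char *p)
     {
       return (unsigned int) p[0]
              | ((unsigned int) p[1] << 8)
              | ((unsigned int) p[2] << 16)
              | ((unsigned int) p[3] << 24);
     }

   On a little-endian target this is detected as a nop (BSWAP false) and
   bswap_replace turns it into a single 4-byte load; on a big-endian target it
   is detected as a byte swap and becomes a 4-byte load followed by a
   __builtin_bswap32 call.  */
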
/* Find manual byte swap implementations as well as loads in a given
   endianness.  Byte swaps are turned into a bswap builtin invocation
   while endian loads are converted to a bswap builtin invocation or a
   simple load according to the target endianness.  */
1187
1188unsigned int
1189pass_optimize_bswap::execute (function *fun)
1190{
1191  basic_block bb;
1192  bool bswap32_p, bswap64_p;
1193  bool changed = false;
1194  tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1195
1196  bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1197	       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1198  bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1199	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1200		   || (bswap32_p && word_mode == SImode)));
1201
1202  /* Determine the argument type of the builtins.  The code later on
1203     assumes that the return and argument type are the same.  */
1204  if (bswap32_p)
1205    {
1206      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1207      bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1208    }
1209
1210  if (bswap64_p)
1211    {
1212      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1213      bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1214    }
1215
1216  memset (&nop_stats, 0, sizeof (nop_stats));
1217  memset (&bswap_stats, 0, sizeof (bswap_stats));
1218  calculate_dominance_info (CDI_DOMINATORS);
1219
1220  FOR_EACH_BB_FN (bb, fun)
1221    {
1222      gimple_stmt_iterator gsi;
1223
1224      /* We do a reverse scan for bswap patterns to make sure we get the
1225	 widest match. As bswap pattern matching doesn't handle previously
1226	 inserted smaller bswap replacements as sub-patterns, the wider
1227	 variant wouldn't be detected.  */
1228      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1229	{
1230	  gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
1231	  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1232	  enum tree_code code;
1233	  struct symbolic_number n;
1234	  bool bswap;
1235
	  /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
	     might be moved to a different basic block by bswap_replace and gsi
	     must not point to it if that's the case.  Doing the gsi_prev here
	     makes sure that gsi points to the statement previous to cur_stmt
	     while still making sure that all statements in this basic block
	     are considered.  */
1242	  gsi_prev (&gsi);
1243
1244	  if (!is_gimple_assign (cur_stmt))
1245	    continue;
1246
1247	  code = gimple_assign_rhs_code (cur_stmt);
1248	  switch (code)
1249	    {
1250	    case LROTATE_EXPR:
1251	    case RROTATE_EXPR:
1252	      if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
1253		  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
1254		     % BITS_PER_UNIT)
1255		continue;
1256	      /* Fall through.  */
1257	    case BIT_IOR_EXPR:
1258	      break;
1259	    default:
1260	      continue;
1261	    }
1262
1263	  ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
1264
1265	  if (!ins_stmt)
1266	    continue;
1267
1268	  switch (n.range)
1269	    {
1270	    case 16:
1271	      /* Already in canonical form, nothing to do.  */
1272	      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1273		continue;
1274	      load_type = bswap_type = uint16_type_node;
1275	      break;
1276	    case 32:
1277	      load_type = uint32_type_node;
1278	      if (bswap32_p)
1279		{
1280		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1281		  bswap_type = bswap32_type;
1282		}
1283	      break;
1284	    case 64:
1285	      load_type = uint64_type_node;
1286	      if (bswap64_p)
1287		{
1288		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1289		  bswap_type = bswap64_type;
1290		}
1291	      break;
1292	    default:
1293	      continue;
1294	    }
1295
1296	  if (bswap && !fndecl && n.range != 16)
1297	    continue;
1298
1299	  if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1300			     bswap_type, load_type, &n, bswap))
1301	    changed = true;
1302	}
1303    }
1304
1305  statistics_counter_event (fun, "16-bit nop implementations found",
1306			    nop_stats.found_16bit);
1307  statistics_counter_event (fun, "32-bit nop implementations found",
1308			    nop_stats.found_32bit);
1309  statistics_counter_event (fun, "64-bit nop implementations found",
1310			    nop_stats.found_64bit);
1311  statistics_counter_event (fun, "16-bit bswap implementations found",
1312			    bswap_stats.found_16bit);
1313  statistics_counter_event (fun, "32-bit bswap implementations found",
1314			    bswap_stats.found_32bit);
1315  statistics_counter_event (fun, "64-bit bswap implementations found",
1316			    bswap_stats.found_64bit);
1317
1318  return (changed ? TODO_update_ssa : 0);
1319}
1320
1321} // anon namespace
1322
1323gimple_opt_pass *
1324make_pass_optimize_bswap (gcc::context *ctxt)
1325{
1326  return new pass_optimize_bswap (ctxt);
1327}
1328
1329namespace {
1330
1331/* Struct recording one operand for the store, which is either a constant,
1332   then VAL represents the constant and all the other fields are zero, or
1333   a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1334   and the other fields also reflect the memory load, or an SSA name, then
   VAL represents the SSA name and all the other fields are zero.  */
1336
1337class store_operand_info
1338{
1339public:
1340  tree val;
1341  tree base_addr;
1342  poly_uint64 bitsize;
1343  poly_uint64 bitpos;
1344  poly_uint64 bitregion_start;
1345  poly_uint64 bitregion_end;
1346  gimple *stmt;
1347  bool bit_not_p;
1348  store_operand_info ();
1349};
1350
1351store_operand_info::store_operand_info ()
1352  : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
1353    bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
1354{
1355}
1356
1357/* Struct recording the information about a single store of an immediate
1358   to memory.  These are created in the first phase and coalesced into
1359   merged_store_group objects in the second phase.  */
1360
1361class store_immediate_info
1362{
1363public:
1364  unsigned HOST_WIDE_INT bitsize;
1365  unsigned HOST_WIDE_INT bitpos;
1366  unsigned HOST_WIDE_INT bitregion_start;
1367  /* This is one past the last bit of the bit region.  */
1368  unsigned HOST_WIDE_INT bitregion_end;
1369  gimple *stmt;
1370  unsigned int order;
1371  /* INTEGER_CST for constant stores, MEM_REF for memory copy,
1372     BIT_*_EXPR for logical bitwise operation, BIT_INSERT_EXPR
1373     for bit insertion.
1374     LROTATE_EXPR if it can be only bswap optimized and
1375     ops are not really meaningful.
1376     NOP_EXPR if bswap optimization detected identity, ops
1377     are not meaningful.  */
1378  enum tree_code rhs_code;
1379  /* Two fields for bswap optimization purposes.  */
1380  struct symbolic_number n;
1381  gimple *ins_stmt;
1382  /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing.  */
1383  bool bit_not_p;
1384  /* True if ops have been swapped and thus ops[1] represents
1385     rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2.  */
1386  bool ops_swapped_p;
1387  /* The index number of the landing pad, or 0 if there is none.  */
1388  int lp_nr;
1389  /* Operands.  For BIT_*_EXPR rhs_code both operands are used, otherwise
1390     just the first one.  */
1391  store_operand_info ops[2];
1392  store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1393			unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1394			gimple *, unsigned int, enum tree_code,
1395			struct symbolic_number &, gimple *, bool, int,
1396			const store_operand_info &,
1397			const store_operand_info &);
1398};
1399
1400store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
1401					    unsigned HOST_WIDE_INT bp,
1402					    unsigned HOST_WIDE_INT brs,
1403					    unsigned HOST_WIDE_INT bre,
1404					    gimple *st,
1405					    unsigned int ord,
1406					    enum tree_code rhscode,
1407					    struct symbolic_number &nr,
1408					    gimple *ins_stmtp,
1409					    bool bitnotp,
1410					    int nr2,
1411					    const store_operand_info &op0r,
1412					    const store_operand_info &op1r)
1413  : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
1414    stmt (st), order (ord), rhs_code (rhscode), n (nr),
1415    ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
1416    lp_nr (nr2)
1417#if __cplusplus >= 201103L
1418    , ops { op0r, op1r }
1419{
1420}
1421#else
1422{
1423  ops[0] = op0r;
1424  ops[1] = op1r;
1425}
1426#endif
1427
1428/* Struct representing a group of stores to contiguous memory locations.
1429   These are produced by the second phase (coalescing) and consumed in the
1430   third phase that outputs the widened stores.  */
1431
1432class merged_store_group
1433{
1434public:
1435  unsigned HOST_WIDE_INT start;
1436  unsigned HOST_WIDE_INT width;
1437  unsigned HOST_WIDE_INT bitregion_start;
1438  unsigned HOST_WIDE_INT bitregion_end;
1439  /* The size of the allocated memory for val and mask.  */
1440  unsigned HOST_WIDE_INT buf_size;
1441  unsigned HOST_WIDE_INT align_base;
1442  poly_uint64 load_align_base[2];
1443
1444  unsigned int align;
1445  unsigned int load_align[2];
1446  unsigned int first_order;
1447  unsigned int last_order;
1448  bool bit_insertion;
1449  bool only_constants;
1450  unsigned int first_nonmergeable_order;
1451  int lp_nr;
1452
1453  auto_vec<store_immediate_info *> stores;
1454  /* We record the first and last original statements in the sequence because
1455     we'll need their vuse/vdef and replacement position.  It's easier to keep
1456     track of them separately as 'stores' is reordered by apply_stores.  */
1457  gimple *last_stmt;
1458  gimple *first_stmt;
1459  unsigned char *val;
1460  unsigned char *mask;
1461
1462  merged_store_group (store_immediate_info *);
1463  ~merged_store_group ();
1464  bool can_be_merged_into (store_immediate_info *);
1465  void merge_into (store_immediate_info *);
1466  void merge_overlapping (store_immediate_info *);
1467  bool apply_stores ();
1468private:
1469  void do_merge (store_immediate_info *);
1470};
1471
1472/* Debug helper.  Dump LEN elements of byte array PTR to FD in hex.  */
1473
1474static void
1475dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1476{
1477  if (!fd)
1478    return;
1479
1480  for (unsigned int i = 0; i < len; i++)
1481    fprintf (fd, "%02x ", ptr[i]);
1482  fprintf (fd, "\n");
1483}
1484
1485/* Clear out LEN bits starting from bit START in the byte array
1486   PTR.  This clears the bits to the *right* from START.
1487   START must be within [0, BITS_PER_UNIT) and counts starting from
1488   the least significant bit.  */
1489
1490static void
1491clear_bit_region_be (unsigned char *ptr, unsigned int start,
1492		     unsigned int len)
1493{
1494  if (len == 0)
1495    return;
1496  /* Clear len bits to the right of start.  */
1497  else if (len <= start + 1)
1498    {
1499      unsigned char mask = (~(~0U << len));
1500      mask = mask << (start + 1U - len);
1501      ptr[0] &= ~mask;
1502    }
1503  else if (start != BITS_PER_UNIT - 1)
1504    {
1505      clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1506      clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1507			   len - (start % BITS_PER_UNIT) - 1);
1508    }
1509  else if (start == BITS_PER_UNIT - 1
1510	   && len > BITS_PER_UNIT)
1511    {
1512      unsigned int nbytes = len / BITS_PER_UNIT;
1513      memset (ptr, 0, nbytes);
1514      if (len % BITS_PER_UNIT != 0)
1515	clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1516			     len % BITS_PER_UNIT);
1517    }
1518  else
1519    gcc_unreachable ();
1520}
1521
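/* For example (illustrative only): calling clear_bit_region_be on a byte
   holding 0xff with START 5 and LEN 3 clears bits 5, 4 and 3 (counting from
   the least significant bit), leaving 0xc7.  */
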
1522/* In the byte array PTR clear the bit region starting at bit
   START that is LEN bits wide.
1524   For regions spanning multiple bytes do this recursively until we reach
1525   zero LEN or a region contained within a single byte.  */
1526
1527static void
1528clear_bit_region (unsigned char *ptr, unsigned int start,
1529		  unsigned int len)
1530{
1531  /* Degenerate base case.  */
1532  if (len == 0)
1533    return;
1534  else if (start >= BITS_PER_UNIT)
1535    clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1536  /* Second base case.  */
1537  else if ((start + len) <= BITS_PER_UNIT)
1538    {
1539      unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
1540      mask >>= BITS_PER_UNIT - (start + len);
1541
1542      ptr[0] &= ~mask;
1543
1544      return;
1545    }
1546  /* Clear most significant bits in a byte and proceed with the next byte.  */
1547  else if (start != 0)
1548    {
1549      clear_bit_region (ptr, start, BITS_PER_UNIT - start);
1550      clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
1551    }
1552  /* Whole bytes need to be cleared.  */
1553  else if (start == 0 && len > BITS_PER_UNIT)
1554    {
1555      unsigned int nbytes = len / BITS_PER_UNIT;
1556      /* We could recurse on each byte but we clear whole bytes, so a simple
1557	 memset will do.  */
1558      memset (ptr, '\0', nbytes);
1559      /* Clear the remaining sub-byte region if there is one.  */
1560      if (len % BITS_PER_UNIT != 0)
1561	clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
1562    }
1563  else
1564    gcc_unreachable ();
1565}
1566
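/* For example (illustrative only): calling clear_bit_region on the two bytes
   { 0xff, 0xff } with START 4 and LEN 8 clears the high nibble of the first
   byte and the low nibble of the second, giving { 0x0f, 0xf0 }.  */
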
1567/* Write BITLEN bits of EXPR to the byte array PTR at
1568   bit position BITPOS.  PTR should contain TOTAL_BYTES elements.
1569   Return true if the operation succeeded.  */
1570
1571static bool
1572encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
1573		       unsigned int total_bytes)
1574{
1575  unsigned int first_byte = bitpos / BITS_PER_UNIT;
1576  bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1577			|| (bitpos % BITS_PER_UNIT)
1578			|| !int_mode_for_size (bitlen, 0).exists ());
1579  bool empty_ctor_p
1580    = (TREE_CODE (expr) == CONSTRUCTOR
1581       && CONSTRUCTOR_NELTS (expr) == 0
1582       && TYPE_SIZE_UNIT (TREE_TYPE (expr))
       && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
1584
1585  if (!sub_byte_op_p)
1586    {
1587      if (first_byte >= total_bytes)
1588	return false;
1589      total_bytes -= first_byte;
1590      if (empty_ctor_p)
1591	{
1592	  unsigned HOST_WIDE_INT rhs_bytes
1593	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1594	  if (rhs_bytes > total_bytes)
1595	    return false;
1596	  memset (ptr + first_byte, '\0', rhs_bytes);
1597	  return true;
1598	}
1599      return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1600    }
1601
1602  /* LITTLE-ENDIAN
1603     We are writing a non byte-sized quantity or at a position that is not
1604     at a byte boundary.
1605     |--------|--------|--------| ptr + first_byte
1606           ^              ^
1607           xxx xxxxxxxx xxx< bp>
1608           |______EXPR____|
1609
1610     First native_encode_expr EXPR into a temporary buffer and shift each
1611     byte in the buffer by 'bp' (carrying the bits over as necessary).
1612     |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1613                                              <------bitlen---->< bp>
1614    Then we clear the destination bits:
1615    |---00000|00000000|000-----| ptr + first_byte
1616        <-------bitlen--->< bp>
1617
1618    Finally we ORR the bytes of the shifted EXPR into the cleared region:
1619    |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1620
1621   BIG-ENDIAN
1622   We are writing a non byte-sized quantity or at a position that is not
1623   at a byte boundary.
1624     ptr + first_byte |--------|--------|--------|
1625                            ^              ^
1626                       <bp >xxx xxxxxxxx xxx
1627                            |_____EXPR_____|
1628
1629     First native_encode_expr EXPR into a temporary buffer and shift each
1630     byte in the buffer to the right by (carrying the bits over as necessary).
1631     We shift by as much as needed to align the most significant bit of EXPR
1632     with bitpos:
1633     |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1634        <---bitlen---->          <bp ><-----bitlen----->
1635    Then we clear the destination bits:
1636    ptr + first_byte |-----000||00000000||00000---|
1637                      <bp ><-------bitlen----->
1638
1639    Finally we ORR the bytes of the shifted EXPR into the cleared region:
1640    ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1641    The awkwardness comes from the fact that bitpos is counted from the
1642    most significant bit of a byte.  */
1643
1644  /* We must be dealing with fixed-size data at this point, since the
1645     total size is also fixed.  */
1646  unsigned int byte_size;
1647  if (empty_ctor_p)
1648    {
1649      unsigned HOST_WIDE_INT rhs_bytes
1650	= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1651      if (rhs_bytes > total_bytes)
1652	return false;
1653      byte_size = rhs_bytes;
1654    }
1655  else
1656    {
1657      fixed_size_mode mode
1658	= as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
1659      byte_size = GET_MODE_SIZE (mode);
1660    }
1661  /* Allocate an extra byte so that we have space to shift into.  */
1662  byte_size++;
1663  unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
1664  memset (tmpbuf, '\0', byte_size);
1665  /* The store detection code should only have allowed constants that are
1666     accepted by native_encode_expr or empty ctors.  */
1667  if (!empty_ctor_p
1668      && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
1669    gcc_unreachable ();
1670
1671  /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1672     bytes to write.  This means it can write more than
1673     ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1674     write 8 bytes for a bitlen of 40).  Skip the bytes that are not within
1675     bitlen and zero out the bits that are not relevant as well (that may
1676     contain a sign bit due to sign-extension).  */
1677  unsigned int padding
1678    = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
1679  /* On big-endian the padding is at the 'front' so just skip the initial
1680     bytes.  */
1681  if (BYTES_BIG_ENDIAN)
1682    tmpbuf += padding;
1683
1684  byte_size -= padding;
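
  /* E.g. continuing the bitlen of 40 example above with an 8-byte mode:
     byte_size was 9 after adding the spare shift byte, ROUND_UP (40, 8) / 8
     is 5, so the padding is 3 and byte_size goes back down to 6
     (5 data bytes plus the spare byte).  */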
1685
1686  if (bitlen % BITS_PER_UNIT != 0)
1687    {
1688      if (BYTES_BIG_ENDIAN)
1689	clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1690			     BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1691      else
1692	clear_bit_region (tmpbuf, bitlen,
1693			  byte_size * BITS_PER_UNIT - bitlen);
1694    }
1695  /* Left shifting relies on the last byte being clear if bitlen is
     a multiple of BITS_PER_UNIT, which might not be the case if
     there are padding bytes.  */
1698  else if (!BYTES_BIG_ENDIAN)
1699    tmpbuf[byte_size - 1] = '\0';
1700
1701  /* Clear the bit region in PTR where the bits from TMPBUF will be
1702     inserted into.  */
1703  if (BYTES_BIG_ENDIAN)
1704    clear_bit_region_be (ptr + first_byte,
1705			 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1706  else
1707    clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1708
1709  int shift_amnt;
1710  int bitlen_mod = bitlen % BITS_PER_UNIT;
1711  int bitpos_mod = bitpos % BITS_PER_UNIT;
1712
1713  bool skip_byte = false;
1714  if (BYTES_BIG_ENDIAN)
1715    {
1716      /* BITPOS and BITLEN are exactly aligned and no shifting
1717	 is necessary.  */
1718      if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1719	  || (bitpos_mod == 0 && bitlen_mod == 0))
1720	shift_amnt = 0;
1721      /* |. . . . . . . .|
1722	  <bp >   <blen >.
1723	 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
1724	 of the value until it aligns with 'bp' in the next byte over.  */
1725      else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
1726	{
1727	  shift_amnt = bitlen_mod + bitpos_mod;
1728	  skip_byte = bitlen_mod != 0;
1729	}
1730      /* |. . . . . . . .|
1731	  <----bp--->
1732	    <---blen---->.
1733	 Shift the value right within the same byte so it aligns with 'bp'.  */
1734      else
1735	shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
1736    }
1737  else
1738    shift_amnt = bitpos % BITS_PER_UNIT;
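
  /* For example, with BITS_PER_UNIT == 8, bitpos_mod == 2 and bitlen_mod == 3,
     on big-endian we get shift_amnt == 5 with skip_byte set: after shifting
     right by 5 the most significant bit of EXPR lines up with bit position 2
     of the following byte and the now-empty leading byte is dropped below.
     On little-endian the same store would simply shift left by 2.  */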
1739
1740  /* Create the shifted version of EXPR.  */
1741  if (!BYTES_BIG_ENDIAN)
1742    {
1743      shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
1744      if (shift_amnt == 0)
1745	byte_size--;
1746    }
1747  else
1748    {
1749      gcc_assert (BYTES_BIG_ENDIAN);
1750      shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
1751      /* If shifting right forced us to move into the next byte skip the now
1752	 empty byte.  */
1753      if (skip_byte)
1754	{
1755	  tmpbuf++;
1756	  byte_size--;
1757	}
1758    }
1759
1760  /* Insert the bits from TMPBUF.  */
1761  for (unsigned int i = 0; i < byte_size; i++)
1762    ptr[first_byte + i] |= tmpbuf[i];
1763
1764  return true;
1765}
1766
1767/* Sorting function for store_immediate_info objects.
1768   Sorts them by bitposition.  */
1769
1770static int
1771sort_by_bitpos (const void *x, const void *y)
1772{
1773  store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1774  store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1775
1776  if ((*tmp)->bitpos < (*tmp2)->bitpos)
1777    return -1;
1778  else if ((*tmp)->bitpos > (*tmp2)->bitpos)
1779    return 1;
1780  else
    /* If the bit positions are the same, fall back to the order, which is
       guaranteed to be different.  */
1783    return (*tmp)->order - (*tmp2)->order;
1784}
1785
1786/* Sorting function for store_immediate_info objects.
1787   Sorts them by the order field.  */
1788
1789static int
1790sort_by_order (const void *x, const void *y)
1791{
1792  store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1793  store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1794
1795  if ((*tmp)->order < (*tmp2)->order)
1796    return -1;
1797  else if ((*tmp)->order > (*tmp2)->order)
1798    return 1;
1799
1800  gcc_unreachable ();
1801}
1802
1803/* Initialize a merged_store_group object from a store_immediate_info
1804   object.  */
1805
1806merged_store_group::merged_store_group (store_immediate_info *info)
1807{
1808  start = info->bitpos;
1809  width = info->bitsize;
1810  bitregion_start = info->bitregion_start;
1811  bitregion_end = info->bitregion_end;
1812  /* VAL has memory allocated for it in apply_stores once the group
1813     width has been finalized.  */
1814  val = NULL;
1815  mask = NULL;
1816  bit_insertion = false;
1817  only_constants = info->rhs_code == INTEGER_CST;
1818  first_nonmergeable_order = ~0U;
1819  lp_nr = info->lp_nr;
1820  unsigned HOST_WIDE_INT align_bitpos = 0;
1821  get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1822			  &align, &align_bitpos);
1823  align_base = start - align_bitpos;
1824  for (int i = 0; i < 2; ++i)
1825    {
1826      store_operand_info &op = info->ops[i];
1827      if (op.base_addr == NULL_TREE)
1828	{
1829	  load_align[i] = 0;
1830	  load_align_base[i] = 0;
1831	}
1832      else
1833	{
1834	  get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
1835	  load_align_base[i] = op.bitpos - align_bitpos;
1836	}
1837    }
1838  stores.create (1);
1839  stores.safe_push (info);
1840  last_stmt = info->stmt;
1841  last_order = info->order;
1842  first_stmt = last_stmt;
1843  first_order = last_order;
1844  buf_size = 0;
1845}
1846
1847merged_store_group::~merged_store_group ()
1848{
1849  if (val)
1850    XDELETEVEC (val);
1851}
1852
1853/* Return true if the store described by INFO can be merged into the group.  */
1854
1855bool
1856merged_store_group::can_be_merged_into (store_immediate_info *info)
1857{
1858  /* Do not merge bswap patterns.  */
1859  if (info->rhs_code == LROTATE_EXPR)
1860    return false;
1861
1862  if (info->lp_nr != lp_nr)
1863    return false;
1864
1865  /* The canonical case.  */
1866  if (info->rhs_code == stores[0]->rhs_code)
1867    return true;
1868
1869  /* BIT_INSERT_EXPR is compatible with INTEGER_CST.  */
1870  if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
1871    return true;
1872
1873  if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
1874    return true;
1875
1876  /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores.  */
1877  if (info->rhs_code == MEM_REF
1878      && (stores[0]->rhs_code == INTEGER_CST
1879	  || stores[0]->rhs_code == BIT_INSERT_EXPR)
1880      && info->bitregion_start == stores[0]->bitregion_start
1881      && info->bitregion_end == stores[0]->bitregion_end)
1882    return true;
1883
1884  if (stores[0]->rhs_code == MEM_REF
1885      && (info->rhs_code == INTEGER_CST
1886	  || info->rhs_code == BIT_INSERT_EXPR)
1887      && info->bitregion_start == stores[0]->bitregion_start
1888      && info->bitregion_end == stores[0]->bitregion_end)
1889    return true;
1890
1891  return false;
1892}
1893
1894/* Helper method for merge_into and merge_overlapping to do
1895   the common part.  */
1896
1897void
1898merged_store_group::do_merge (store_immediate_info *info)
1899{
1900  bitregion_start = MIN (bitregion_start, info->bitregion_start);
1901  bitregion_end = MAX (bitregion_end, info->bitregion_end);
1902
1903  unsigned int this_align;
1904  unsigned HOST_WIDE_INT align_bitpos = 0;
1905  get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1906			  &this_align, &align_bitpos);
1907  if (this_align > align)
1908    {
1909      align = this_align;
1910      align_base = info->bitpos - align_bitpos;
1911    }
1912  for (int i = 0; i < 2; ++i)
1913    {
1914      store_operand_info &op = info->ops[i];
1915      if (!op.base_addr)
1916	continue;
1917
1918      get_object_alignment_1 (op.val, &this_align, &align_bitpos);
1919      if (this_align > load_align[i])
1920	{
1921	  load_align[i] = this_align;
1922	  load_align_base[i] = op.bitpos - align_bitpos;
1923	}
1924    }
1925
1926  gimple *stmt = info->stmt;
1927  stores.safe_push (info);
1928  if (info->order > last_order)
1929    {
1930      last_order = info->order;
1931      last_stmt = stmt;
1932    }
1933  else if (info->order < first_order)
1934    {
1935      first_order = info->order;
1936      first_stmt = stmt;
1937    }
1938  if (info->rhs_code != INTEGER_CST)
1939    only_constants = false;
1940}
1941
1942/* Merge a store recorded by INFO into this merged store.
1943   The store is not overlapping with the existing recorded
1944   stores.  */
1945
1946void
1947merged_store_group::merge_into (store_immediate_info *info)
1948{
  /* Make sure we're inserting where we think we are: after the current
     group, with no gap between the bitregions.  */
1950  gcc_assert (info->bitpos >= start + width
1951	      && info->bitregion_start <= bitregion_end);
1952
1953  width = info->bitpos + info->bitsize - start;
1954  do_merge (info);
1955}
1956
1957/* Merge a store described by INFO into this merged store.
   INFO overlaps in some way with the current store (the contiguous case
   is handled by merged_store_group::merge_into).  */
1960
1961void
1962merged_store_group::merge_overlapping (store_immediate_info *info)
1963{
1964  /* If the store extends the size of the group, extend the width.  */
1965  if (info->bitpos + info->bitsize > start + width)
1966    width = info->bitpos + info->bitsize - start;
1967
1968  do_merge (info);
1969}
1970
1971/* Go through all the recorded stores in this group in program order and
1972   apply their values to the VAL byte array to create the final merged
1973   value.  Return true if the operation succeeded.  */
1974
1975bool
1976merged_store_group::apply_stores ()
1977{
1978  /* Make sure we have more than one store in the group, otherwise we cannot
1979     merge anything.  */
1980  if (bitregion_start % BITS_PER_UNIT != 0
1981      || bitregion_end % BITS_PER_UNIT != 0
1982      || stores.length () == 1)
1983    return false;
1984
1985  stores.qsort (sort_by_order);
1986  store_immediate_info *info;
1987  unsigned int i;
1988  /* Create a power-of-2-sized buffer for native_encode_expr.  */
1989  buf_size = 1 << ceil_log2 ((bitregion_end - bitregion_start) / BITS_PER_UNIT);
1990  val = XNEWVEC (unsigned char, 2 * buf_size);
1991  mask = val + buf_size;
1992  memset (val, 0, buf_size);
1993  memset (mask, ~0U, buf_size);
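  /* MASK starts as all ones; the bits covered by each store are cleared
     below, so a set bit in MASK means the corresponding bit of VAL was
     not written by any store in the group.  */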
1994
1995  FOR_EACH_VEC_ELT (stores, i, info)
1996    {
1997      unsigned int pos_in_buffer = info->bitpos - bitregion_start;
1998      tree cst;
1999      if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
2000	cst = info->ops[0].val;
2001      else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
2002	cst = info->ops[1].val;
2003      else
2004	cst = NULL_TREE;
2005      bool ret = true;
2006      if (cst)
2007	{
2008	  if (info->rhs_code == BIT_INSERT_EXPR)
2009	    bit_insertion = true;
2010	  else
2011	    ret = encode_tree_to_bitpos (cst, val, info->bitsize,
2012					 pos_in_buffer, buf_size);
2013	}
2014      unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2015      if (BYTES_BIG_ENDIAN)
2016	clear_bit_region_be (m, (BITS_PER_UNIT - 1
2017				 - (pos_in_buffer % BITS_PER_UNIT)),
2018			     info->bitsize);
2019      else
2020	clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
2021      if (cst && dump_file && (dump_flags & TDF_DETAILS))
2022	{
2023	  if (ret)
2024	    {
2025	      fputs ("After writing ", dump_file);
2026	      print_generic_expr (dump_file, cst, TDF_NONE);
2027	      fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
2028		       " at position %d\n", info->bitsize, pos_in_buffer);
2029	      fputs ("  the merged value contains ", dump_file);
2030	      dump_char_array (dump_file, val, buf_size);
2031	      fputs ("  the merged mask contains  ", dump_file);
2032	      dump_char_array (dump_file, mask, buf_size);
2033	      if (bit_insertion)
2034		fputs ("  bit insertion is required\n", dump_file);
2035	    }
2036	  else
2037	    fprintf (dump_file, "Failed to merge stores\n");
2038	}
2039      if (!ret)
2040	return false;
2041    }
2042  stores.qsort (sort_by_bitpos);
2043  return true;
2044}
2045
2046/* Structure describing the store chain.  */
2047
2048class imm_store_chain_info
2049{
2050public:
2051  /* Doubly-linked list that imposes an order on chain processing.
2052     PNXP (prev's next pointer) points to the head of a list, or to
2053     the next field in the previous chain in the list.
2054     See pass_store_merging::m_stores_head for more rationale.  */
2055  imm_store_chain_info *next, **pnxp;
2056  tree base_addr;
2057  auto_vec<store_immediate_info *> m_store_info;
2058  auto_vec<merged_store_group *> m_merged_store_groups;
2059
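  /* Link this chain at the head of the list INSPT points to and make the
     previous head, if any, point back at our next field.  */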
2060  imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2061  : next (inspt), pnxp (&inspt), base_addr (b_a)
2062  {
2063    inspt = this;
2064    if (next)
2065      {
2066	gcc_checking_assert (pnxp == next->pnxp);
2067	next->pnxp = &next;
2068      }
2069  }
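  /* Unlink this chain from the list, updating the back-pointer of the
     following chain, if any.  */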
2070  ~imm_store_chain_info ()
2071  {
2072    *pnxp = next;
2073    if (next)
2074      {
2075	gcc_checking_assert (&next == next->pnxp);
2076	next->pnxp = pnxp;
2077      }
2078  }
2079  bool terminate_and_process_chain ();
2080  bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
2081			   unsigned int);
2082  bool coalesce_immediate_stores ();
2083  bool output_merged_store (merged_store_group *);
2084  bool output_merged_stores ();
2085};
2086
2087const pass_data pass_data_tree_store_merging = {
2088  GIMPLE_PASS,     /* type */
2089  "store-merging", /* name */
2090  OPTGROUP_NONE,   /* optinfo_flags */
2091  TV_GIMPLE_STORE_MERGING,	 /* tv_id */
2092  PROP_ssa,	/* properties_required */
2093  0,		   /* properties_provided */
2094  0,		   /* properties_destroyed */
2095  0,		   /* todo_flags_start */
2096  TODO_update_ssa, /* todo_flags_finish */
2097};
2098
2099class pass_store_merging : public gimple_opt_pass
2100{
2101public:
2102  pass_store_merging (gcc::context *ctxt)
2103    : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head ()
2104  {
2105  }
2106
2107  /* Pass not supported for PDP-endian, nor for insane hosts or
2108     target character sizes where native_{encode,interpret}_expr
2109     doesn't work properly.  */
2110  virtual bool
2111  gate (function *)
2112  {
2113    return flag_store_merging
2114	   && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
2115	   && CHAR_BIT == 8
2116	   && BITS_PER_UNIT == 8;
2117  }
2118
2119  virtual unsigned int execute (function *);
2120
2121private:
2122  hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
2123
2124  /* Form a doubly-linked stack of the elements of m_stores, so that
2125     we can iterate over them in a predictable way.  Using this order
2126     avoids extraneous differences in the compiler output just because
2127     of tree pointer variations (e.g. different chains end up in
2128     different positions of m_stores, so they are handled in different
2129     orders, so they allocate or release SSA names in different
2130     orders, and when they get reused, subsequent passes end up
2131     getting different SSA names, which may ultimately change
2132     decisions when going out of SSA).  */
2133  imm_store_chain_info *m_stores_head;
2134
2135  bool process_store (gimple *);
2136  bool terminate_and_process_chain (imm_store_chain_info *);
2137  bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
2138  bool terminate_and_process_all_chains ();
2139}; // class pass_store_merging
2140
2141/* Terminate and process all recorded chains.  Return true if any changes
2142   were made.  */
2143
2144bool
2145pass_store_merging::terminate_and_process_all_chains ()
2146{
2147  bool ret = false;
2148  while (m_stores_head)
2149    ret |= terminate_and_process_chain (m_stores_head);
2150  gcc_assert (m_stores.is_empty ());
2151  return ret;
2152}
2153
2154/* Terminate all chains that are affected by the statement STMT.
2155   CHAIN_INFO is the chain we should ignore from the checks if
2156   non-NULL.  Return true if any changes were made.  */
2157
2158bool
2159pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
2160						     **chain_info,
2161						   gimple *stmt)
2162{
2163  bool ret = false;
2164
2165  /* If the statement doesn't touch memory it can't alias.  */
2166  if (!gimple_vuse (stmt))
2167    return false;
2168
2169  tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
2170  ao_ref store_lhs_ref;
2171  ao_ref_init (&store_lhs_ref, store_lhs);
2172  for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
2173    {
2174      next = cur->next;
2175
2176      /* We already checked all the stores in chain_info and terminated the
2177	 chain if necessary.  Skip it here.  */
2178      if (chain_info && *chain_info == cur)
2179	continue;
2180
2181      store_immediate_info *info;
2182      unsigned int i;
2183      FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
2184	{
2185	  tree lhs = gimple_assign_lhs (info->stmt);
2186	  ao_ref lhs_ref;
2187	  ao_ref_init (&lhs_ref, lhs);
2188	  if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
2189	      || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
2190	      || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
2191						   &lhs_ref, false)))
2192	    {
2193	      if (dump_file && (dump_flags & TDF_DETAILS))
2194		{
2195		  fprintf (dump_file, "stmt causes chain termination:\n");
2196		  print_gimple_stmt (dump_file, stmt, 0);
2197		}
2198	      ret |= terminate_and_process_chain (cur);
2199	      break;
2200	    }
2201	}
2202    }
2203
2204  return ret;
2205}
2206
/* Helper function.  Terminate the recorded chain CHAIN_INFO storing to its
   base object.  Return true if the merging and output was successful.  The
   m_stores entry is removed after the processing in any case.  */
2210
2211bool
2212pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
2213{
2214  bool ret = chain_info->terminate_and_process_chain ();
2215  m_stores.remove (chain_info->base_addr);
2216  delete chain_info;
2217  return ret;
2218}
2219
2220/* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2221   may clobber REF.  FIRST and LAST must have non-NULL vdef.  We want to
2222   be able to sink load of REF across stores between FIRST and LAST, up
2223   to right before LAST.  */
2224
2225bool
2226stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2227{
2228  ao_ref r;
2229  ao_ref_init (&r, ref);
2230  unsigned int count = 0;
2231  tree vop = gimple_vdef (last);
2232  gimple *stmt;
2233
2234  /* Return true conservatively if the basic blocks are different.  */
2235  if (gimple_bb (first) != gimple_bb (last))
2236    return true;
2237
2238  do
2239    {
2240      stmt = SSA_NAME_DEF_STMT (vop);
2241      if (stmt_may_clobber_ref_p_1 (stmt, &r))
2242	return true;
2243      if (gimple_store_p (stmt)
2244	  && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2245	return true;
2246      /* Avoid quadratic compile time by bounding the number of checks
2247	 we perform.  */
2248      if (++count > MAX_STORE_ALIAS_CHECKS)
2249	return true;
2250      vop = gimple_vuse (stmt);
2251    }
2252  while (stmt != first);
2253
2254  return false;
2255}
2256
2257/* Return true if INFO->ops[IDX] is mergeable with the
2258   corresponding loads already in MERGED_STORE group.
2259   BASE_ADDR is the base address of the whole store group.  */
2260
2261bool
2262compatible_load_p (merged_store_group *merged_store,
2263		   store_immediate_info *info,
2264		   tree base_addr, int idx)
2265{
2266  store_immediate_info *infof = merged_store->stores[0];
2267  if (!info->ops[idx].base_addr
2268      || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2269		   info->bitpos - infof->bitpos)
2270      || !operand_equal_p (info->ops[idx].base_addr,
2271			   infof->ops[idx].base_addr, 0))
2272    return false;
2273
2274  store_immediate_info *infol = merged_store->stores.last ();
2275  tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2276  /* In this case all vuses should be the same, e.g.
2277     _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2278     or
2279     _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2280     and we can emit the coalesced load next to any of those loads.  */
2281  if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2282      && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2283    return true;
2284
2285  /* Otherwise, at least for now require that the load has the same
2286     vuse as the store.  See following examples.  */
2287  if (gimple_vuse (info->stmt) != load_vuse)
2288    return false;
2289
2290  if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2291      || (infof != infol
2292	  && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2293    return false;
2294
2295  /* If the load is from the same location as the store, already
2296     the construction of the immediate chain info guarantees no intervening
2297     stores, so no further checks are needed.  Example:
2298     _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4;  */
2299  if (known_eq (info->ops[idx].bitpos, info->bitpos)
2300      && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2301    return true;
2302
2303  /* Otherwise, we need to punt if any of the loads can be clobbered by any
2304     of the stores in the group, or any other stores in between those.
2305     Previous calls to compatible_load_p ensured that for all the
2306     merged_store->stores IDX loads, no stmts starting with
2307     merged_store->first_stmt and ending right before merged_store->last_stmt
2308     clobbers those loads.  */
2309  gimple *first = merged_store->first_stmt;
2310  gimple *last = merged_store->last_stmt;
2311  unsigned int i;
2312  store_immediate_info *infoc;
2313  /* The stores are sorted by increasing store bitpos, so if info->stmt store
2314     comes before the so far first load, we'll be changing
2315     merged_store->first_stmt.  In that case we need to give up if
     any of the earlier processed loads are clobbered by the stmts in the
     new range.  */
2318  if (info->order < merged_store->first_order)
2319    {
2320      FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2321	if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2322	  return false;
2323      first = info->stmt;
2324    }
2325  /* Similarly, we could change merged_store->last_stmt, so ensure
2326     in that case no stmts in the new range clobber any of the earlier
2327     processed loads.  */
2328  else if (info->order > merged_store->last_order)
2329    {
2330      FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2331	if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2332	  return false;
2333      last = info->stmt;
2334    }
2335  /* And finally, we'd be adding a new load to the set, ensure it isn't
2336     clobbered in the new range.  */
2337  if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2338    return false;
2339
2340  /* Otherwise, we are looking for:
2341     _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2342     or
2343     _1 = s.a; t.a = _1; _2 = s.b; t.b = _2;  */
2344  return true;
2345}
2346
2347/* Add all refs loaded to compute VAL to REFS vector.  */
2348
2349void
2350gather_bswap_load_refs (vec<tree> *refs, tree val)
2351{
2352  if (TREE_CODE (val) != SSA_NAME)
2353    return;
2354
2355  gimple *stmt = SSA_NAME_DEF_STMT (val);
2356  if (!is_gimple_assign (stmt))
2357    return;
2358
2359  if (gimple_assign_load_p (stmt))
2360    {
2361      refs->safe_push (gimple_assign_rhs1 (stmt));
2362      return;
2363    }
2364
2365  switch (gimple_assign_rhs_class (stmt))
2366    {
2367    case GIMPLE_BINARY_RHS:
2368      gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2369      /* FALLTHRU */
2370    case GIMPLE_UNARY_RHS:
2371      gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2372      break;
2373    default:
2374      gcc_unreachable ();
2375    }
2376}
2377
2378/* Check if there are any stores in M_STORE_INFO after index I
2379   (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
   a potential group ending at END and have their order
2381   smaller than LAST_ORDER.  ALL_INTEGER_CST_P is true if
2382   all the stores already merged and the one under consideration
2383   have rhs_code of INTEGER_CST.  Return true if there are no such stores.
2384   Consider:
2385     MEM[(long long int *)p_28] = 0;
2386     MEM[(long long int *)p_28 + 8B] = 0;
2387     MEM[(long long int *)p_28 + 16B] = 0;
2388     MEM[(long long int *)p_28 + 24B] = 0;
2389     _129 = (int) _130;
2390     MEM[(int *)p_28 + 8B] = _129;
2391     MEM[(int *)p_28].a = -1;
2392   We already have
2393     MEM[(long long int *)p_28] = 0;
2394     MEM[(int *)p_28].a = -1;
2395   stmts in the current group and need to consider if it is safe to
2396   add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2397   There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2398   store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2399   into the group and merging of those 3 stores is successful, merged
2400   stmts will be emitted at the latest store from that group, i.e.
2401   LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2402   The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2403   the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2404   so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2405   into the group.  That way it will be its own store group and will
2406   not be touched.  If ALL_INTEGER_CST_P and there are overlapping
2407   INTEGER_CST stores, those are mergeable using merge_overlapping,
2408   so don't return false for those.
2409
2410   Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2411   (exclusive), whether they don't overlap the bitrange START to END
2412   and have order in between FIRST_ORDER and LAST_ORDER.  This is to
2413   prevent merging in cases like:
2414     MEM <char[12]> [&b + 8B] = {};
2415     MEM[(short *) &b] = 5;
2416     _5 = *x_4(D);
2417     MEM <long long unsigned int> [&b + 2B] = _5;
2418     MEM[(char *)&b + 16B] = 88;
2419     MEM[(int *)&b + 20B] = 1;
2420   The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2421   be merged with it, because the = _5 store overlaps these and is in between
2422   them in sort_by_order ordering.  If it was merged, the merged store would
2423   go after the = _5 store and thus change behavior.  */
2424
2425static bool
2426check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
2427		  bool all_integer_cst_p, unsigned int first_order,
2428		  unsigned int last_order, unsigned HOST_WIDE_INT start,
2429		  unsigned HOST_WIDE_INT end, unsigned int first_earlier,
2430		  unsigned end_earlier)
2431{
2432  unsigned int len = m_store_info.length ();
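  /* First check the stores from FIRST_EARLIER (inclusive) to END_EARLIER
     (exclusive): punt if any of them has order in between FIRST_ORDER and
     LAST_ORDER and overlaps the bitrange starting at START.  */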
2433  for (unsigned int j = first_earlier; j < end_earlier; j++)
2434    {
2435      store_immediate_info *info = m_store_info[j];
2436      if (info->order > first_order
2437	  && info->order < last_order
2438	  && info->bitpos + info->bitsize > start)
2439	return false;
2440    }
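  /* Then check the stores after I in sort_by_bitpos order that overlap the
     bitrange up to END: any of them with order smaller than LAST_ORDER is
     a problem, unless ALL_INTEGER_CST_P and the store is an INTEGER_CST.  */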
2441  for (++i; i < len; ++i)
2442    {
2443      store_immediate_info *info = m_store_info[i];
2444      if (info->bitpos >= end)
2445	break;
2446      if (info->order < last_order
2447	  && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
2448	return false;
2449    }
2450  return true;
2451}
2452
2453/* Return true if m_store_info[first] and at least one following store
   form a group which stores a value of try_size bits that is byte swapped
   from a memory load or some value, or is the identity of some value.
2456   This uses the bswap pass APIs.  */
2457
2458bool
2459imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2460					  unsigned int first,
2461					  unsigned int try_size,
2462					  unsigned int first_earlier)
2463{
2464  unsigned int len = m_store_info.length (), last = first;
2465  unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2466  if (width >= try_size)
2467    return false;
2468  for (unsigned int i = first + 1; i < len; ++i)
2469    {
2470      if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
2471	  || m_store_info[i]->lp_nr != merged_store->lp_nr
2472	  || m_store_info[i]->ins_stmt == NULL)
2473	return false;
2474      width += m_store_info[i]->bitsize;
2475      if (width >= try_size)
2476	{
2477	  last = i;
2478	  break;
2479	}
2480    }
2481  if (width != try_size)
2482    return false;
2483
2484  bool allow_unaligned
2485    = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
2486  /* Punt if the combined store would not be aligned and we need alignment.  */
2487  if (!allow_unaligned)
2488    {
2489      unsigned int align = merged_store->align;
2490      unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2491      for (unsigned int i = first + 1; i <= last; ++i)
2492	{
2493	  unsigned int this_align;
2494	  unsigned HOST_WIDE_INT align_bitpos = 0;
2495	  get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2496				  &this_align, &align_bitpos);
2497	  if (this_align > align)
2498	    {
2499	      align = this_align;
2500	      align_base = m_store_info[i]->bitpos - align_bitpos;
2501	    }
2502	}
2503      unsigned HOST_WIDE_INT align_bitpos
2504	= (m_store_info[first]->bitpos - align_base) & (align - 1);
2505      if (align_bitpos)
2506	align = least_bit_hwi (align_bitpos);
2507      if (align < try_size)
2508	return false;
2509    }
2510
2511  tree type;
2512  switch (try_size)
2513    {
2514    case 16: type = uint16_type_node; break;
2515    case 32: type = uint32_type_node; break;
2516    case 64: type = uint64_type_node; break;
2517    default: gcc_unreachable ();
2518    }
2519  struct symbolic_number n;
2520  gimple *ins_stmt = NULL;
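  /* Tracks where the loads feeding these stores take their vuse from:
     -1 while unknown, 0 if the loads all share a vuse that is not that of
     any store in the group, 1 if each load uses the vuse of its
     corresponding store.  */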
2521  int vuse_store = -1;
2522  unsigned int first_order = merged_store->first_order;
2523  unsigned int last_order = merged_store->last_order;
2524  gimple *first_stmt = merged_store->first_stmt;
2525  gimple *last_stmt = merged_store->last_stmt;
2526  unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
2527  store_immediate_info *infof = m_store_info[first];
2528
2529  for (unsigned int i = first; i <= last; ++i)
2530    {
2531      store_immediate_info *info = m_store_info[i];
2532      struct symbolic_number this_n = info->n;
2533      this_n.type = type;
2534      if (!this_n.base_addr)
2535	this_n.range = try_size / BITS_PER_UNIT;
2536      else
        /* Update vuse in case it has been changed by output_merged_stores.  */
2538	this_n.vuse = gimple_vuse (info->ins_stmt);
2539      unsigned int bitpos = info->bitpos - infof->bitpos;
2540      if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2541			    BYTES_BIG_ENDIAN
2542			    ? try_size - info->bitsize - bitpos
2543			    : bitpos))
2544	return false;
2545      if (this_n.base_addr && vuse_store)
2546	{
2547	  unsigned int j;
2548	  for (j = first; j <= last; ++j)
2549	    if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2550	      break;
2551	  if (j > last)
2552	    {
2553	      if (vuse_store == 1)
2554		return false;
2555	      vuse_store = 0;
2556	    }
2557	}
2558      if (i == first)
2559	{
2560	  n = this_n;
2561	  ins_stmt = info->ins_stmt;
2562	}
2563      else
2564	{
2565	  if (n.base_addr && n.vuse != this_n.vuse)
2566	    {
2567	      if (vuse_store == 0)
2568		return false;
2569	      vuse_store = 1;
2570	    }
2571	  if (info->order > last_order)
2572	    {
2573	      last_order = info->order;
2574	      last_stmt = info->stmt;
2575	    }
2576	  else if (info->order < first_order)
2577	    {
2578	      first_order = info->order;
2579	      first_stmt = info->stmt;
2580	    }
2581	  end = MAX (end, info->bitpos + info->bitsize);
2582
2583	  ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2584					     &this_n, &n);
2585	  if (ins_stmt == NULL)
2586	    return false;
2587	}
2588    }
2589
2590  uint64_t cmpxchg, cmpnop;
2591  find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop);
2592
  /* A complete byte swap should make the symbolic number start with
     the largest digit in the highest order byte.  An unchanged symbolic
     number indicates a read with the same endianness as the target
     architecture.  */
2596  if (n.n != cmpnop && n.n != cmpxchg)
2597    return false;
2598
2599  if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2600    return false;
2601
2602  if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
2603			 merged_store->start, end, first_earlier, first))
2604    return false;
2605
2606  /* Don't handle memory copy this way if normal non-bswap processing
2607     would handle it too.  */
2608  if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2609    {
2610      unsigned int i;
2611      for (i = first; i <= last; ++i)
2612	if (m_store_info[i]->rhs_code != MEM_REF)
2613	  break;
2614      if (i == last + 1)
2615	return false;
2616    }
2617
2618  if (n.n == cmpxchg)
2619    switch (try_size)
2620      {
2621      case 16:
2622	/* Will emit LROTATE_EXPR.  */
2623	break;
2624      case 32:
2625	if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2626	    && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2627	  break;
2628	return false;
2629      case 64:
2630	if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2631	    && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2632	  break;
2633	return false;
2634      default:
2635	gcc_unreachable ();
2636      }
2637
2638  if (!allow_unaligned && n.base_addr)
2639    {
2640      unsigned int align = get_object_alignment (n.src);
2641      if (align < try_size)
2642	return false;
2643    }
2644
2645  /* If each load has vuse of the corresponding store, need to verify
2646     the loads can be sunk right before the last store.  */
2647  if (vuse_store == 1)
2648    {
2649      auto_vec<tree, 64> refs;
2650      for (unsigned int i = first; i <= last; ++i)
2651	gather_bswap_load_refs (&refs,
2652				gimple_assign_rhs1 (m_store_info[i]->stmt));
2653
2654      unsigned int i;
2655      tree ref;
2656      FOR_EACH_VEC_ELT (refs, i, ref)
2657	if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2658	  return false;
2659      n.vuse = NULL_TREE;
2660    }
2661
2662  infof->n = n;
2663  infof->ins_stmt = ins_stmt;
2664  for (unsigned int i = first; i <= last; ++i)
2665    {
2666      m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
2667      m_store_info[i]->ops[0].base_addr = NULL_TREE;
2668      m_store_info[i]->ops[1].base_addr = NULL_TREE;
2669      if (i != first)
2670	merged_store->merge_into (m_store_info[i]);
2671    }
2672
2673  return true;
2674}
2675
2676/* Go through the candidate stores recorded in m_store_info and merge them
2677   into merged_store_group objects recorded into m_merged_store_groups
2678   representing the widened stores.  Return true if coalescing was successful
2679   and the number of widened stores is fewer than the original number
2680   of stores.  */
2681
2682bool
2683imm_store_chain_info::coalesce_immediate_stores ()
2684{
2685  /* Anything less can't be processed.  */
2686  if (m_store_info.length () < 2)
2687    return false;
2688
2689  if (dump_file && (dump_flags & TDF_DETAILS))
2690    fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
2691	     m_store_info.length ());
2692
2693  store_immediate_info *info;
2694  unsigned int i, ignore = 0;
2695  unsigned int first_earlier = 0;
2696  unsigned int end_earlier = 0;
2697
2698  /* Order the stores by the bitposition they write to.  */
2699  m_store_info.qsort (sort_by_bitpos);
2700
2701  info = m_store_info[0];
2702  merged_store_group *merged_store = new merged_store_group (info);
2703  if (dump_file && (dump_flags & TDF_DETAILS))
2704    fputs ("New store group\n", dump_file);
2705
2706  FOR_EACH_VEC_ELT (m_store_info, i, info)
2707    {
2708      unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
2709
2710      if (i <= ignore)
2711	goto done;
2712
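      /* Advance FIRST_EARLIER past stores that end at or before the start
         of the current group; they can never overlap it.  */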
2713      while (first_earlier < end_earlier
2714	     && (m_store_info[first_earlier]->bitpos
2715		 + m_store_info[first_earlier]->bitsize
2716		 <= merged_store->start))
2717	first_earlier++;
2718
2719      /* First try to handle group of stores like:
2720	 p[0] = data >> 24;
2721	 p[1] = data >> 16;
2722	 p[2] = data >> 8;
2723	 p[3] = data;
2724	 using the bswap framework.  */
2725      if (info->bitpos == merged_store->start + merged_store->width
2726	  && merged_store->stores.length () == 1
2727	  && merged_store->stores[0]->ins_stmt != NULL
2728	  && info->lp_nr == merged_store->lp_nr
2729	  && info->ins_stmt != NULL)
2730	{
2731	  unsigned int try_size;
2732	  for (try_size = 64; try_size >= 16; try_size >>= 1)
2733	    if (try_coalesce_bswap (merged_store, i - 1, try_size,
2734				    first_earlier))
2735	      break;
2736
2737	  if (try_size >= 16)
2738	    {
2739	      ignore = i + merged_store->stores.length () - 1;
2740	      m_merged_store_groups.safe_push (merged_store);
2741	      if (ignore < m_store_info.length ())
2742		{
2743		  merged_store = new merged_store_group (m_store_info[ignore]);
2744		  end_earlier = ignore;
2745		}
2746	      else
2747		merged_store = NULL;
2748	      goto done;
2749	    }
2750	}
2751
2752      new_bitregion_start
2753	= MIN (merged_store->bitregion_start, info->bitregion_start);
2754      new_bitregion_end
2755	= MAX (merged_store->bitregion_end, info->bitregion_end);
2756
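      /* Punt on merging this store into the current group if it is ordered
         at or after the first store we already determined to be
         nonmergeable, or if adding it would grow the bitregion beyond the
         store-merging-max-size parameter; in that case fall through below
         and start a new group.  */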
2757      if (info->order >= merged_store->first_nonmergeable_order
2758	  || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
2759	      > (unsigned) param_store_merging_max_size))
2760	;
2761
2762      /* |---store 1---|
2763	       |---store 2---|
2764	 Overlapping stores.  */
2765      else if (IN_RANGE (info->bitpos, merged_store->start,
2766			 merged_store->start + merged_store->width - 1)
2767	       /* |---store 1---||---store 2---|
2768		  Handle also the consecutive INTEGER_CST stores case here,
2769		  as we have here the code to deal with overlaps.  */
2770	       || (info->bitregion_start <= merged_store->bitregion_end
2771		   && info->rhs_code == INTEGER_CST
2772		   && merged_store->only_constants
2773		   && merged_store->can_be_merged_into (info)))
2774	{
2775	  /* Only allow overlapping stores of constants.  */
2776	  if (info->rhs_code == INTEGER_CST
2777	      && merged_store->only_constants
2778	      && info->lp_nr == merged_store->lp_nr)
2779	    {
2780	      unsigned int first_order
2781		= MIN (merged_store->first_order, info->order);
2782	      unsigned int last_order
2783		= MAX (merged_store->last_order, info->order);
2784	      unsigned HOST_WIDE_INT end
2785		= MAX (merged_store->start + merged_store->width,
2786		       info->bitpos + info->bitsize);
2787	      if (check_no_overlap (m_store_info, i, true, first_order,
2788				    last_order, merged_store->start, end,
2789				    first_earlier, end_earlier))
2790		{
2791		  /* check_no_overlap call above made sure there are no
2792		     overlapping stores with non-INTEGER_CST rhs_code
2793		     in between the first and last of the stores we've
2794		     just merged.  If there are any INTEGER_CST rhs_code
2795		     stores in between, we need to merge_overlapping them
2796		     even if in the sort_by_bitpos order there are other
2797		     overlapping stores in between.  Keep those stores as is.
2798		     Example:
2799			MEM[(int *)p_28] = 0;
2800			MEM[(char *)p_28 + 3B] = 1;
2801			MEM[(char *)p_28 + 1B] = 2;
2802			MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
2803		     We can't merge the zero store with the store of two and
2804		     not merge anything else, because the store of one is
2805		     in the original order in between those two, but in
                     sort_by_bitpos order it comes after the last store that
2807		     we can't merge with them.  We can merge the first 3 stores
2808		     and keep the last store as is though.  */
2809		  unsigned int len = m_store_info.length ();
2810		  unsigned int try_order = last_order;
2811		  unsigned int first_nonmergeable_order;
2812		  unsigned int k;
2813		  bool last_iter = false;
2814		  int attempts = 0;
2815		  do
2816		    {
2817		      unsigned int max_order = 0;
2818		      unsigned int min_order = first_order;
2819		      unsigned first_nonmergeable_int_order = ~0U;
2820		      unsigned HOST_WIDE_INT this_end = end;
2821		      k = i;
2822		      first_nonmergeable_order = ~0U;
2823		      for (unsigned int j = i + 1; j < len; ++j)
2824			{
2825			  store_immediate_info *info2 = m_store_info[j];
2826			  if (info2->bitpos >= this_end)
2827			    break;
2828			  if (info2->order < try_order)
2829			    {
2830			      if (info2->rhs_code != INTEGER_CST
2831				  || info2->lp_nr != merged_store->lp_nr)
2832				{
2833				  /* Normally check_no_overlap makes sure this
2834				     doesn't happen, but if end grows below,
2835				     then we need to process more stores than
2836				     check_no_overlap verified.  Example:
2837				      MEM[(int *)p_5] = 0;
2838				      MEM[(short *)p_5 + 3B] = 1;
2839				      MEM[(char *)p_5 + 4B] = _9;
2840				      MEM[(char *)p_5 + 2B] = 2;  */
2841				  k = 0;
2842				  break;
2843				}
2844			      k = j;
2845			      min_order = MIN (min_order, info2->order);
2846			      this_end = MAX (this_end,
2847					      info2->bitpos + info2->bitsize);
2848			    }
2849			  else if (info2->rhs_code == INTEGER_CST
2850				   && info2->lp_nr == merged_store->lp_nr
2851				   && !last_iter)
2852			    {
2853			      max_order = MAX (max_order, info2->order + 1);
2854			      first_nonmergeable_int_order
2855				= MIN (first_nonmergeable_int_order,
2856				       info2->order);
2857			    }
2858			  else
2859			    first_nonmergeable_order
2860			      = MIN (first_nonmergeable_order, info2->order);
2861			}
2862		      if (k > i
2863			  && !check_no_overlap (m_store_info, len - 1, true,
2864						min_order, try_order,
2865						merged_store->start, this_end,
2866						first_earlier, end_earlier))
2867			k = 0;
2868		      if (k == 0)
2869			{
2870			  if (last_order == try_order)
2871			    break;
2872			  /* If this failed, but only because we grew
2873			     try_order, retry with the last working one,
2874			     so that we merge at least something.  */
2875			  try_order = last_order;
2876			  last_iter = true;
2877			  continue;
2878			}
2879		      last_order = try_order;
2880		      /* Retry with a larger try_order to see if we could
2881			 merge some further INTEGER_CST stores.  */
2882		      if (max_order
2883			  && (first_nonmergeable_int_order
2884			      < first_nonmergeable_order))
2885			{
2886			  try_order = MIN (max_order,
2887					   first_nonmergeable_order);
2888			  try_order
2889			    = MIN (try_order,
2890				   merged_store->first_nonmergeable_order);
2891			  if (try_order > last_order && ++attempts < 16)
2892			    continue;
2893			}
2894		      first_nonmergeable_order
2895			= MIN (first_nonmergeable_order,
2896			       first_nonmergeable_int_order);
2897		      end = this_end;
2898		      break;
2899		    }
2900		  while (1);
2901
2902		  if (k != 0)
2903		    {
2904		      merged_store->merge_overlapping (info);
2905
2906		      merged_store->first_nonmergeable_order
2907			= MIN (merged_store->first_nonmergeable_order,
2908			       first_nonmergeable_order);
2909
2910		      for (unsigned int j = i + 1; j <= k; j++)
2911			{
2912			  store_immediate_info *info2 = m_store_info[j];
2913			  gcc_assert (info2->bitpos < end);
2914			  if (info2->order < last_order)
2915			    {
2916			      gcc_assert (info2->rhs_code == INTEGER_CST);
2917			      if (info != info2)
2918				merged_store->merge_overlapping (info2);
2919			    }
2920			  /* Other stores are kept and not merged in any
2921			     way.  */
2922			}
2923		      ignore = k;
2924		      goto done;
2925		    }
2926		}
2927	    }
2928	}
2929      /* |---store 1---||---store 2---|
2930	 This store is consecutive to the previous one.
2931	 Merge it into the current store group.  There can be gaps in between
2932	 the stores, but there can't be gaps in between bitregions.  */
2933      else if (info->bitregion_start <= merged_store->bitregion_end
2934	       && merged_store->can_be_merged_into (info))
2935	{
2936	  store_immediate_info *infof = merged_store->stores[0];
2937
          /* All the rhs_code ops that take 2 operands are commutative;
             swap the operands if that could make the operands compatible.  */
2940	  if (infof->ops[0].base_addr
2941	      && infof->ops[1].base_addr
2942	      && info->ops[0].base_addr
2943	      && info->ops[1].base_addr
2944	      && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
2945			   info->bitpos - infof->bitpos)
2946	      && operand_equal_p (info->ops[1].base_addr,
2947				  infof->ops[0].base_addr, 0))
2948	    {
2949	      std::swap (info->ops[0], info->ops[1]);
2950	      info->ops_swapped_p = true;
2951	    }
2952	  if (check_no_overlap (m_store_info, i, false,
2953				MIN (merged_store->first_order, info->order),
2954				MAX (merged_store->last_order, info->order),
2955				merged_store->start,
2956				MAX (merged_store->start + merged_store->width,
2957				     info->bitpos + info->bitsize),
2958				first_earlier, end_earlier))
2959	    {
2960	      /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores.  */
2961	      if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
2962		{
2963		  info->rhs_code = BIT_INSERT_EXPR;
2964		  info->ops[0].val = gimple_assign_rhs1 (info->stmt);
2965		  info->ops[0].base_addr = NULL_TREE;
2966		}
2967	      else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
2968		{
2969		  store_immediate_info *infoj;
2970		  unsigned int j;
2971		  FOR_EACH_VEC_ELT (merged_store->stores, j, infoj)
2972		    {
2973		      infoj->rhs_code = BIT_INSERT_EXPR;
2974		      infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
2975		      infoj->ops[0].base_addr = NULL_TREE;
2976		    }
2977		}
2978	      if ((infof->ops[0].base_addr
2979		   ? compatible_load_p (merged_store, info, base_addr, 0)
2980		   : !info->ops[0].base_addr)
2981		  && (infof->ops[1].base_addr
2982		      ? compatible_load_p (merged_store, info, base_addr, 1)
2983		      : !info->ops[1].base_addr))
2984		{
2985		  merged_store->merge_into (info);
2986		  goto done;
2987		}
2988	    }
2989	}
2990
2991      /* |---store 1---| <gap> |---store 2---|.
2992	 Gap between stores or the rhs not compatible.  Start a new group.  */
2993
2994      /* Try to apply all the stores recorded for the group to determine
2995	 the bitpattern they write and discard it if that fails.
2996	 This will also reject single-store groups.  */
2997      if (merged_store->apply_stores ())
2998	m_merged_store_groups.safe_push (merged_store);
2999      else
3000	delete merged_store;
3001
3002      merged_store = new merged_store_group (info);
3003      end_earlier = i;
3004      if (dump_file && (dump_flags & TDF_DETAILS))
3005	fputs ("New store group\n", dump_file);
3006
3007    done:
3008      if (dump_file && (dump_flags & TDF_DETAILS))
3009	{
3010	  fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
3011			      " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
3012		   i, info->bitsize, info->bitpos);
3013	  print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
3014	  fputc ('\n', dump_file);
3015	}
3016    }
3017
3018  /* Record or discard the last store group.  */
3019  if (merged_store)
3020    {
3021      if (merged_store->apply_stores ())
3022	m_merged_store_groups.safe_push (merged_store);
3023      else
3024	delete merged_store;
3025    }
3026
3027  gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
3028
3029  bool success
3030    = !m_merged_store_groups.is_empty ()
3031      && m_merged_store_groups.length () < m_store_info.length ();
3032
3033  if (success && dump_file)
3034    fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
3035	     m_merged_store_groups.length ());
3036
3037  return success;
3038}
3039
3040/* Return the type to use for the merged stores or loads described by STMTS.
3041   This is needed to get the alias sets right.  If IS_LOAD, look for rhs,
3042   otherwise lhs.  Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3043   of the MEM_REFs if any.  */
3044
3045static tree
3046get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3047			  unsigned short *cliquep, unsigned short *basep)
3048{
3049  gimple *stmt;
3050  unsigned int i;
3051  tree type = NULL_TREE;
3052  tree ret = NULL_TREE;
3053  *cliquep = 0;
3054  *basep = 0;
3055
3056  FOR_EACH_VEC_ELT (stmts, i, stmt)
3057    {
3058      tree ref = is_load ? gimple_assign_rhs1 (stmt)
3059			 : gimple_assign_lhs (stmt);
3060      tree type1 = reference_alias_ptr_type (ref);
3061      tree base = get_base_address (ref);
3062
3063      if (i == 0)
3064	{
3065	  if (TREE_CODE (base) == MEM_REF)
3066	    {
3067	      *cliquep = MR_DEPENDENCE_CLIQUE (base);
3068	      *basep = MR_DEPENDENCE_BASE (base);
3069	    }
3070	  ret = type = type1;
3071	  continue;
3072	}
3073      if (!alias_ptr_types_compatible_p (type, type1))
3074	ret = ptr_type_node;
3075      if (TREE_CODE (base) != MEM_REF
3076	  || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3077	  || *basep != MR_DEPENDENCE_BASE (base))
3078	{
3079	  *cliquep = 0;
3080	  *basep = 0;
3081	}
3082    }
3083  return ret;
3084}
3085
3086/* Return the location_t information we can find among the statements
3087   in STMTS.  */
3088
3089static location_t
3090get_location_for_stmts (vec<gimple *> &stmts)
3091{
3092  gimple *stmt;
3093  unsigned int i;
3094
3095  FOR_EACH_VEC_ELT (stmts, i, stmt)
3096    if (gimple_has_location (stmt))
3097      return gimple_location (stmt);
3098
3099  return UNKNOWN_LOCATION;
3100}
3101
/* Used to describe a store resulting from splitting a wide store into
   smaller regularly-sized stores in split_group.  */
3104
3105class split_store
3106{
3107public:
3108  unsigned HOST_WIDE_INT bytepos;
3109  unsigned HOST_WIDE_INT size;
3110  unsigned HOST_WIDE_INT align;
3111  auto_vec<store_immediate_info *> orig_stores;
3112  /* True if there is a single orig stmt covering the whole split store.  */
3113  bool orig;
3114  split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3115	       unsigned HOST_WIDE_INT);
3116};
3117
3118/* Simple constructor.  */
3119
3120split_store::split_store (unsigned HOST_WIDE_INT bp,
3121			  unsigned HOST_WIDE_INT sz,
3122			  unsigned HOST_WIDE_INT al)
3123			  : bytepos (bp), size (sz), align (al), orig (false)
3124{
3125  orig_stores.create (0);
3126}
3127
/* Record all stores in GROUP that write to the region starting at BITPOS
   and of size BITSIZE.  Record infos for such statements in STORES if
3130   non-NULL.  The stores in GROUP must be sorted by bitposition.  Return INFO
3131   if there is exactly one original store in the range (in that case ignore
3132   clobber stmts, unless there are only clobber stmts).  */
3133
3134static store_immediate_info *
3135find_constituent_stores (class merged_store_group *group,
3136			 vec<store_immediate_info *> *stores,
3137			 unsigned int *first,
3138			 unsigned HOST_WIDE_INT bitpos,
3139			 unsigned HOST_WIDE_INT bitsize)
3140{
3141  store_immediate_info *info, *ret = NULL;
3142  unsigned int i;
3143  bool second = false;
3144  bool update_first = true;
3145  unsigned HOST_WIDE_INT end = bitpos + bitsize;
3146  for (i = *first; group->stores.iterate (i, &info); ++i)
3147    {
3148      unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3149      unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
3150      if (stmt_end <= bitpos)
3151	{
3152	  /* BITPOS passed to this function never decreases from within the
3153	     same split_group call, so optimize and don't scan info records
3154	     which are known to end before or at BITPOS next time.
3155	     Only do it if all stores before this one also pass this.  */
3156	  if (update_first)
3157	    *first = i + 1;
3158	  continue;
3159	}
3160      else
3161	update_first = false;
3162
3163      /* The stores in GROUP are ordered by bitposition so if we're past
3164	 the region for this group return early.  */
3165      if (stmt_start >= end)
3166	return ret;
3167
3168      if (gimple_clobber_p (info->stmt))
3169	{
3170	  if (stores)
3171	    stores->safe_push (info);
3172	  if (ret == NULL)
3173	    ret = info;
3174	  continue;
3175	}
3176      if (stores)
3177	{
3178	  stores->safe_push (info);
3179	  if (ret && !gimple_clobber_p (ret->stmt))
3180	    {
3181	      ret = NULL;
3182	      second = true;
3183	    }
3184	}
3185      else if (ret && !gimple_clobber_p (ret->stmt))
3186	return NULL;
3187      if (!second)
3188	ret = info;
3189    }
3190  return ret;
3191}
3192
/* Return how many SSA_NAMEs used to compute the value to store in the INFO
3194   store have multiple uses.  If any SSA_NAME has multiple uses, also
3195   count statements needed to compute it.  */
3196
3197static unsigned
3198count_multiple_uses (store_immediate_info *info)
3199{
3200  gimple *stmt = info->stmt;
3201  unsigned ret = 0;
3202  switch (info->rhs_code)
3203    {
3204    case INTEGER_CST:
3205      return 0;
3206    case BIT_AND_EXPR:
3207    case BIT_IOR_EXPR:
3208    case BIT_XOR_EXPR:
3209      if (info->bit_not_p)
3210	{
3211	  if (!has_single_use (gimple_assign_rhs1 (stmt)))
3212	    ret = 1; /* Fall through below to return
3213			the BIT_NOT_EXPR stmt and then
3214			BIT_{AND,IOR,XOR}_EXPR and anything it
3215			uses.  */
3216	  else
            /* After this, stmt is the BIT_NOT_EXPR.  */
3218	    stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3219	}
3220      if (!has_single_use (gimple_assign_rhs1 (stmt)))
3221	{
3222	  ret += 1 + info->ops[0].bit_not_p;
3223	  if (info->ops[1].base_addr)
3224	    ret += 1 + info->ops[1].bit_not_p;
3225	  return ret + 1;
3226	}
3227      stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3228      /* stmt is now the BIT_*_EXPR.  */
3229      if (!has_single_use (gimple_assign_rhs1 (stmt)))
3230	ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3231      else if (info->ops[info->ops_swapped_p].bit_not_p)
3232	{
3233	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3234	  if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3235	    ++ret;
3236	}
3237      if (info->ops[1].base_addr == NULL_TREE)
3238	{
3239	  gcc_checking_assert (!info->ops_swapped_p);
3240	  return ret;
3241	}
3242      if (!has_single_use (gimple_assign_rhs2 (stmt)))
3243	ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3244      else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
3245	{
3246	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3247	  if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3248	    ++ret;
3249	}
3250      return ret;
3251    case MEM_REF:
3252      if (!has_single_use (gimple_assign_rhs1 (stmt)))
3253	return 1 + info->ops[0].bit_not_p;
3254      else if (info->ops[0].bit_not_p)
3255	{
3256	  stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3257	  if (!has_single_use (gimple_assign_rhs1 (stmt)))
3258	    return 1;
3259	}
3260      return 0;
3261    case BIT_INSERT_EXPR:
3262      return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
3263    default:
3264      gcc_unreachable ();
3265    }
3266}
3267
3268/* Split a merged store described by GROUP by populating the SPLIT_STORES
3269   vector (if non-NULL) with split_store structs describing the byte offset
3270   (from the base), the bit size and alignment of each store as well as the
3271   original statements involved in each such split group.
3272   This is to separate the splitting strategy from the statement
3273   building/emission/linking done in output_merged_store.
3274   Return number of new stores.
3275   If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3276   If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3277   BZERO_FIRST may be true only when the first store covers the whole group
3278   and clears it; if BZERO_FIRST is true, keep that first store in the set
3279   unmodified and emit further stores for the overrides only.
3280   If SPLIT_STORES is NULL, it is just a dry run to count number of
3281   new stores.  */
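
/* As an illustrative sketch (assuming MAX_STORE_BITSIZE is at least 32
   and there are no padding bytes or reusable original stores): for a
   group covering bytes 0..5 that is 4-byte aligned at byte 0 and with
   ALLOW_UNALIGNED_STORE false, the loop below produces two split
   stores, a 4-byte store at offset 0 followed by a 2-byte store at
   offset 4.  */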
3282
3283static unsigned int
3284split_group (merged_store_group *group, bool allow_unaligned_store,
3285	     bool allow_unaligned_load, bool bzero_first,
3286	     vec<split_store *> *split_stores,
3287	     unsigned *total_orig,
3288	     unsigned *total_new)
3289{
3290  unsigned HOST_WIDE_INT pos = group->bitregion_start;
3291  unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
3292  unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
3293  unsigned HOST_WIDE_INT group_align = group->align;
3294  unsigned HOST_WIDE_INT align_base = group->align_base;
3295  unsigned HOST_WIDE_INT group_load_align = group_align;
3296  bool any_orig = false;
3297
3298  gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3299
3300  if (group->stores[0]->rhs_code == LROTATE_EXPR
3301      || group->stores[0]->rhs_code == NOP_EXPR)
3302    {
3303      gcc_assert (!bzero_first);
      /* For the bswap framework using sets of stores, all the checking
	 has been done earlier in try_coalesce_bswap and the result needs
	 to be emitted as a single store.  */
3307      if (total_orig)
3308	{
3309	  /* Avoid the old/new stmt count heuristics.  It should be
3310	     always beneficial.  */
3311	  total_new[0] = 1;
3312	  total_orig[0] = 2;
3313	}
3314
3315      if (split_stores)
3316	{
3317	  unsigned HOST_WIDE_INT align_bitpos
3318	    = (group->start - align_base) & (group_align - 1);
3319	  unsigned HOST_WIDE_INT align = group_align;
3320	  if (align_bitpos)
3321	    align = least_bit_hwi (align_bitpos);
3322	  bytepos = group->start / BITS_PER_UNIT;
3323	  split_store *store
3324	    = new split_store (bytepos, group->width, align);
3325	  unsigned int first = 0;
3326	  find_constituent_stores (group, &store->orig_stores,
3327				   &first, group->start, group->width);
3328	  split_stores->safe_push (store);
3329	}
3330
3331      return 1;
3332    }
3333
3334  unsigned int ret = 0, first = 0;
3335  unsigned HOST_WIDE_INT try_pos = bytepos;
3336
3337  if (total_orig)
3338    {
3339      unsigned int i;
3340      store_immediate_info *info = group->stores[0];
3341
3342      total_new[0] = 0;
3343      total_orig[0] = 1; /* The orig store.  */
3344      info = group->stores[0];
3345      if (info->ops[0].base_addr)
3346	total_orig[0]++;
3347      if (info->ops[1].base_addr)
3348	total_orig[0]++;
3349      switch (info->rhs_code)
3350	{
3351	case BIT_AND_EXPR:
3352	case BIT_IOR_EXPR:
3353	case BIT_XOR_EXPR:
3354	  total_orig[0]++; /* The orig BIT_*_EXPR stmt.  */
3355	  break;
3356	default:
3357	  break;
3358	}
3359      total_orig[0] *= group->stores.length ();
3360
3361      FOR_EACH_VEC_ELT (group->stores, i, info)
3362	{
3363	  total_new[0] += count_multiple_uses (info);
3364	  total_orig[0] += (info->bit_not_p
3365			    + info->ops[0].bit_not_p
3366			    + info->ops[1].bit_not_p);
3367	}
3368    }
3369
3370  if (!allow_unaligned_load)
3371    for (int i = 0; i < 2; ++i)
3372      if (group->load_align[i])
3373	group_load_align = MIN (group_load_align, group->load_align[i]);
3374
3375  if (bzero_first)
3376    {
3377      store_immediate_info *gstore;
3378      FOR_EACH_VEC_ELT (group->stores, first, gstore)
3379	if (!gimple_clobber_p (gstore->stmt))
3380	  break;
3381      ++first;
3382      ret = 1;
3383      if (split_stores)
3384	{
3385	  split_store *store
3386	    = new split_store (bytepos, gstore->bitsize, align_base);
3387	  store->orig_stores.safe_push (gstore);
3388	  store->orig = true;
3389	  any_orig = true;
3390	  split_stores->safe_push (store);
3391	}
3392    }
3393
3394  while (size > 0)
3395    {
3396      if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3397	  && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3398	      || (bzero_first && group->val[try_pos - bytepos] == 0)))
3399	{
3400	  /* Skip padding bytes.  */
3401	  ++try_pos;
3402	  size -= BITS_PER_UNIT;
3403	  continue;
3404	}
3405
3406      unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3407      unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3408      unsigned HOST_WIDE_INT align_bitpos
3409	= (try_bitpos - align_base) & (group_align - 1);
3410      unsigned HOST_WIDE_INT align = group_align;
3411      bool found_orig = false;
3412      if (align_bitpos)
3413	align = least_bit_hwi (align_bitpos);
3414      if (!allow_unaligned_store)
3415	try_size = MIN (try_size, align);
3416      if (!allow_unaligned_load)
3417	{
3418	  /* If we can't do or don't want to do unaligned stores
3419	     as well as loads, we need to take the loads into account
3420	     as well.  */
3421	  unsigned HOST_WIDE_INT load_align = group_load_align;
3422	  align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3423	  if (align_bitpos)
3424	    load_align = least_bit_hwi (align_bitpos);
3425	  for (int i = 0; i < 2; ++i)
3426	    if (group->load_align[i])
3427	      {
3428		align_bitpos
3429		  = known_alignment (try_bitpos
3430				     - group->stores[0]->bitpos
3431				     + group->stores[0]->ops[i].bitpos
3432				     - group->load_align_base[i]);
3433		if (align_bitpos & (group_load_align - 1))
3434		  {
3435		    unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3436		    load_align = MIN (load_align, a);
3437		  }
3438	      }
3439	  try_size = MIN (try_size, load_align);
3440	}
3441      store_immediate_info *info
3442	= find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
3443      if (info && !gimple_clobber_p (info->stmt))
3444	{
3445	  /* If there is just one original statement for the range, see if
3446	     we can just reuse the original store which could be even larger
3447	     than try_size.  */
3448	  unsigned HOST_WIDE_INT stmt_end
3449	    = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
3450	  info = find_constituent_stores (group, NULL, &first, try_bitpos,
3451					  stmt_end - try_bitpos);
3452	  if (info && info->bitpos >= try_bitpos)
3453	    {
3454	      store_immediate_info *info2 = NULL;
3455	      unsigned int first_copy = first;
3456	      if (info->bitpos > try_bitpos
3457		  && stmt_end - try_bitpos <= try_size)
3458		{
3459		  info2 = find_constituent_stores (group, NULL, &first_copy,
3460						   try_bitpos,
3461						   info->bitpos - try_bitpos);
3462		  gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3463		}
3464	      if (info2 == NULL && stmt_end - try_bitpos < try_size)
3465		{
3466		  info2 = find_constituent_stores (group, NULL, &first_copy,
3467						   stmt_end,
3468						   (try_bitpos + try_size)
3469						   - stmt_end);
3470		  gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3471		}
3472	      if (info2 == NULL)
3473		{
3474		  try_size = stmt_end - try_bitpos;
3475		  found_orig = true;
3476		  goto found;
3477		}
3478	    }
3479	}
3480
3481      /* Approximate store bitsize for the case when there are no padding
3482	 bits.  */
3483      while (try_size > size)
3484	try_size /= 2;
3485      /* Now look for whole padding bytes at the end of that bitsize.  */
3486      for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3487	if (group->mask[try_pos - bytepos + nonmasked - 1]
3488	    != (unsigned char) ~0U
3489	    && (!bzero_first
3490		|| group->val[try_pos - bytepos + nonmasked - 1] != 0))
3491	  break;
3492      if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
3493	{
3494	  /* If entire try_size range is padding, skip it.  */
3495	  try_pos += try_size / BITS_PER_UNIT;
3496	  size -= try_size;
3497	  continue;
3498	}
      /* Otherwise try to decrease try_size if the second half, the last 3
	 quarters, etc. are padding.  */
3501      nonmasked *= BITS_PER_UNIT;
3502      while (nonmasked <= try_size / 2)
3503	try_size /= 2;
3504      if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
3505	{
3506	  /* Now look for whole padding bytes at the start of that bitsize.  */
3507	  unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3508	  for (masked = 0; masked < try_bytesize; ++masked)
3509	    if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
3510		&& (!bzero_first
3511		    || group->val[try_pos - bytepos + masked] != 0))
3512	      break;
3513	  masked *= BITS_PER_UNIT;
3514	  gcc_assert (masked < try_size);
3515	  if (masked >= try_size / 2)
3516	    {
3517	      while (masked >= try_size / 2)
3518		{
3519		  try_size /= 2;
3520		  try_pos += try_size / BITS_PER_UNIT;
3521		  size -= try_size;
3522		  masked -= try_size;
3523		}
3524	      /* Need to recompute the alignment, so just retry at the new
3525		 position.  */
3526	      continue;
3527	    }
3528	}
3529
3530    found:
3531      ++ret;
3532
3533      if (split_stores)
3534	{
3535	  split_store *store
3536	    = new split_store (try_pos, try_size, align);
3537	  info = find_constituent_stores (group, &store->orig_stores,
3538					  &first, try_bitpos, try_size);
3539	  if (info
3540	      && !gimple_clobber_p (info->stmt)
3541	      && info->bitpos >= try_bitpos
3542	      && info->bitpos + info->bitsize <= try_bitpos + try_size
3543	      && (store->orig_stores.length () == 1
3544		  || found_orig
3545		  || (info->bitpos == try_bitpos
3546		      && (info->bitpos + info->bitsize
3547			  == try_bitpos + try_size))))
3548	    {
3549	      store->orig = true;
3550	      any_orig = true;
3551	    }
3552	  split_stores->safe_push (store);
3553	}
3554
3555      try_pos += try_size / BITS_PER_UNIT;
3556      size -= try_size;
3557    }
3558
3559  if (total_orig)
3560    {
3561      unsigned int i;
3562      split_store *store;
3563      /* If we are reusing some original stores and any of the
3564	 original SSA_NAMEs had multiple uses, we need to subtract
3565	 those now before we add the new ones.  */
3566      if (total_new[0] && any_orig)
3567	{
3568	  FOR_EACH_VEC_ELT (*split_stores, i, store)
3569	    if (store->orig)
3570	      total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3571	}
3572      total_new[0] += ret; /* The new store.  */
3573      store_immediate_info *info = group->stores[0];
3574      if (info->ops[0].base_addr)
3575	total_new[0] += ret;
3576      if (info->ops[1].base_addr)
3577	total_new[0] += ret;
3578      switch (info->rhs_code)
3579	{
3580	case BIT_AND_EXPR:
3581	case BIT_IOR_EXPR:
3582	case BIT_XOR_EXPR:
3583	  total_new[0] += ret; /* The new BIT_*_EXPR stmt.  */
3584	  break;
3585	default:
3586	  break;
3587	}
3588      FOR_EACH_VEC_ELT (*split_stores, i, store)
3589	{
3590	  unsigned int j;
3591	  bool bit_not_p[3] = { false, false, false };
	  /* If all orig_stores have a certain bit_not_p set, then
	     we'd use a BIT_NOT_EXPR stmt and need to account for it.
	     If only some orig_stores have that bit_not_p set, then
	     we'd use a BIT_XOR_EXPR with a mask and need to account for
	     it.  */
3597	  FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3598	    {
3599	      if (info->ops[0].bit_not_p)
3600		bit_not_p[0] = true;
3601	      if (info->ops[1].bit_not_p)
3602		bit_not_p[1] = true;
3603	      if (info->bit_not_p)
3604		bit_not_p[2] = true;
3605	    }
3606	  total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3607	}
3608
3609    }
3610
3611  return ret;
3612}
3613
/* Return the operation through which the operand IDX (if < 2) or
   result (IDX == 2) should be inverted.  If NOP_EXPR, no inversion
   is done; if BIT_NOT_EXPR, all bits are inverted; if BIT_XOR_EXPR,
   the bits should be xored with MASK.  */
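
/* E.g. (illustrative): if a split store covers two adjacent 1-byte
   constituent stores and only the first of them has bit_not_p set,
   this returns BIT_XOR_EXPR with a MASK of 0x00ff or 0xff00 depending
   on endianness, so that only the bits of that first store end up
   inverted.  */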
3618
3619static enum tree_code
3620invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3621{
3622  unsigned int i;
3623  store_immediate_info *info;
3624  unsigned int cnt = 0;
3625  bool any_paddings = false;
3626  FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3627    {
3628      bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3629      if (bit_not_p)
3630	{
3631	  ++cnt;
3632	  tree lhs = gimple_assign_lhs (info->stmt);
3633	  if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3634	      && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3635	    any_paddings = true;
3636	}
3637    }
3638  mask = NULL_TREE;
3639  if (cnt == 0)
3640    return NOP_EXPR;
3641  if (cnt == split_store->orig_stores.length () && !any_paddings)
3642    return BIT_NOT_EXPR;
3643
3644  unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3645  unsigned buf_size = split_store->size / BITS_PER_UNIT;
3646  unsigned char *buf
3647    = XALLOCAVEC (unsigned char, buf_size);
3648  memset (buf, ~0U, buf_size);
3649  FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3650    {
3651      bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3652      if (!bit_not_p)
3653	continue;
3654      /* Clear regions with bit_not_p and invert afterwards, rather than
3655	 clear regions with !bit_not_p, so that gaps in between stores aren't
3656	 set in the mask.  */
3657      unsigned HOST_WIDE_INT bitsize = info->bitsize;
3658      unsigned HOST_WIDE_INT prec = bitsize;
3659      unsigned int pos_in_buffer = 0;
3660      if (any_paddings)
3661	{
3662	  tree lhs = gimple_assign_lhs (info->stmt);
3663	  if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3664	      && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
3665	    prec = TYPE_PRECISION (TREE_TYPE (lhs));
3666	}
3667      if (info->bitpos < try_bitpos)
3668	{
3669	  gcc_assert (info->bitpos + bitsize > try_bitpos);
3670	  if (!BYTES_BIG_ENDIAN)
3671	    {
3672	      if (prec <= try_bitpos - info->bitpos)
3673		continue;
3674	      prec -= try_bitpos - info->bitpos;
3675	    }
3676	  bitsize -= try_bitpos - info->bitpos;
3677	  if (BYTES_BIG_ENDIAN && prec > bitsize)
3678	    prec = bitsize;
3679	}
3680      else
3681	pos_in_buffer = info->bitpos - try_bitpos;
3682      if (prec < bitsize)
3683	{
3684	  /* If this is a bool inversion, invert just the least significant
3685	     prec bits rather than all bits of it.  */
3686	  if (BYTES_BIG_ENDIAN)
3687	    {
3688	      pos_in_buffer += bitsize - prec;
3689	      if (pos_in_buffer >= split_store->size)
3690		continue;
3691	    }
3692	  bitsize = prec;
3693	}
3694      if (pos_in_buffer + bitsize > split_store->size)
3695	bitsize = split_store->size - pos_in_buffer;
3696      unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
3697      if (BYTES_BIG_ENDIAN)
3698	clear_bit_region_be (p, (BITS_PER_UNIT - 1
3699				 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
3700      else
3701	clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
3702    }
3703  for (unsigned int i = 0; i < buf_size; ++i)
3704    buf[i] = ~buf[i];
3705  mask = native_interpret_expr (int_type, buf, buf_size);
3706  return BIT_XOR_EXPR;
3707}
3708
3709/* Given a merged store group GROUP output the widened version of it.
3710   The store chain is against the base object BASE.
3711   Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
3712   unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
3713   Make sure that the number of statements output is less than the number of
   original statements.  If a better sequence is possible, emit it and
   return true.  */
3716
3717bool
3718imm_store_chain_info::output_merged_store (merged_store_group *group)
3719{
3720  split_store *split_store;
3721  unsigned int i;
3722  unsigned HOST_WIDE_INT start_byte_pos
3723    = group->bitregion_start / BITS_PER_UNIT;
3724
3725  unsigned int orig_num_stmts = group->stores.length ();
3726  if (orig_num_stmts < 2)
3727    return false;
3728
3729  auto_vec<class split_store *, 32> split_stores;
3730  bool allow_unaligned_store
3731    = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
3732  bool allow_unaligned_load = allow_unaligned_store;
3733  bool bzero_first = false;
3734  store_immediate_info *store;
3735  unsigned int num_clobber_stmts = 0;
3736  if (group->stores[0]->rhs_code == INTEGER_CST)
3737    {
3738      FOR_EACH_VEC_ELT (group->stores, i, store)
3739	if (gimple_clobber_p (store->stmt))
3740	  num_clobber_stmts++;
3741	else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
3742		 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
3743		 && group->start == store->bitpos
3744		 && group->width == store->bitsize
3745		 && (group->start % BITS_PER_UNIT) == 0
3746		 && (group->width % BITS_PER_UNIT) == 0)
3747	  {
3748	    bzero_first = true;
3749	    break;
3750	  }
3751	else
3752	  break;
3753      FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
3754	if (gimple_clobber_p (store->stmt))
3755	  num_clobber_stmts++;
3756      if (num_clobber_stmts == orig_num_stmts)
3757	return false;
3758      orig_num_stmts -= num_clobber_stmts;
3759    }
3760  if (allow_unaligned_store || bzero_first)
3761    {
      /* If unaligned stores are allowed, see how many stores we'd emit
	 for unaligned and how many for aligned stores.  Only use
	 unaligned stores if that results in fewer stores than aligned.
	 Similarly, if the whole region is cleared first, prefer expanding
	 everything together over emitting the clear first followed by
	 merged stores for the overrides.  */
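      /* The four dry-run passes below cover the combinations of the two
	 flags: bit 0 of PASS selects unaligned stores and bit 1 selects
	 the bzero-first strategy, so e.g. pass 3 counts the stores needed
	 with both enabled.  */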
3768      unsigned cnt[4] = { ~0, ~0, ~0, ~0 };
3769      int pass_min = 0;
3770      for (int pass = 0; pass < 4; ++pass)
3771	{
3772	  if (!allow_unaligned_store && (pass & 1) != 0)
3773	    continue;
3774	  if (!bzero_first && (pass & 2) != 0)
3775	    continue;
3776	  cnt[pass] = split_group (group, (pass & 1) != 0,
3777				   allow_unaligned_load, (pass & 2) != 0,
3778				   NULL, NULL, NULL);
3779	  if (cnt[pass] < cnt[pass_min])
3780	    pass_min = pass;
3781	}
3782      if ((pass_min & 1) == 0)
3783	allow_unaligned_store = false;
3784      if ((pass_min & 2) == 0)
3785	bzero_first = false;
3786    }
3787  unsigned total_orig, total_new;
3788  split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
3789	       &split_stores, &total_orig, &total_new);
3790
3791  /* Determine if there is a clobber covering the whole group at the start,
3792     followed by proposed split stores that cover the whole group.  In that
3793     case, prefer the transformation even if
3794     split_stores.length () == orig_num_stmts.  */
3795  bool clobber_first = false;
3796  if (num_clobber_stmts
3797      && gimple_clobber_p (group->stores[0]->stmt)
3798      && group->start == group->stores[0]->bitpos
3799      && group->width == group->stores[0]->bitsize
3800      && (group->start % BITS_PER_UNIT) == 0
3801      && (group->width % BITS_PER_UNIT) == 0)
3802    {
3803      clobber_first = true;
3804      unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
3805      FOR_EACH_VEC_ELT (split_stores, i, split_store)
3806	if (split_store->bytepos != pos)
3807	  {
3808	    clobber_first = false;
3809	    break;
3810	  }
3811	else
3812	  pos += split_store->size / BITS_PER_UNIT;
3813      if (pos != (group->start + group->width) / BITS_PER_UNIT)
3814	clobber_first = false;
3815    }
3816
3817  if (split_stores.length () >= orig_num_stmts + clobber_first)
3818    {
3820      /* We didn't manage to reduce the number of statements.  Bail out.  */
3821      if (dump_file && (dump_flags & TDF_DETAILS))
3822	fprintf (dump_file, "Exceeded original number of stmts (%u)."
3823			    "  Not profitable to emit new sequence.\n",
3824		 orig_num_stmts);
3825      FOR_EACH_VEC_ELT (split_stores, i, split_store)
3826	delete split_store;
3827      return false;
3828    }
3829  if (total_orig <= total_new)
3830    {
3831      /* If number of estimated new statements is above estimated original
3832	 statements, bail out too.  */
3833      if (dump_file && (dump_flags & TDF_DETAILS))
3834	fprintf (dump_file, "Estimated number of original stmts (%u)"
3835			    " not larger than estimated number of new"
3836			    " stmts (%u).\n",
3837		 total_orig, total_new);
3838      FOR_EACH_VEC_ELT (split_stores, i, split_store)
3839	delete split_store;
3840      return false;
3841    }
3842  if (group->stores[0]->rhs_code == INTEGER_CST)
3843    {
3844      bool all_orig = true;
3845      FOR_EACH_VEC_ELT (split_stores, i, split_store)
3846	if (!split_store->orig)
3847	  {
3848	    all_orig = false;
3849	    break;
3850	  }
3851      if (all_orig)
3852	{
3853	  unsigned int cnt = split_stores.length ();
3854	  store_immediate_info *store;
3855	  FOR_EACH_VEC_ELT (group->stores, i, store)
3856	    if (gimple_clobber_p (store->stmt))
3857	      ++cnt;
3858	  /* Punt if we wouldn't make any real changes, i.e. keep all
3859	     orig stmts + all clobbers.  */
3860	  if (cnt == group->stores.length ())
3861	    {
3862	      if (dump_file && (dump_flags & TDF_DETAILS))
3863		fprintf (dump_file, "Exceeded original number of stmts (%u)."
3864				    "  Not profitable to emit new sequence.\n",
3865			 orig_num_stmts);
3866	      FOR_EACH_VEC_ELT (split_stores, i, split_store)
3867		delete split_store;
3868	      return false;
3869	    }
3870	}
3871    }
3872
3873  gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
3874  gimple_seq seq = NULL;
3875  tree last_vdef, new_vuse;
3876  last_vdef = gimple_vdef (group->last_stmt);
3877  new_vuse = gimple_vuse (group->last_stmt);
3878  tree bswap_res = NULL_TREE;
3879
3880  /* Clobbers are not removed.  */
3881  if (gimple_clobber_p (group->last_stmt))
3882    {
3883      new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
3884      gimple_set_vdef (group->last_stmt, new_vuse);
3885    }
3886
3887  if (group->stores[0]->rhs_code == LROTATE_EXPR
3888      || group->stores[0]->rhs_code == NOP_EXPR)
3889    {
3890      tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
3891      gimple *ins_stmt = group->stores[0]->ins_stmt;
3892      struct symbolic_number *n = &group->stores[0]->n;
3893      bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
3894
3895      switch (n->range)
3896	{
3897	case 16:
3898	  load_type = bswap_type = uint16_type_node;
3899	  break;
3900	case 32:
3901	  load_type = uint32_type_node;
3902	  if (bswap)
3903	    {
3904	      fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
3905	      bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3906	    }
3907	  break;
3908	case 64:
3909	  load_type = uint64_type_node;
3910	  if (bswap)
3911	    {
3912	      fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
3913	      bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3914	    }
3915	  break;
3916	default:
3917	  gcc_unreachable ();
3918	}
3919
      /* If each load has the vuse of the corresponding store,
	 we've already checked the aliasing in try_coalesce_bswap and
	 we want to sink the new load into seq, so we need to use new_vuse
	 on the load.  */
3924      if (n->base_addr)
3925	{
3926	  if (n->vuse == NULL)
3927	    {
3928	      n->vuse = new_vuse;
3929	      ins_stmt = NULL;
3930	    }
3931	  else
	    /* Update vuse in case it has been changed by
	       output_merged_stores.  */
3933	    n->vuse = gimple_vuse (ins_stmt);
3934	}
3935      bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
3936				 bswap_type, load_type, n, bswap);
3937      gcc_assert (bswap_res);
3938    }
3939
3940  gimple *stmt = NULL;
3941  auto_vec<gimple *, 32> orig_stmts;
3942  gimple_seq this_seq;
3943  tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
3944				      is_gimple_mem_ref_addr, NULL_TREE);
3945  gimple_seq_add_seq_without_update (&seq, this_seq);
3946
3947  tree load_addr[2] = { NULL_TREE, NULL_TREE };
3948  gimple_seq load_seq[2] = { NULL, NULL };
3949  gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
3950  for (int j = 0; j < 2; ++j)
3951    {
3952      store_operand_info &op = group->stores[0]->ops[j];
3953      if (op.base_addr == NULL_TREE)
3954	continue;
3955
3956      store_immediate_info *infol = group->stores.last ();
3957      if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
3958	{
3959	  /* We can't pick the location randomly; while we've verified
	     all the loads have the same vuse, they can still be in different
3961	     basic blocks and we need to pick the one from the last bb:
3962	     int x = q[0];
3963	     if (x == N) return;
3964	     int y = q[1];
3965	     p[0] = x;
3966	     p[1] = y;
3967	     otherwise if we put the wider load at the q[0] load, we might
3968	     segfault if q[1] is not mapped.  */
3969	  basic_block bb = gimple_bb (op.stmt);
3970	  gimple *ostmt = op.stmt;
3971	  store_immediate_info *info;
3972	  FOR_EACH_VEC_ELT (group->stores, i, info)
3973	    {
3974	      gimple *tstmt = info->ops[j].stmt;
3975	      basic_block tbb = gimple_bb (tstmt);
3976	      if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
3977		{
3978		  ostmt = tstmt;
3979		  bb = tbb;
3980		}
3981	    }
3982	  load_gsi[j] = gsi_for_stmt (ostmt);
3983	  load_addr[j]
3984	    = force_gimple_operand_1 (unshare_expr (op.base_addr),
3985				      &load_seq[j], is_gimple_mem_ref_addr,
3986				      NULL_TREE);
3987	}
3988      else if (operand_equal_p (base_addr, op.base_addr, 0))
3989	load_addr[j] = addr;
3990      else
3991	{
3992	  load_addr[j]
3993	    = force_gimple_operand_1 (unshare_expr (op.base_addr),
3994				      &this_seq, is_gimple_mem_ref_addr,
3995				      NULL_TREE);
3996	  gimple_seq_add_seq_without_update (&seq, this_seq);
3997	}
3998    }
3999
4000  FOR_EACH_VEC_ELT (split_stores, i, split_store)
4001    {
4002      unsigned HOST_WIDE_INT try_size = split_store->size;
4003      unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
4004      unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
4005      unsigned HOST_WIDE_INT align = split_store->align;
4006      tree dest, src;
4007      location_t loc;
4008      if (split_store->orig)
4009	{
4010	  /* If there is just a single non-clobber constituent store
4011	     which covers the whole area, just reuse the lhs and rhs.  */
4012	  gimple *orig_stmt = NULL;
4013	  store_immediate_info *store;
4014	  unsigned int j;
4015	  FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
4016	    if (!gimple_clobber_p (store->stmt))
4017	      {
4018		orig_stmt = store->stmt;
4019		break;
4020	      }
4021	  dest = gimple_assign_lhs (orig_stmt);
4022	  src = gimple_assign_rhs1 (orig_stmt);
4023	  loc = gimple_location (orig_stmt);
4024	}
4025      else
4026	{
4027	  store_immediate_info *info;
4028	  unsigned short clique, base;
4029	  unsigned int k;
4030	  FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4031	    orig_stmts.safe_push (info->stmt);
4032	  tree offset_type
4033	    = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
4034	  loc = get_location_for_stmts (orig_stmts);
4035	  orig_stmts.truncate (0);
4036
4037	  tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
4038	  int_type = build_aligned_type (int_type, align);
4039	  dest = fold_build2 (MEM_REF, int_type, addr,
4040			      build_int_cst (offset_type, try_pos));
4041	  if (TREE_CODE (dest) == MEM_REF)
4042	    {
4043	      MR_DEPENDENCE_CLIQUE (dest) = clique;
4044	      MR_DEPENDENCE_BASE (dest) = base;
4045	    }
4046
4047	  tree mask;
4048	  if (bswap_res)
4049	    mask = integer_zero_node;
4050	  else
4051	    mask = native_interpret_expr (int_type,
4052					  group->mask + try_pos
4053					  - start_byte_pos,
4054					  group->buf_size);
4055
4056	  tree ops[2];
4057	  for (int j = 0;
4058	       j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
4059	       ++j)
4060	    {
4061	      store_operand_info &op = split_store->orig_stores[0]->ops[j];
4062	      if (bswap_res)
4063		ops[j] = bswap_res;
4064	      else if (op.base_addr)
4065		{
4066		  FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4067		    orig_stmts.safe_push (info->ops[j].stmt);
4068
4069		  offset_type = get_alias_type_for_stmts (orig_stmts, true,
4070							  &clique, &base);
4071		  location_t load_loc = get_location_for_stmts (orig_stmts);
4072		  orig_stmts.truncate (0);
4073
4074		  unsigned HOST_WIDE_INT load_align = group->load_align[j];
4075		  unsigned HOST_WIDE_INT align_bitpos
4076		    = known_alignment (try_bitpos
4077				       - split_store->orig_stores[0]->bitpos
4078				       + op.bitpos);
4079		  if (align_bitpos & (load_align - 1))
4080		    load_align = least_bit_hwi (align_bitpos);
4081
4082		  tree load_int_type
4083		    = build_nonstandard_integer_type (try_size, UNSIGNED);
4084		  load_int_type
4085		    = build_aligned_type (load_int_type, load_align);
4086
4087		  poly_uint64 load_pos
4088		    = exact_div (try_bitpos
4089				 - split_store->orig_stores[0]->bitpos
4090				 + op.bitpos,
4091				 BITS_PER_UNIT);
4092		  ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
4093					build_int_cst (offset_type, load_pos));
4094		  if (TREE_CODE (ops[j]) == MEM_REF)
4095		    {
4096		      MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
4097		      MR_DEPENDENCE_BASE (ops[j]) = base;
4098		    }
4099		  if (!integer_zerop (mask))
		    /* The load might load some bits (that will be masked off
		       later on) uninitialized; avoid -W*uninitialized
		       warnings in that case.  */
4103		    TREE_NO_WARNING (ops[j]) = 1;
4104
4105		  stmt = gimple_build_assign (make_ssa_name (int_type),
4106					      ops[j]);
4107		  gimple_set_location (stmt, load_loc);
4108		  if (gsi_bb (load_gsi[j]))
4109		    {
4110		      gimple_set_vuse (stmt, gimple_vuse (op.stmt));
4111		      gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
4112		    }
4113		  else
4114		    {
4115		      gimple_set_vuse (stmt, new_vuse);
4116		      gimple_seq_add_stmt_without_update (&seq, stmt);
4117		    }
4118		  ops[j] = gimple_assign_lhs (stmt);
4119		  tree xor_mask;
4120		  enum tree_code inv_op
4121		    = invert_op (split_store, j, int_type, xor_mask);
4122		  if (inv_op != NOP_EXPR)
4123		    {
4124		      stmt = gimple_build_assign (make_ssa_name (int_type),
4125						  inv_op, ops[j], xor_mask);
4126		      gimple_set_location (stmt, load_loc);
4127		      ops[j] = gimple_assign_lhs (stmt);
4128
4129		      if (gsi_bb (load_gsi[j]))
4130			gimple_seq_add_stmt_without_update (&load_seq[j],
4131							    stmt);
4132		      else
4133			gimple_seq_add_stmt_without_update (&seq, stmt);
4134		    }
4135		}
4136	      else
4137		ops[j] = native_interpret_expr (int_type,
4138						group->val + try_pos
4139						- start_byte_pos,
4140						group->buf_size);
4141	    }
4142
4143	  switch (split_store->orig_stores[0]->rhs_code)
4144	    {
4145	    case BIT_AND_EXPR:
4146	    case BIT_IOR_EXPR:
4147	    case BIT_XOR_EXPR:
4148	      FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4149		{
4150		  tree rhs1 = gimple_assign_rhs1 (info->stmt);
4151		  orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4152		}
4153	      location_t bit_loc;
4154	      bit_loc = get_location_for_stmts (orig_stmts);
4155	      orig_stmts.truncate (0);
4156
4157	      stmt
4158		= gimple_build_assign (make_ssa_name (int_type),
4159				       split_store->orig_stores[0]->rhs_code,
4160				       ops[0], ops[1]);
4161	      gimple_set_location (stmt, bit_loc);
4162	      /* If there is just one load and there is a separate
4163		 load_seq[0], emit the bitwise op right after it.  */
4164	      if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4165		gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4166	      /* Otherwise, if at least one load is in seq, we need to
4167		 emit the bitwise op right before the store.  If there
		 are two loads and they are emitted somewhere else, it would
4169		 be better to emit the bitwise op as early as possible;
4170		 we don't track where that would be possible right now
4171		 though.  */
4172	      else
4173		gimple_seq_add_stmt_without_update (&seq, stmt);
4174	      src = gimple_assign_lhs (stmt);
4175	      tree xor_mask;
4176	      enum tree_code inv_op;
4177	      inv_op = invert_op (split_store, 2, int_type, xor_mask);
4178	      if (inv_op != NOP_EXPR)
4179		{
4180		  stmt = gimple_build_assign (make_ssa_name (int_type),
4181					      inv_op, src, xor_mask);
4182		  gimple_set_location (stmt, bit_loc);
4183		  if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4184		    gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4185		  else
4186		    gimple_seq_add_stmt_without_update (&seq, stmt);
4187		  src = gimple_assign_lhs (stmt);
4188		}
4189	      break;
4190	    case LROTATE_EXPR:
4191	    case NOP_EXPR:
4192	      src = ops[0];
4193	      if (!is_gimple_val (src))
4194		{
4195		  stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4196					      src);
4197		  gimple_seq_add_stmt_without_update (&seq, stmt);
4198		  src = gimple_assign_lhs (stmt);
4199		}
4200	      if (!useless_type_conversion_p (int_type, TREE_TYPE (src)))
4201		{
4202		  stmt = gimple_build_assign (make_ssa_name (int_type),
4203					      NOP_EXPR, src);
4204		  gimple_seq_add_stmt_without_update (&seq, stmt);
4205		  src = gimple_assign_lhs (stmt);
4206		}
4207	      inv_op = invert_op (split_store, 2, int_type, xor_mask);
4208	      if (inv_op != NOP_EXPR)
4209		{
4210		  stmt = gimple_build_assign (make_ssa_name (int_type),
4211					      inv_op, src, xor_mask);
4212		  gimple_set_location (stmt, loc);
4213		  gimple_seq_add_stmt_without_update (&seq, stmt);
4214		  src = gimple_assign_lhs (stmt);
4215		}
4216	      break;
4217	    default:
4218	      src = ops[0];
4219	      break;
4220	    }
4221
4222	  /* If bit insertion is required, we use the source as an accumulator
4223	     into which the successive bit-field values are manually inserted.
4224	     FIXME: perhaps use BIT_INSERT_EXPR instead in some cases?  */
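	  /* An illustrative sketch (assuming the value's type is wider
	     than the field): on a little-endian target, inserting a 3-bit
	     value _5 at bit offset 4 of the accumulator SRC becomes roughly
	       tem = _5 & 7;
	       tem = (int_type) tem;
	       tem = tem << 4;
	       src = tem | src;
	     where any step that is a no-op is omitted.  */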
4225	  if (group->bit_insertion)
4226	    FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4227	      if (info->rhs_code == BIT_INSERT_EXPR
4228		  && info->bitpos < try_bitpos + try_size
4229		  && info->bitpos + info->bitsize > try_bitpos)
4230		{
4231		  /* Mask, truncate, convert to final type, shift and ior into
4232		     the accumulator.  Note that every step can be a no-op.  */
4233		  const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4234		  const HOST_WIDE_INT end_gap
4235		    = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4236		  tree tem = info->ops[0].val;
4237		  if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4238		    {
4239		      tree bitfield_type
4240			= build_nonstandard_integer_type (info->bitsize,
4241							  UNSIGNED);
4242		      tem = gimple_convert (&seq, loc, bitfield_type, tem);
4243		    }
4244		  else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
4245		    {
4246		      const unsigned HOST_WIDE_INT imask
4247			= (HOST_WIDE_INT_1U << info->bitsize) - 1;
4248		      tem = gimple_build (&seq, loc,
4249					  BIT_AND_EXPR, TREE_TYPE (tem), tem,
4250					  build_int_cst (TREE_TYPE (tem),
4251							 imask));
4252		    }
4253		  const HOST_WIDE_INT shift
4254		    = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4255		  if (shift < 0)
4256		    tem = gimple_build (&seq, loc,
4257					RSHIFT_EXPR, TREE_TYPE (tem), tem,
4258					build_int_cst (NULL_TREE, -shift));
4259		  tem = gimple_convert (&seq, loc, int_type, tem);
4260		  if (shift > 0)
4261		    tem = gimple_build (&seq, loc,
4262					LSHIFT_EXPR, int_type, tem,
4263					build_int_cst (NULL_TREE, shift));
4264		  src = gimple_build (&seq, loc,
4265				      BIT_IOR_EXPR, int_type, tem, src);
4266		}
4267
4268	  if (!integer_zerop (mask))
4269	    {
4270	      tree tem = make_ssa_name (int_type);
4271	      tree load_src = unshare_expr (dest);
	      /* The load might load some or all bits uninitialized;
		 avoid -W*uninitialized warnings in that case.
		 As an optimization, if all the bits were provably
		 uninitialized (no stores at all yet, or the previous
		 store was a CLOBBER), it would be nice to optimize away
		 the load and replace it e.g. with 0.  */
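	      /* An illustrative sketch of the read-modify-write sequence
		 built below, where MASK has set bits exactly for the bits
		 not covered by any constituent store:
		   _1 = MEM[p];
		   _2 = _1 & MASK;
		   _3 = SRC & ~MASK;
		   SRC = _2 | _3;
		 with the SRC & ~MASK step folded when SRC is constant.  */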
4278	      TREE_NO_WARNING (load_src) = 1;
4279	      stmt = gimple_build_assign (tem, load_src);
4280	      gimple_set_location (stmt, loc);
4281	      gimple_set_vuse (stmt, new_vuse);
4282	      gimple_seq_add_stmt_without_update (&seq, stmt);
4283
4284	      /* FIXME: If there is a single chunk of zero bits in mask,
4285		 perhaps use BIT_INSERT_EXPR instead?  */
4286	      stmt = gimple_build_assign (make_ssa_name (int_type),
4287					  BIT_AND_EXPR, tem, mask);
4288	      gimple_set_location (stmt, loc);
4289	      gimple_seq_add_stmt_without_update (&seq, stmt);
4290	      tem = gimple_assign_lhs (stmt);
4291
4292	      if (TREE_CODE (src) == INTEGER_CST)
4293		src = wide_int_to_tree (int_type,
4294					wi::bit_and_not (wi::to_wide (src),
4295							 wi::to_wide (mask)));
4296	      else
4297		{
4298		  tree nmask
4299		    = wide_int_to_tree (int_type,
4300					wi::bit_not (wi::to_wide (mask)));
4301		  stmt = gimple_build_assign (make_ssa_name (int_type),
4302					      BIT_AND_EXPR, src, nmask);
4303		  gimple_set_location (stmt, loc);
4304		  gimple_seq_add_stmt_without_update (&seq, stmt);
4305		  src = gimple_assign_lhs (stmt);
4306		}
4307	      stmt = gimple_build_assign (make_ssa_name (int_type),
4308					  BIT_IOR_EXPR, tem, src);
4309	      gimple_set_location (stmt, loc);
4310	      gimple_seq_add_stmt_without_update (&seq, stmt);
4311	      src = gimple_assign_lhs (stmt);
4312	    }
4313	}
4314
4315      stmt = gimple_build_assign (dest, src);
4316      gimple_set_location (stmt, loc);
4317      gimple_set_vuse (stmt, new_vuse);
4318      gimple_seq_add_stmt_without_update (&seq, stmt);
4319
4320      if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4321	add_stmt_to_eh_lp (stmt, group->lp_nr);
4322
4323      tree new_vdef;
4324      if (i < split_stores.length () - 1)
4325	new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
4326      else
4327	new_vdef = last_vdef;
4328
4329      gimple_set_vdef (stmt, new_vdef);
4330      SSA_NAME_DEF_STMT (new_vdef) = stmt;
4331      new_vuse = new_vdef;
4332    }
4333
4334  FOR_EACH_VEC_ELT (split_stores, i, split_store)
4335    delete split_store;
4336
4337  gcc_assert (seq);
4338  if (dump_file)
4339    {
4340      fprintf (dump_file,
4341	       "New sequence of %u stores to replace old one of %u stores\n",
4342	       split_stores.length (), orig_num_stmts);
4343      if (dump_flags & TDF_DETAILS)
4344	print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4345    }
4346
4347  if (gimple_clobber_p (group->last_stmt))
4348    update_stmt (group->last_stmt);
4349
4350  if (group->lp_nr > 0)
4351    {
4352      /* We're going to insert a sequence of (potentially) throwing stores
4353	 into an active EH region.  This means that we're going to create
4354	 new basic blocks with EH edges pointing to the post landing pad
	 and will therefore have to update its PHI nodes, if any.  For the
4356	 virtual PHI node, we're going to use the VDEFs created above, but
4357	 for the other nodes, we need to record the original reaching defs.  */
4358      eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4359      basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4360      basic_block last_bb = gimple_bb (group->last_stmt);
4361      edge last_edge = find_edge (last_bb, lp_bb);
4362      auto_vec<tree, 16> last_defs;
4363      gphi_iterator gpi;
4364      for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4365	{
4366	  gphi *phi = gpi.phi ();
4367	  tree last_def;
4368	  if (virtual_operand_p (gimple_phi_result (phi)))
4369	    last_def = NULL_TREE;
4370	  else
4371	    last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4372	  last_defs.safe_push (last_def);
4373	}
4374
4375      /* Do the insertion.  Then, if new basic blocks have been created in the
	 process, rewind the chain of VDEFs created above to walk the new basic
4377	 blocks and update the corresponding arguments of the PHI nodes.  */
4378      update_modified_stmts (seq);
4379      if (gimple_find_sub_bbs (seq, &last_gsi))
4380	while (last_vdef != gimple_vuse (group->last_stmt))
4381	  {
4382	    gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4383	    if (stmt_could_throw_p (cfun, stmt))
4384	      {
4385		edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4386		unsigned int i;
4387		for (gpi = gsi_start_phis (lp_bb), i = 0;
4388		     !gsi_end_p (gpi);
4389		     gsi_next (&gpi), i++)
4390		  {
4391		    gphi *phi = gpi.phi ();
4392		    tree new_def;
4393		    if (virtual_operand_p (gimple_phi_result (phi)))
4394		      new_def = last_vdef;
4395		    else
4396		      new_def = last_defs[i];
4397		    add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4398		  }
4399	      }
4400	    last_vdef = gimple_vuse (stmt);
4401	  }
4402    }
4403  else
4404    gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4405
4406  for (int j = 0; j < 2; ++j)
4407    if (load_seq[j])
4408      gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
4409
4410  return true;
4411}
4412
4413/* Process the merged_store_group objects created in the coalescing phase.
4414   The stores are all against the base object BASE.
4415   Try to output the widened stores and delete the original statements if
4416   successful.  Return true iff any changes were made.  */
4417
4418bool
4419imm_store_chain_info::output_merged_stores ()
4420{
4421  unsigned int i;
4422  merged_store_group *merged_store;
4423  bool ret = false;
4424  FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4425    {
4426      if (dbg_cnt (store_merging)
4427	  && output_merged_store (merged_store))
4428	{
4429	  unsigned int j;
4430	  store_immediate_info *store;
4431	  FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4432	    {
4433	      gimple *stmt = store->stmt;
4434	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	      /* Don't remove clobbers; they are still useful even if
		 everything is overwritten afterwards.  */
4437	      if (gimple_clobber_p (stmt))
4438		continue;
4439	      gsi_remove (&gsi, true);
4440	      if (store->lp_nr)
4441		remove_stmt_from_eh_lp (stmt);
4442	      if (stmt != merged_store->last_stmt)
4443		{
4444		  unlink_stmt_vdef (stmt);
4445		  release_defs (stmt);
4446		}
4447	    }
4448	  ret = true;
4449	}
4450    }
4451  if (ret && dump_file)
4452    fprintf (dump_file, "Merging successful!\n");
4453
4454  return ret;
4455}
4456
4457/* Coalesce the store_immediate_info objects recorded against the base object
4458   BASE in the first phase and output them.
4459   Delete the allocated structures.
4460   Return true if any changes were made.  */
4461
4462bool
4463imm_store_chain_info::terminate_and_process_chain ()
4464{
4465  /* Process store chain.  */
4466  bool ret = false;
4467  if (m_store_info.length () > 1)
4468    {
4469      ret = coalesce_immediate_stores ();
4470      if (ret)
4471	ret = output_merged_stores ();
4472    }
4473
4474  /* Delete all the entries we allocated ourselves.  */
4475  store_immediate_info *info;
4476  unsigned int i;
4477  FOR_EACH_VEC_ELT (m_store_info, i, info)
4478    delete info;
4479
4480  merged_store_group *merged_info;
4481  FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4482    delete merged_info;
4483
4484  return ret;
4485}
4486
4487/* Return true iff LHS is a destination potentially interesting for
4488   store merging.  In practice these are the codes that get_inner_reference
4489   can process.  */
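
/* For instance (illustrative), destinations such as
     x.f = ...;   (COMPONENT_REF)
     a[i] = ...;  (ARRAY_REF)
     *p = ...;    (MEM_REF)
   as well as stores to plain declarations are accepted here.  */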
4490
4491static bool
4492lhs_valid_for_store_merging_p (tree lhs)
4493{
4494  if (DECL_P (lhs))
4495    return true;
4496
4497  switch (TREE_CODE (lhs))
4498    {
4499    case ARRAY_REF:
4500    case ARRAY_RANGE_REF:
4501    case BIT_FIELD_REF:
4502    case COMPONENT_REF:
4503    case MEM_REF:
4504      return true;
4505    default:
4506      return false;
4507    }
4508
4509  gcc_unreachable ();
4510}
4511
4512/* Return true if the tree RHS is a constant we want to consider
4513   during store merging.  In practice accept all codes that
4514   native_encode_expr accepts.  */
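
/* E.g. (illustrative) an INTEGER_CST such as 42 is accepted because
   native_encode_expr can encode it, and so is an empty CONSTRUCTOR
   like the RHS of s = (struct S){}; an arbitrary SSA_NAME, on the
   other hand, is handled separately in process_store.  */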
4515
4516static bool
4517rhs_valid_for_store_merging_p (tree rhs)
4518{
4519  unsigned HOST_WIDE_INT size;
4520  if (TREE_CODE (rhs) == CONSTRUCTOR
4521      && CONSTRUCTOR_NELTS (rhs) == 0
4522      && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
4523      && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
4524    return true;
4525  return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4526	  && native_encode_expr (rhs, NULL, size) != 0);
4527}
4528
4529/* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4530   and return true on success or false on failure.  */
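
/* E.g. with BYTE_OFF 4 and *PBITPOS 3 the adjusted *PBITPOS is 35, and
   the bit region bounds, if tracked, are shifted by the same 32 bits.  */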
4531
4532static bool
4533adjust_bit_pos (poly_offset_int byte_off,
4534		poly_int64  *pbitpos,
4535		poly_uint64 *pbitregion_start,
4536		poly_uint64 *pbitregion_end)
4537{
4538  poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4539  bit_off += *pbitpos;
4540
4541  if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
4542    {
4543      if (maybe_ne (*pbitregion_end, 0U))
4544	{
4545	  bit_off = byte_off << LOG2_BITS_PER_UNIT;
4546	  bit_off += *pbitregion_start;
4547	  if (bit_off.to_uhwi (pbitregion_start))
4548	    {
4549	      bit_off = byte_off << LOG2_BITS_PER_UNIT;
4550	      bit_off += *pbitregion_end;
4551	      if (!bit_off.to_uhwi (pbitregion_end))
4552		*pbitregion_end = 0;
4553	    }
4554	  else
4555	    *pbitregion_end = 0;
4556	}
4557      return true;
4558    }
4559  else
4560    return false;
4561}
4562
4563/* If MEM is a memory reference usable for store merging (either as
4564   store destination or for loads), return the non-NULL base_addr
4565   and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
   Otherwise return NULL; *PBITPOS should still be valid even in that
   case.  */
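
/* As an illustrative sketch: for a 32-bit store to MEM[p_1 + 4] this
   returns p_1 as the base address with *PBITSIZE 32, *PBITPOS 32 and
   the byte-aligned bit region covering those same 32 bits.  */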
4568
4569static tree
4570mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
4571			     poly_uint64 *pbitpos,
4572			     poly_uint64 *pbitregion_start,
4573			     poly_uint64 *pbitregion_end)
4574{
4575  poly_int64 bitsize, bitpos;
4576  poly_uint64 bitregion_start = 0, bitregion_end = 0;
4577  machine_mode mode;
4578  int unsignedp = 0, reversep = 0, volatilep = 0;
4579  tree offset;
4580  tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
4581					&unsignedp, &reversep, &volatilep);
4582  *pbitsize = bitsize;
4583  if (known_le (bitsize, 0))
4584    return NULL_TREE;
4585
4586  if (TREE_CODE (mem) == COMPONENT_REF
4587      && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
4588    {
4589      get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
4590      if (maybe_ne (bitregion_end, 0U))
4591	bitregion_end += 1;
4592    }
4593
4594  if (reversep)
4595    return NULL_TREE;
4596
4597  /* We do not want to rewrite TARGET_MEM_REFs.  */
4598  if (TREE_CODE (base_addr) == TARGET_MEM_REF)
4599    return NULL_TREE;
4600  /* In some cases get_inner_reference may return a
4601     MEM_REF [ptr + byteoffset].  For the purposes of this pass
4602     canonicalize the base_addr to MEM_REF [ptr] and take
4603     byteoffset into account in the bitpos.  This occurs in
4604     PR 23684 and this way we can catch more chains.  */
4605  else if (TREE_CODE (base_addr) == MEM_REF)
4606    {
4607      if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
4608			   &bitregion_start, &bitregion_end))
4609	return NULL_TREE;
4610      base_addr = TREE_OPERAND (base_addr, 0);
4611    }
4612  /* get_inner_reference returns the base object, get at its
4613     address now.  */
4614  else
4615    {
4616      if (maybe_lt (bitpos, 0))
4617	return NULL_TREE;
4618      base_addr = build_fold_addr_expr (base_addr);
4619    }
4620
4621  if (offset)
4622    {
      /* If the access has a variable offset, then a base decl has to be
	 address-taken to be able to emit pointer-based stores to it.
4625	 ???  We might be able to get away with re-using the original
4626	 base up to the first variable part and then wrapping that inside
4627	 a BIT_FIELD_REF.  */
4628      tree base = get_base_address (base_addr);
4629      if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
4630	return NULL_TREE;
4631
4632      /* Similarly to above for the base, remove constant from the offset.  */
4633      if (TREE_CODE (offset) == PLUS_EXPR
4634	  && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
4635	  && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
4636			     &bitpos, &bitregion_start, &bitregion_end))
4637	offset = TREE_OPERAND (offset, 0);
4638
4639      base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
4640			  base_addr, offset);
4641    }
4642
4643  if (known_eq (bitregion_end, 0U))
4644    {
4645      bitregion_start = round_down_to_byte_boundary (bitpos);
4646      bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
4647    }
4648
4649  *pbitsize = bitsize;
4650  *pbitpos = bitpos;
4651  *pbitregion_start = bitregion_start;
4652  *pbitregion_end = bitregion_end;
4653  return base_addr;
4654}
4655
4656/* Return true if STMT is a load that can be used for store merging.
4657   In that case fill in *OP.  BITSIZE, BITPOS, BITREGION_START and
4658   BITREGION_END are properties of the corresponding store.  */
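
/* For example (illustrative): for a store MEM[p_1] = _2 recorded with
   BITPOS 0 and BITSIZE 32, a defining statement
     _2 = MEM[q_3];
   that is a non-throwing, non-volatile 32-bit load fills in *OP with
   base address q_3, matching bitsize and bitpos, and OP->bit_not_p
   false.  */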
4659
4660static bool
4661handled_load (gimple *stmt, store_operand_info *op,
4662	      poly_uint64 bitsize, poly_uint64 bitpos,
4663	      poly_uint64 bitregion_start, poly_uint64 bitregion_end)
4664{
4665  if (!is_gimple_assign (stmt))
4666    return false;
4667  if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
4668    {
4669      tree rhs1 = gimple_assign_rhs1 (stmt);
4670      if (TREE_CODE (rhs1) == SSA_NAME
4671	  && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
4672			   bitregion_start, bitregion_end))
4673	{
	  /* Don't allow _1 = load; _2 = ~_1; _3 = ~_2; which should have
4675	     been optimized earlier, but if allowed here, would confuse the
4676	     multiple uses counting.  */
4677	  if (op->bit_not_p)
4678	    return false;
4679	  op->bit_not_p = !op->bit_not_p;
4680	  return true;
4681	}
4682      return false;
4683    }
4684  if (gimple_vuse (stmt)
4685      && gimple_assign_load_p (stmt)
4686      && !stmt_can_throw_internal (cfun, stmt)
4687      && !gimple_has_volatile_ops (stmt))
4688    {
4689      tree mem = gimple_assign_rhs1 (stmt);
4690      op->base_addr
4691	= mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
4692				       &op->bitregion_start,
4693				       &op->bitregion_end);
4694      if (op->base_addr != NULL_TREE
4695	  && known_eq (op->bitsize, bitsize)
4696	  && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
4697	  && known_ge (op->bitpos - op->bitregion_start,
4698		       bitpos - bitregion_start)
4699	  && known_ge (op->bitregion_end - op->bitpos,
4700		       bitregion_end - bitpos))
4701	{
4702	  op->stmt = stmt;
4703	  op->val = mem;
4704	  op->bit_not_p = false;
4705	  return true;
4706	}
4707    }
4708  return false;
4709}
4710
4711/* Return the index number of the landing pad for STMT, if any.  */
4712
4713static int
4714lp_nr_for_store (gimple *stmt)
4715{
4716  if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
4717    return 0;
4718
4719  if (!stmt_could_throw_p (cfun, stmt))
4720    return 0;
4721
4722  return lookup_stmt_eh_lp (stmt);
4723}
4724
4725/* Record the store STMT for store merging optimization if it can be
4726   optimized.  Return true if any changes were made.  */
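
/* A minimal sketch of the bookkeeping done below: for
     p_1->a = 1;
     p_1->b = 2;
   the first store starts a new chain keyed by the base address p_1 in
   m_stores and the second store is appended to that same chain, to be
   coalesced and output when the chain is terminated.  */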
4727
4728bool
4729pass_store_merging::process_store (gimple *stmt)
4730{
4731  tree lhs = gimple_assign_lhs (stmt);
4732  tree rhs = gimple_assign_rhs1 (stmt);
4733  poly_uint64 bitsize, bitpos;
4734  poly_uint64 bitregion_start, bitregion_end;
4735  tree base_addr
4736    = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
4737				   &bitregion_start, &bitregion_end);
4738  if (known_eq (bitsize, 0U))
4739    return false;
4740
4741  bool invalid = (base_addr == NULL_TREE
4742		  || (maybe_gt (bitsize,
4743				(unsigned int) MAX_BITSIZE_MODE_ANY_INT)
4744		      && TREE_CODE (rhs) != INTEGER_CST
4745		      && (TREE_CODE (rhs) != CONSTRUCTOR
4746			  || CONSTRUCTOR_NELTS (rhs) != 0)));
4747  enum tree_code rhs_code = ERROR_MARK;
4748  bool bit_not_p = false;
4749  struct symbolic_number n;
4750  gimple *ins_stmt = NULL;
4751  store_operand_info ops[2];
4752  if (invalid)
4753    ;
4754  else if (rhs_valid_for_store_merging_p (rhs))
4755    {
4756      rhs_code = INTEGER_CST;
4757      ops[0].val = rhs;
4758    }
4759  else if (TREE_CODE (rhs) != SSA_NAME)
4760    invalid = true;
4761  else
4762    {
4763      gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
4764      if (!is_gimple_assign (def_stmt))
4765	invalid = true;
4766      else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
4767			     bitregion_start, bitregion_end))
4768	rhs_code = MEM_REF;
4769      else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
4770	{
4771	  tree rhs1 = gimple_assign_rhs1 (def_stmt);
4772	  if (TREE_CODE (rhs1) == SSA_NAME
4773	      && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
4774	    {
4775	      bit_not_p = true;
4776	      def_stmt = SSA_NAME_DEF_STMT (rhs1);
4777	    }
4778	}
4779
4780      if (rhs_code == ERROR_MARK && !invalid)
4781	switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
4782	  {
4783	  case BIT_AND_EXPR:
4784	  case BIT_IOR_EXPR:
4785	  case BIT_XOR_EXPR:
4786	    tree rhs1, rhs2;
4787	    rhs1 = gimple_assign_rhs1 (def_stmt);
4788	    rhs2 = gimple_assign_rhs2 (def_stmt);
4789	    invalid = true;
4790	    if (TREE_CODE (rhs1) != SSA_NAME)
4791	      break;
4792	    def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
4793	    if (!is_gimple_assign (def_stmt1)
4794		|| !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
4795				  bitregion_start, bitregion_end))
4796	      break;
4797	    if (rhs_valid_for_store_merging_p (rhs2))
4798	      ops[1].val = rhs2;
4799	    else if (TREE_CODE (rhs2) != SSA_NAME)
4800	      break;
4801	    else
4802	      {
4803		def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
4804		if (!is_gimple_assign (def_stmt2))
4805		  break;
4806		else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
4807					bitregion_start, bitregion_end))
4808		  break;
4809	      }
4810	    invalid = false;
4811	    break;
4812	  default:
4813	    invalid = true;
4814	    break;
4815	  }
4816
4817      unsigned HOST_WIDE_INT const_bitsize;
4818      if (bitsize.is_constant (&const_bitsize)
4819	  && (const_bitsize % BITS_PER_UNIT) == 0
4820	  && const_bitsize <= 64
4821	  && multiple_p (bitpos, BITS_PER_UNIT))
4822	{
4823	  ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
4824	  if (ins_stmt)
4825	    {
4826	      uint64_t nn = n.n;
4827	      for (unsigned HOST_WIDE_INT i = 0;
4828		   i < const_bitsize;
4829		   i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
4830		if ((nn & MARKER_MASK) == 0
4831		    || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
4832		  {
4833		    ins_stmt = NULL;
4834		    break;
4835		  }
4836	      if (ins_stmt)
4837		{
4838		  if (invalid)
4839		    {
4840		      rhs_code = LROTATE_EXPR;
4841		      ops[0].base_addr = NULL_TREE;
4842		      ops[1].base_addr = NULL_TREE;
4843		    }
4844		  invalid = false;
4845		}
4846	    }
4847	}
4848
4849      if (invalid
4850	  && bitsize.is_constant (&const_bitsize)
4851	  && ((const_bitsize % BITS_PER_UNIT) != 0
4852	      || !multiple_p (bitpos, BITS_PER_UNIT))
4853	  && const_bitsize <= 64)
4854	{
4855	  /* Bypass a conversion to the bit-field type.  */
4856	  if (!bit_not_p
4857	      && is_gimple_assign (def_stmt)
4858	      && CONVERT_EXPR_CODE_P (rhs_code))
4859	    {
4860	      tree rhs1 = gimple_assign_rhs1 (def_stmt);
4861	      if (TREE_CODE (rhs1) == SSA_NAME
4862		  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
4863		rhs = rhs1;
4864	    }
4865	  rhs_code = BIT_INSERT_EXPR;
4866	  bit_not_p = false;
4867	  ops[0].val = rhs;
4868	  ops[0].base_addr = NULL_TREE;
4869	  ops[1].base_addr = NULL_TREE;
4870	  invalid = false;
4871	}
4872    }
4873
4874  unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
4875  unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
4876  if (invalid
4877      || !bitsize.is_constant (&const_bitsize)
4878      || !bitpos.is_constant (&const_bitpos)
4879      || !bitregion_start.is_constant (&const_bitregion_start)
4880      || !bitregion_end.is_constant (&const_bitregion_end))
4881    return terminate_all_aliasing_chains (NULL, stmt);
4882
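  /* If no byte swap or identity sequence was recognized, record an empty
     symbolic number for this store.  */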
4883  if (!ins_stmt)
4884    memset (&n, 0, sizeof (n));
4885
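  /* Look for a store chain already recording stores to this base object;
     if one exists, the new store is appended to it below.  */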
4886  class imm_store_chain_info **chain_info = NULL;
4887  bool ret = false;
4888  if (base_addr)
4889    chain_info = m_stores.get (base_addr);
4890
4891  store_immediate_info *info;
4892  if (chain_info)
4893    {
4894      unsigned int ord = (*chain_info)->m_store_info.length ();
4895      info = new store_immediate_info (const_bitsize, const_bitpos,
4896				       const_bitregion_start,
4897				       const_bitregion_end,
4898				       stmt, ord, rhs_code, n, ins_stmt,
4899				       bit_not_p, lp_nr_for_store (stmt),
4900				       ops[0], ops[1]);
4901      if (dump_file && (dump_flags & TDF_DETAILS))
4902	{
4903	  fprintf (dump_file, "Recording immediate store from stmt:\n");
4904	  print_gimple_stmt (dump_file, stmt, 0);
4905	}
4906      (*chain_info)->m_store_info.safe_push (info);
4907      ret |= terminate_all_aliasing_chains (chain_info, stmt);
4908      /* If we reach the limit of stores to merge in a chain, terminate and
4909	 process the chain now.  */
4910      if ((*chain_info)->m_store_info.length ()
4911	  == (unsigned int) param_max_stores_to_merge)
4912	{
4913	  if (dump_file && (dump_flags & TDF_DETAILS))
4914	    fprintf (dump_file,
4915		     "Reached maximum number of statements to merge:\n");
4916	  ret |= terminate_and_process_chain (*chain_info);
4917	}
4918      return ret;
4919    }
4920
4921  /* Does this store alias any existing chain?  */
4922  ret |= terminate_all_aliasing_chains (NULL, stmt);
4923  /* Start a new chain.  */
4924  class imm_store_chain_info *new_chain
4925    = new imm_store_chain_info (m_stores_head, base_addr);
4926  info = new store_immediate_info (const_bitsize, const_bitpos,
4927				   const_bitregion_start,
4928				   const_bitregion_end,
4929				   stmt, 0, rhs_code, n, ins_stmt,
4930				   bit_not_p, lp_nr_for_store (stmt),
4931				   ops[0], ops[1]);
4932  new_chain->m_store_info.safe_push (info);
4933  m_stores.put (base_addr, new_chain);
4934  if (dump_file && (dump_flags & TDF_DETAILS))
4935    {
4936      fprintf (dump_file, "Starting new chain with statement:\n");
4937      print_gimple_stmt (dump_file, stmt, 0);
4938      fprintf (dump_file, "The base object is:\n");
4939      print_generic_expr (dump_file, base_addr);
4940      fprintf (dump_file, "\n");
4941    }
4942  return ret;
4943}
4944
4945/* Return true if STMT is a store valid for store merging.  */
4946
4947static bool
4948store_valid_for_store_merging_p (gimple *stmt)
4949{
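  /* A candidate is a single non-volatile assignment (clobbers are allowed)
     that has a virtual definition and a left-hand side we can analyze.  */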
4950  return gimple_assign_single_p (stmt)
4951	 && gimple_vdef (stmt)
4952	 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
4953	 && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
4954}
4955
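/* Classification of a basic block for store merging purposes: BB_INVALID
   means there is nothing of interest in the block, BB_VALID means it
   contains at least two mergeable stores, and BB_EXTENDED_VALID means that
   chains still open at the end of the block may be extended across its
   fallthrough edge (only used when non-call exceptions are enabled).  */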
4956enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
4957
4958/* Return the status of basic block BB with respect to store merging.  */
4959
4960static enum basic_block_status
4961get_status_for_store_merging (basic_block bb)
4962{
4963  unsigned int num_statements = 0;
4964  gimple_stmt_iterator gsi;
4965  edge e;
4966  gimple *last_stmt = NULL;
4967
4968  for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4969    {
4970      gimple *stmt = gsi_stmt (gsi);
4971
4972      if (is_gimple_debug (stmt))
4973	continue;
4974
4975      last_stmt = stmt;
4976
4977      if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
4978	break;
4979    }
4980
4981  if (num_statements == 0)
4982    return BB_INVALID;
4983
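  /* With -fnon-call-exceptions, trapping stores end their basic block, so a
     mergeable sequence may be split across blocks.  If this block ends in a
     mergeable store and simply falls through to the next block, report that
     chains may be kept open across the edge.  */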
4984  if (cfun->can_throw_non_call_exceptions && cfun->eh
4985      && store_valid_for_store_merging_p (last_stmt)
4986      && (e = find_fallthru_edge (bb->succs))
4987      && e->dest == bb->next_bb)
4988    return BB_EXTENDED_VALID;
4989
4990  return num_statements >= 2 ? BB_VALID : BB_INVALID;
4991}
4992
4993/* Entry point for the pass.  Go over each basic block recording chains of
4994   immediate stores.  Upon encountering a terminating statement (as defined
4995   by stmt_terminates_chain_p) process the recorded stores and emit the widened
4996   variants.  */
4997
4998unsigned int
4999pass_store_merging::execute (function *fun)
5000{
5001  basic_block bb;
5002  hash_set<gimple *> orig_stmts;
5003  bool changed = false, open_chains = false;
5004
5005  /* If the function can throw and catch non-call exceptions, we'll be trying
5006     to merge stores across different basic blocks, so we need to first unsplit
5007     the EH edges in order to streamline the CFG of the function.  */
5008  if (cfun->can_throw_non_call_exceptions && cfun->eh)
5009    unsplit_eh_edges ();
5010
5011  calculate_dominance_info (CDI_DOMINATORS);
5012
5013  FOR_EACH_BB_FN (bb, fun)
5014    {
5015      const basic_block_status bb_status = get_status_for_store_merging (bb);
5016      gimple_stmt_iterator gsi;
5017
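      /* Chains left open by a previous block must be flushed here if this
         block cannot continue them or is reachable from more than one
         predecessor.  */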
5018      if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
5019	{
5020	  changed |= terminate_and_process_all_chains ();
5021	  open_chains = false;
5022	}
5023
5024      if (bb_status == BB_INVALID)
5025	continue;
5026
5027      if (dump_file && (dump_flags & TDF_DETAILS))
5028	fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
5029
5030      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5031	{
5032	  gimple *stmt = gsi_stmt (gsi);
5033
5034	  if (is_gimple_debug (stmt))
5035	    continue;
5036
5037	  if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
5038	    {
5039	      /* Terminate all chains.  */
5040	      if (dump_file && (dump_flags & TDF_DETAILS))
5041		fprintf (dump_file, "Volatile access terminates "
5042				    "all chains\n");
5043	      changed |= terminate_and_process_all_chains ();
5044	      open_chains = false;
5045	      continue;
5046	    }
5047
5048	  if (store_valid_for_store_merging_p (stmt))
5049	    changed |= process_store (stmt);
5050	  else
5051	    changed |= terminate_all_aliasing_chains (NULL, stmt);
5052	}
5053
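      /* Keep chains open across the fallthrough edge only for blocks
         classified as BB_EXTENDED_VALID; otherwise process and close them
         now.  */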
5054      if (bb_status == BB_EXTENDED_VALID)
5055	open_chains = true;
5056      else
5057	{
5058	  changed |= terminate_and_process_all_chains ();
5059	  open_chains = false;
5060	}
5061    }
5062
5063  if (open_chains)
5064    changed |= terminate_and_process_all_chains ();
5065
5066  /* If the function can throw and catch non-call exceptions and something
5067     changed during the pass, then the CFG has (very likely) changed too.  */
5068  if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
5069    {
5070      free_dominance_info (CDI_DOMINATORS);
5071      return TODO_cleanup_cfg;
5072    }
5073
5074  return 0;
5075}
5076
5077} // anon namespace
5078
5079/* Construct and return a store merging pass object.  */
5080
5081gimple_opt_pass *
5082make_pass_store_merging (gcc::context *ctxt)
5083{
5084  return new pass_store_merging (ctxt);
5085}
5086
5087#if CHECKING_P
5088
5089namespace selftest {
5090
5091/* Selftests for store merging helpers.  */
5092
5093/* Assert that all elements of the byte arrays X and Y, both of length N,
5094   are equal.  */
5095
5096static void
5097verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
5098{
5099  for (unsigned int i = 0; i < n; i++)
5100    {
5101      if (x[i] != y[i])
5102	{
5103	  fprintf (stderr, "Arrays do not match.  X:\n");
5104	  dump_char_array (stderr, x, n);
5105	  fprintf (stderr, "Y:\n");
5106	  dump_char_array (stderr, y, n);
5107	}
5108      ASSERT_EQ (x[i], y[i]);
5109    }
5110}
5111
5112/* Test that shift_bytes_in_array_left correctly carries bits across byte
5113   boundaries.  */
5114
5115static void
5116verify_shift_bytes_in_array_left (void)
5117{
5118   /* byte 1   | byte 0
5119      00011111 | 11100000.  */
5120  unsigned char orig[2] = { 0xe0, 0x1f };
5121  unsigned char in[2];
5122  memcpy (in, orig, sizeof orig);
5123
5124  unsigned char expected[2] = { 0x80, 0x7f };
5125  shift_bytes_in_array_left (in, sizeof (in), 2);
5126  verify_array_eq (in, expected, sizeof (in));
5127
5128  memcpy (in, orig, sizeof orig);
5129  memcpy (expected, orig, sizeof orig);
5130  /* Check that shifting by zero doesn't change anything.  */
5131  shift_bytes_in_array_left (in, sizeof (in), 0);
5132  verify_array_eq (in, expected, sizeof (in));
5134}
5135
5136/* Test that shift_bytes_in_array_right correctly carries bits across byte
5137   boundaries.  */
5138
5139static void
5140verify_shift_bytes_in_array_right (void)
5141{
5142   /* shift_bytes_in_array_right treats in[0] as the most significant byte:
5143      byte 1 | byte 0 = 00011111 | 11100000.  */
5144  unsigned char orig[2] = { 0x1f, 0xe0};
5145  unsigned char in[2];
5146  memcpy (in, orig, sizeof orig);
5147  unsigned char expected[2] = { 0x07, 0xf8};
5148  shift_bytes_in_array_right (in, sizeof (in), 2);
5149  verify_array_eq (in, expected, sizeof (in));
5150
5151  memcpy (in, orig, sizeof orig);
5152  memcpy (expected, orig, sizeof orig);
5153  /* Check that shifting by zero doesn't change anything.  */
5154  shift_bytes_in_array_right (in, sizeof (in), 0);
5155  verify_array_eq (in, expected, sizeof (in));
5156}
5157
5158/* Test that clear_bit_region clears exactly the bits asked for and
5159   nothing more.  */
5160
5161static void
5162verify_clear_bit_region (void)
5163{
5164  /* Start with all bits set and test clearing various patterns in them.  */
5165  unsigned char orig[3] = { 0xff, 0xff, 0xff};
5166  unsigned char in[3];
5167  unsigned char expected[3];
5168  memcpy (in, orig, sizeof in);
5169
5170  /* Check zeroing out all the bits.  */
5171  clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
5172  expected[0] = expected[1] = expected[2] = 0;
5173  verify_array_eq (in, expected, sizeof in);
5174
5175  memcpy (in, orig, sizeof in);
5176  /* Leave the first and last bits intact.  */
5177  clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
5178  expected[0] = 0x1;
5179  expected[1] = 0;
5180  expected[2] = 0x80;
5181  verify_array_eq (in, expected, sizeof in);
5182}
5183
5184/* Test that clear_bit_region_be clears exactly the bits asked for and
5185   nothing more.  */
5186
5187static void
5188verify_clear_bit_region_be (void)
5189{
5190  /* Start with all bits set and test clearing various patterns in them.  */
5191  unsigned char orig[3] = { 0xff, 0xff, 0xff};
5192  unsigned char in[3];
5193  unsigned char expected[3];
5194  memcpy (in, orig, sizeof in);
5195
5196  /* Check zeroing out all the bits.  */
5197  clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
5198  expected[0] = expected[1] = expected[2] = 0;
5199  verify_array_eq (in, expected, sizeof in);
5200
5201  memcpy (in, orig, sizeof in);
5202  /* Leave the first and last bits intact.  */
5203  clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
5204  expected[0] = 0x80;
5205  expected[1] = 0;
5206  expected[2] = 0x1;
5207  verify_array_eq (in, expected, sizeof in);
5208}
5209
5211/* Run all of the selftests within this file.  */
5212
5213void
5214store_merging_c_tests (void)
5215{
5216  verify_shift_bytes_in_array_left ();
5217  verify_shift_bytes_in_array_right ();
5218  verify_clear_bit_region ();
5219  verify_clear_bit_region_be ();
5220}
5221
5222} // namespace selftest
5223#endif /* CHECKING_P.  */
5224