1/* Tag Collision Avoidance pass for Falkor.
2   Copyright (C) 2018-2020 Free Software Foundation, Inc.
3
4   This file is part of GCC.
5
6   GCC is free software; you can redistribute it and/or modify it
7   under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 3, or (at your option)
9   any later version.
10
11   GCC is distributed in the hope that it will be useful, but
12   WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with GCC; see the file COPYING3.  If not see
18   <http://www.gnu.org/licenses/>.  */
19
20#define IN_TARGET_CODE 1
21
22#include "config.h"
23#define INCLUDE_LIST
24#include "system.h"
25#include "coretypes.h"
26#include "backend.h"
27#include "target.h"
28#include "rtl.h"
29#include "tree.h"
30#include "tree-pass.h"
31#include "aarch64-protos.h"
32#include "hash-map.h"
33#include "cfgloop.h"
34#include "cfgrtl.h"
35#include "rtl-iter.h"
36#include "df.h"
37#include "memmodel.h"
38#include "optabs.h"
39#include "regs.h"
40#include "recog.h"
41#include "function-abi.h"
42#include "regrename.h"
43#include "print-rtl.h"
44
45/* The Falkor hardware prefetching system uses the encoding of the registers
46   and offsets of loads to decide which of the multiple hardware prefetchers to
47   assign the load to.  This has the positive effect of accelerating prefetches
48   when all related loads with uniform strides are assigned to the same
49   prefetcher unit.  The down side is that because of the way the assignment
50   works, multiple unrelated loads may end up on the same prefetch unit, thus
51   causing the unit to bounce between different sets of addresses and never
52   train correctly.  The point of this pass is to avoid such collisions so that
53   unrelated loads are spread out to different prefetchers.  It also makes a
54   rudimentary attempt to ensure that related loads with the same tags don't
55   get moved out unnecessarily.
56
57   Perhaps a future enhancement would be to make a more concerted attempt to
58   get related loads under the same tag.  See the memcpy/memset implementation
59   for falkor in glibc to understand the kind of impact this can have on
60   falkor.
61
62   The assignment of loads is based on a tag that is computed from the encoding
63   of the first destination register (the only destination in case of LDR), the
64   base register and the offset (either the register or the immediate value, as
65   encoded in the instruction).  This is what the 14 bit tag looks like:
66
67   |<- 6 bits ->|<- 4b ->|<- 4b ->|
68   --------------------------------
69   |  OFFSET    |  SRC   |  DST   |
70   --------------------------------
71
72   For all cases, the SRC and DST are the 4 LSB of the encoding of the register
73   in the instruction.  Offset computation is more involved and is as follows:
74
75   - For register offset addressing: 4 LSB of the offset register with the MSB
76     of the 6 bits set to 1.
77
78   - For immediate offset: 4 LSB of the encoded immediate offset.  The encoding
79     depends on the width of the load and is expressed as multiples of the
80     width.
81
82   - For loads with update: 4 LSB of the offset.  The encoding here is the
83     exact number by which the base is offset and incremented.
84
85   Based on the above it is clear that registers 0 and 16 will result in
86   collisions, 1 and 17 and so on.  This pass detects such collisions within a
87   def/use chain of the source register in a loop and tries to resolve the
88   collision by renaming one of the destination registers.  */
89
/* Get the destination part of the tag.  */
#define TAG_GET_DEST(__tag) ((__tag) & 0xf)

/* Get the tag with the destination part updated.  */
#define TAG_UPDATE_DEST(__tag, __dest) (((__tag) & ~0xf) | (__dest & 0xf))

/* Strides at or above this are not considered; the prefetcher is unable to
   train on very long strides (see the stride check in IV_P).  */
#define MAX_PREFETCH_STRIDE 2048
97
/* The instruction information structure.  This is used to cache information
   about the INSN that we derive when traversing through all of the insns in
   loops.  */
class tag_insn_info
{
public:
  /* The load insn itself.  */
  rtx_insn *insn;
  /* The destination register; for a load pair, the first destination.  */
  rtx dest;
  /* The base address register.  */
  rtx base;
  /* The offset: a register, a CONST_INT, or NULL_RTX.  */
  rtx offset;
  /* True for pre/post incremented (writeback) loads.  */
  bool writeback;
  /* True if this is a load pair.  */
  bool ldp;

  tag_insn_info (rtx_insn *i, rtx d, rtx b, rtx o, bool w, bool p)
    : insn (i), dest (d), base (b), offset (o), writeback (w), ldp (p)
  {}

  /* Compute the tag based on BASE, DEST and OFFSET of the load.  The layout
     is | OFFSET:6 | SRC:4 | DST:4 | as described at the top of the file.  */
  unsigned tag ()
    {
      unsigned int_offset = 0;
      /* Shadows the member so the REG/CONST_INT tests below read simply.  */
      rtx offset = this->offset;
      unsigned dest = REGNO (this->dest);
      unsigned base = REGNO (this->base);
      machine_mode dest_mode = GET_MODE (this->dest);

      /* Falkor does not support SVE; GET_LOAD_INFO ensures that the
	 destination mode is constant here.  */
      unsigned dest_mode_size = GET_MODE_SIZE (dest_mode).to_constant ();

      /* For loads of larger than 16 bytes, the DEST part of the tag is 0.
	 A load pair (LDP set) doubles the effective width.  */
      if ((dest_mode_size << this->ldp) > 16)
	dest = 0;

      /* Register offset: 4 LSB of the register number with the MSB of the
	 6-bit OFFSET field set, per the file-header description.  */
      if (offset && REG_P (offset))
	int_offset = (1 << 5) | REGNO (offset);
      else if (offset && CONST_INT_P (offset))
	{
	  /* Immediate offsets are encoded as multiples of the load width.  */
	  int_offset = INTVAL (offset);
	  int_offset /= dest_mode_size;
	  /* NOTE(review): non-writeback immediates are additionally scaled
	     down by 4 here — presumably matching the hardware's tag hash;
	     confirm against the Falkor prefetcher documentation.  */
	  if (!this->writeback)
	    int_offset >>= 2;
	}
      return ((dest & 0xf)
	      | ((base & 0xf) << 4)
	      | ((int_offset & 0x3f) << 8));
    }
};
146
/* Hash map from tag (a CONST_INT rtx) to the list of insns carrying that tag;
   used to traverse and process instructions with colliding tags.  */
typedef hash_map <rtx, auto_vec <tag_insn_info *> > tag_map_t;

/* Vector of instructions with colliding tags.  */
typedef auto_vec <tag_insn_info *> insn_info_list_t;

/* Pair of instruction information and unavailable register set to pass to
   CHECK_COLLIDING_TAGS.  */
typedef std::pair <tag_insn_info *, HARD_REG_SET *> arg_pair_t;
156
157
158/* Callback to free all tag_insn_info objects.  */
159bool
160free_insn_info (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v,
161		void *arg ATTRIBUTE_UNUSED)
162{
163  while (v->length () > 0)
164    delete v->pop ();
165
166  return true;
167}
168
169
170/* Add all aliases of the register to the unavailable register set.  REG is the
171   smallest register number that can then be used to reference its aliases.
172   UNAVAILABLE is the hard register set to add the ignored register numbers to
173   and MODE is the mode in which the registers would have been used.  */
174static void
175ignore_all_aliases (HARD_REG_SET *unavailable, machine_mode mode, unsigned reg)
176{
177  add_to_hard_reg_set (unavailable, mode, reg);
178  add_to_hard_reg_set (unavailable, mode, reg + 16);
179  add_to_hard_reg_set (unavailable, mode, reg + 32);
180  add_to_hard_reg_set (unavailable, mode, reg + 48);
181}
182
183
184/* Callback to check which destination registers are unavailable to us for
185   renaming because of the base and offset colliding.  This is a callback that
186   gets called for every name value pair (T, V) in the TAG_MAP.  The ARG is an
187   std::pair of the tag_insn_info of the original insn and the hard register
188   set UNAVAILABLE that is used to record hard register numbers that cannot be
189   used for the renaming.  This always returns true since we want to traverse
190   through the entire TAG_MAP.  */
191bool
192check_colliding_tags (const rtx &t, const insn_info_list_t &v, arg_pair_t *arg)
193{
194  HARD_REG_SET *unavailable = arg->second;
195  unsigned orig_tag = arg->first->tag ();
196  unsigned tag = INTVAL (t);
197  machine_mode mode = GET_MODE (arg->first->dest);
198
199  /* Can't collide with emptiness.  */
200  if (v.length () == 0)
201    return true;
202
203  /* Drop all aliased destination registers that result in the same
204     tag.  It is not necessary to drop all of them but we do anyway
205     because it is quicker than checking ranges.  */
206  if (TAG_UPDATE_DEST (tag, 0) == TAG_UPDATE_DEST (orig_tag, 0))
207    ignore_all_aliases (unavailable, mode, TAG_GET_DEST (tag));
208
209  return true;
210}
211
212
213/* Initialize and build a set of hard register numbers UNAVAILABLE to avoid for
214   renaming.  INSN_INFO is the original insn, TAG_MAP is the map of the list of
215   insns indexed by their tags, HEAD is the def/use chain head of the
216   destination register of the original insn.  The routine returns the super
217   class of register classes that may be used during the renaming.  */
218static enum reg_class
219init_unavailable (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head,
220		  HARD_REG_SET *unavailable)
221{
222  unsigned dest = head->regno;
223  enum reg_class super_class = NO_REGS;
224  machine_mode mode = GET_MODE (insn_info->dest);
225
226  CLEAR_HARD_REG_SET (*unavailable);
227
228  for (struct du_chain *tmp = head->first; tmp; tmp = tmp->next_use)
229    {
230      if (DEBUG_INSN_P (tmp->insn))
231	continue;
232
233      *unavailable |= ~reg_class_contents[tmp->cl];
234      super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
235    }
236
237  for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
238    if (fixed_regs[i] || global_regs[i])
239      add_to_hard_reg_set (unavailable, mode, i);
240
241  arg_pair_t arg = arg_pair_t (insn_info, unavailable);
242
243  /* Exclude all registers that would lead to collisions with other loads.  */
244  tag_map.traverse <arg_pair_t *, check_colliding_tags> (&arg);
245
246  /* Finally, also ignore all aliases of the current reg.  */
247  ignore_all_aliases (unavailable, mode, dest & 0xf);
248
249  return super_class;
250}
251
252
253/* Find a suitable and available register and rename the chain of occurrences
254   of the register  defined in the def/use chain headed by HEAD in which INSN
255   exists.  CUR_TAG, TAGS and TAG_MAP are used to determine which registers are
256   unavailable due to a potential collision due to the rename.  The routine
257   returns the register number in case of a successful rename or -1 to indicate
258   failure.  */
259static int
260rename_chain (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head)
261{
262  unsigned dest_regno = head->regno;
263
264  if (head->cannot_rename || head->renamed)
265    return -1;
266
267  HARD_REG_SET unavailable;
268
269  enum reg_class super_class = init_unavailable (insn_info, tag_map, head,
270						 &unavailable);
271
272  unsigned new_regno = find_rename_reg (head, super_class, &unavailable,
273					dest_regno, false);
274
275  /* Attempt to rename as long as regrename doesn't just throw the same
276     register at us.  */
277  if (new_regno != dest_regno && regrename_do_replace (head, new_regno))
278    {
279      if (dump_file && (dump_flags & TDF_DETAILS))
280	  fprintf (dump_file, "\tInsn %d: Renamed %d to %d\n",
281		   INSN_UID (insn_info->insn), dest_regno, new_regno);
282
283      return new_regno;
284    }
285
286  return -1;
287}
288
289
290/* Return true if REGNO is not safe to rename.  */
291static bool
292unsafe_rename_p (unsigned regno)
293{
294  /* Avoid renaming registers used for argument passing and return value.  In
295     future we could be a little less conservative and walk through the basic
296     blocks to see if there are any call or syscall sites.  */
297  if (regno <= R8_REGNUM
298      || (regno >= V0_REGNUM && regno < V8_REGNUM))
299    return true;
300
301  /* Don't attempt to rename registers that may have specific meanings.  */
302  switch (regno)
303    {
304    case LR_REGNUM:
305    case HARD_FRAME_POINTER_REGNUM:
306    case FRAME_POINTER_REGNUM:
307    case STACK_POINTER_REGNUM:
308      return true;
309    }
310
311  return false;
312}
313
314
315/* Go through the def/use chains for the register and find the chain for this
316   insn to rename.  The function returns the hard register number in case of a
317   successful rename and -1 otherwise.  */
318static int
319rename_dest (tag_insn_info *insn_info, tag_map_t &tag_map)
320{
321  struct du_chain *chain = NULL;
322  du_head_p head = NULL;
323  int i;
324
325  unsigned dest_regno = REGNO (insn_info->dest);
326
327  if (unsafe_rename_p (dest_regno))
328    return -1;
329
330  /* Search the chain where this instruction is (one of) the root.  */
331  rtx_insn *insn = insn_info->insn;
332  operand_rr_info *dest_op_info = insn_rr[INSN_UID (insn)].op_info;
333
334  for (i = 0; i < dest_op_info->n_chains; i++)
335    {
336      /* The register tracked by this chain does not match the
337	 destination register of insn.  */
338      if (dest_op_info->heads[i]->regno != dest_regno)
339	continue;
340
341      head = dest_op_info->heads[i];
342      /* The chain was merged in another, find the new head.  */
343      if (!head->first)
344	head = regrename_chain_from_id (head->id);
345
346      for (chain = head->first; chain; chain = chain->next_use)
347	/* Found the insn in the chain, so try renaming the register in this
348	   chain.  */
349	if (chain->insn == insn)
350	  return rename_chain (insn_info, tag_map, head);
351    }
352
353  return -1;
354}
355
356
/* Flag to track if the map has changed; set by AVOID_COLLISIONS_1 after a
   successful rename and checked by AVOID_COLLISIONS to decide whether another
   traversal of the map is needed.  */
static bool map_changed = false;
359
/* The actual reallocation logic.  For each vector of collisions V, try to
   resolve the collision by attempting to rename the destination register of
   all but one of the loads.  This is a callback that is invoked for each
   name-value pair (T, V) in TAG_MAP.  The function returns true whenever the
   map is left unchanged, and false otherwise to halt the traversal; a halted
   traversal is restarted by AVOID_COLLISIONS since MAP_CHANGED is set.  */
bool
avoid_collisions_1 (const rtx &t, insn_info_list_t *v, tag_map_t *tag_map)
{
  /* We need at least two loads to cause a tag collision, return unchanged.  */
  if (v->length () < 2)
    return true;

  tag_insn_info *vec_start = v->pop ();
  tag_insn_info *insn_info = vec_start;

  /* Try to rename at least one register to reduce the collision.  If we
     iterate all the way through, we end up dropping one of the loads from the
     list.  This is fine because we want at most one element to ensure that a
     subsequent rename attempt does not end up worsening the collision.  */
  do
    {
      int new_regno;

      if ((new_regno = rename_dest (insn_info, *tag_map)) != -1)
	{
	  /* The renamed load now lives under a new tag; move it there.
	     Mutating the map invalidates the current traversal, hence the
	     false return below.  */
	  rtx new_tag = GEN_INT (TAG_UPDATE_DEST (INTVAL (t), new_regno));

	  tag_map->get_or_insert (new_tag).safe_push (insn_info);
	  df_set_regs_ever_live (new_regno, true);
	  map_changed = true;
	  return false;
	}

      /* Rotate the vector: put the failed candidate back at the front and
	 take the next one off the back.  */
      v->safe_insert (0, insn_info);
      insn_info = v->pop ();
    }
  while (insn_info != vec_start);

  if (dump_file)
    fprintf (dump_file, "\t>> Failed to rename destination in insn %d\n\t>>",
	     INSN_UID (insn_info->insn));

  /* Drop the last element and move on to the next tag.  */
  delete insn_info;
  return true;
}
406
407
408/* For each set of collisions, attempt to rename the registers or insert a move
409   to avoid the collision.  We repeatedly traverse through TAG_MAP using
410   AVOID_COLLISIONS_1 trying to rename registers to avoid collisions until a
411   full traversal results in no change in the map.  */
412static void
413avoid_collisions (tag_map_t &tag_map)
414{
415  do
416    {
417      map_changed = false;
418      tag_map.traverse <tag_map_t *, avoid_collisions_1> (&tag_map);
419    }
420  while (map_changed);
421}
422
423
424
425/* Find the use def chain in which INSN exists and then see if there is a
426   definition inside the loop and outside it.  We use this as a simple
427   approximation to determine whether the base register is an IV.  The basic
428   idea is to find INSN in the use-def chains for its base register and find
429   all definitions that reach it.  Of all these definitions, there should be at
430   least one definition that is a simple addition of a constant value, either
431   as a binary operation or a pre or post update.
432
433   The function returns true if the base register is estimated to be an IV.  */
434static bool
435iv_p (rtx_insn *insn, rtx reg, struct loop *loop)
436{
437  df_ref ause;
438  unsigned regno = REGNO (reg);
439
440  /* Ignore loads from the stack.  */
441  if (regno == SP_REGNUM)
442    return false;
443
444  for (ause = DF_REG_USE_CHAIN (regno); ause; ause = DF_REF_NEXT_REG (ause))
445    {
446      if (!DF_REF_INSN_INFO (ause)
447	  || !NONDEBUG_INSN_P (DF_REF_INSN (ause)))
448	continue;
449
450      if (insn != DF_REF_INSN (ause))
451	continue;
452
453      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
454      df_ref def_rec;
455
456      FOR_EACH_INSN_INFO_DEF (def_rec, insn_info)
457	{
458	  rtx_insn *insn = DF_REF_INSN (def_rec);
459	  basic_block bb = BLOCK_FOR_INSN (insn);
460
461	  if (dominated_by_p (CDI_DOMINATORS, bb, loop->header)
462	      && bb->loop_father == loop)
463	    {
464	      if (recog_memoized (insn) < 0)
465		continue;
466
467	      rtx pat = PATTERN (insn);
468
469	      /* Prefetch or clobber; unlikely to be a constant stride.  The
470		 falkor software prefetcher tuning is pretty conservative, so
471		 its presence indicates that the access pattern is probably
472		 strided but most likely with an unknown stride size or a
473		 stride size that is quite large.  */
474	      if (GET_CODE (pat) != SET)
475		continue;
476
477	      rtx x = SET_SRC (pat);
478	      if (GET_CODE (x) == ZERO_EXTRACT
479		  || GET_CODE (x) == ZERO_EXTEND
480		  || GET_CODE (x) == SIGN_EXTEND)
481		x = XEXP (x, 0);
482
483	      /* Loading the value from memory; unlikely to be a constant
484		 stride.  */
485	      if (MEM_P (x))
486		continue;
487
488	      /* An increment or decrement by a constant MODE_SIZE amount or
489		 the result of a binary expression is likely to be an IV.  */
490	      if (GET_CODE (x) == POST_INC
491		  || GET_CODE (x) == POST_DEC
492		  || GET_CODE (x) == PRE_INC
493		  || GET_CODE (x) == PRE_DEC)
494		return true;
495	      else if (BINARY_P (x)
496		       && (CONST_INT_P (XEXP (x, 0))
497			   || CONST_INT_P (XEXP (x, 1))))
498		{
499		  rtx stride = (CONST_INT_P (XEXP (x, 0))
500				? XEXP (x, 0) : XEXP (x, 1));
501
502		  /* Don't bother with very long strides because the prefetcher
503		     is unable to train on them anyway.  */
504		  if (INTVAL (stride) < MAX_PREFETCH_STRIDE)
505		    return true;
506		}
507	    }
508	}
509      return false;
510    }
511  return false;
512}
513
514
/* Return true if SRC is a strided load in the LOOP, false otherwise.
   If it is a strided load, set the BASE and OFFSET.  Also, if this is
   a pre/post increment load, set PRE_POST to true.  */
static bool
valid_src_p (rtx src, rtx_insn *insn, struct loop *loop, bool *pre_post,
	     rtx *base, rtx *offset, bool load_pair)
{
  subrtx_var_iterator::array_type array;
  rtx x = NULL_RTX;

  /* Dig out the first MEM sub-rtx; that is the actual memory access.  */
  FOR_EACH_SUBRTX_VAR (iter, array, src, NONCONST)
    if (MEM_P (*iter))
      {
	x = *iter;
	break;
      }

  if (!x)
    return false;

  struct aarch64_address_info addr;
  machine_mode mode = GET_MODE (x);

  if (!aarch64_classify_address (&addr, XEXP (x, 0), mode, true))
    return false;

  /* Restrict to the addressing forms we can derive a tag from.  */
  if (addr.type != ADDRESS_REG_IMM
      && addr.type != ADDRESS_REG_WB
      && addr.type != ADDRESS_REG_REG
      && addr.type != ADDRESS_REG_UXTW
      && addr.type != ADDRESS_REG_SXTW)
    return false;

  unsigned regno = REGNO (addr.base);
  if (global_regs[regno] || fixed_regs[regno])
    return false;

  if (addr.type == ADDRESS_REG_WB)
    {
      unsigned code = GET_CODE (XEXP (x, 0));

      *pre_post = true;
      *base = addr.base;

      if (code == PRE_MODIFY || code == POST_MODIFY)
	*offset = addr.offset;
      else
	{
	  /* Writeback is only supported for fixed-width modes.  */
	  unsigned int_offset = GET_MODE_SIZE (mode).to_constant ();

	  /* For post-incremented load pairs we would increment the base twice
	     over, so make that adjustment.  */
	  if (load_pair && (code == POST_INC || code == POST_DEC))
	    int_offset *= 2;

	  *offset = GEN_INT (int_offset);
	}
      return true;
    }
  else if (addr.type == ADDRESS_REG_IMM || addr.type == ADDRESS_REG_REG)
    {
      /* Check if the load is strided, i.e. its base behaves like a loop
	 induction variable.  */
      if (!iv_p (insn, addr.base, loop))
	return false;

      *base = addr.base;
      *offset = addr.offset;
      return true;
    }

  /* ADDRESS_REG_UXTW/SXTW fall through to here and are rejected.  */
  return false;
}
588
589
/* Return true if INSN is a strided load in LOOP.  If it is a strided load, set
   the DEST, BASE and OFFSET.  Also, if this is a pre/post increment load, set
   PRE_POST to true.

   The routine does checks on the destination of the insn and depends on
   VALID_SRC_P to check the source and fill in the BASE and OFFSET.  */
static bool
get_load_info (rtx_insn *insn, struct loop *loop, rtx *dest, rtx *base,
	       rtx *offset, bool *pre_post, bool *ldp)
{
  if (!INSN_P (insn) || recog_memoized (insn) < 0)
    return false;

  rtx pat = PATTERN (insn);
  unsigned code = GET_CODE (pat);
  bool load_pair = (code == PARALLEL);

  /* For a load pair we need only the first base and destination
     registers.  We however need to ensure that our pre/post increment
     offset is doubled; we do that in VALID_SRC_P.  */
  if (load_pair)
    {
      pat = XVECEXP (pat, 0, 0);
      code = GET_CODE (pat);
    }

  if (code != SET)
    return false;

  rtx dest_rtx = SET_DEST (pat);

  if (!REG_P (dest_rtx))
    return false;

  unsigned regno = REGNO (dest_rtx);
  machine_mode mode = GET_MODE (dest_rtx);
  machine_mode inner_mode = GET_MODE_INNER (mode);

  /* Falkor does not support SVE vectors.  */
  if (!GET_MODE_SIZE (mode).is_constant ())
    return false;

  /* Ignore vector struct or lane loads.  */
  if (GET_MODE_SIZE (mode).to_constant ()
      != GET_MODE_SIZE (inner_mode).to_constant ())
    return false;

  /* The largest width we want to bother with is a load of a pair of
     quad-words.  */
  if ((GET_MODE_SIZE (mode).to_constant () << load_pair)
      > GET_MODE_SIZE (OImode))
    return false;

  /* Ignore loads into the stack pointer because it is unlikely to be a
     stream.  */
  if (regno == SP_REGNUM)
    return false;

  if (valid_src_p (SET_SRC (pat), insn, loop, pre_post, base, offset,
		   load_pair))
    {
      *dest = dest_rtx;
      *ldp = load_pair;

      return true;
    }

  return false;
}
659
660
/* Return whether INSN and CAND are in the same def/use chain for hard
   register REGNO, considering only the occurrences where the register is
   being defined.  */
static bool
in_same_chain (rtx_insn *insn, rtx_insn *cand, unsigned regno)
{
  struct du_chain *chain = NULL;
  du_head_p head = NULL;
  int i;

  /* Search the chain where this instruction is (one of) the root.  */
  operand_rr_info *op_info = insn_rr[INSN_UID (insn)].op_info;

  for (i = 0; i < op_info->n_chains; i++)
    {
      /* The register tracked by this chain does not match the
	 dest register of insn.  */
      if (op_info->heads[i]->regno != regno)
	continue;

      head = op_info->heads[i];
      /* The chain was merged in another, find the new head.  */
      if (!head->first)
	head = regrename_chain_from_id (head->id);

      bool found_insn = false, found_cand = false;

      for (chain = head->first; chain; chain = chain->next_use)
	{
	  /* Consider only occurrences where the register is the SET_DEST of
	     the insn, i.e. where it appears as a definition.  */
	  rtx *loc = &SET_DEST (PATTERN (chain->insn));

	  if (chain->loc != loc)
	    continue;

	  if (chain->insn == insn)
	    found_insn = true;

	  if (chain->insn == cand)
	    found_cand = true;

	  /* Both insns define the register within this chain.  */
	  if (found_insn && found_cand)
	    return true;
	}
    }

  return false;
}
706
707
/* Callback function to traverse the tag map and drop loads that have the same
   destination and are in the same chain of occurrence.  Routine always returns
   true to allow traversal through all of TAG_MAP.  */
bool
single_dest_per_chain (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v,
		       void *arg ATTRIBUTE_UNUSED)
{
  /* Compare every pair of entries; whenever two insns define the same
     register within the same def/use chain, drop one of them.  */
  for (int i = v->length () - 1; i>= 1; i--)
    {
      tag_insn_info *insn_info = (*v)[i];

      for (int j = v->length () - 2; j >= 0; j--)
	{
	  /* Filter out destinations in the same chain.  */
	  if (in_same_chain (insn_info->insn, (*v)[j]->insn,
			     REGNO (insn_info->dest)))
	    {
	      v->ordered_remove (j);
	      /* Indices shifted after the removal; restart the outer scan
		 from the (new) end of the vector.  */
	      i = v->length ();
	      break;
	    }
	}
    }

  return true;
}
734
735
736/* Callback invoked for each name-value pair (T, INSN_INFO) to dump the insn
737   list INSN_INFO for tag T.  */
738bool
739dump_insn_list (const rtx &t, const insn_info_list_t &insn_info,
740		void *unused ATTRIBUTE_UNUSED)
741{
742  gcc_assert (dump_file);
743  fprintf (dump_file, "Tag 0x%lx ::\n", INTVAL (t));
744
745  for (unsigned i = 0; i < insn_info.length (); i++)
746    dump_insn_slim (dump_file, insn_info[i]->insn);
747
748  fprintf (dump_file, "\n");
749
750  return true;
751}
752
753
754/* Record all loads in LOOP into TAG_MAP indexed by the falkor hardware
755   prefetcher memory tags.  */
756static void
757record_loads (tag_map_t &tag_map, struct loop *loop)
758{
759  rtx_insn *insn;
760  basic_block *body, bb;
761
762  body = get_loop_body (loop);
763
764  for (unsigned i = 0; i < loop->num_nodes; i++)
765    {
766      bb = body[i];
767      FOR_BB_INSNS (bb, insn)
768	{
769	  rtx base = NULL_RTX;
770	  rtx dest = NULL_RTX;
771	  rtx offset = NULL_RTX;
772	  bool writeback = false;
773	  bool ldp = false;
774
775	  if (!INSN_P (insn) || DEBUG_INSN_P (insn))
776	    continue;
777
778	  if (get_load_info (insn, loop, &dest, &base, &offset, &writeback,
779			     &ldp))
780	    {
781	      tag_insn_info *i = new tag_insn_info (insn, dest, base, offset,
782						    writeback, ldp);
783	      rtx tag = GEN_INT (i->tag ());
784	      tag_map.get_or_insert (tag).safe_push (i);
785	    }
786	}
787    }
788
789  if (dump_file)
790    {
791      fprintf (dump_file, "Loop %d: Tag map generated.\n", loop->num);
792      tag_map.traverse <void *, dump_insn_list> (NULL);
793    }
794
795  /* Try to reduce the dataset before launching into the rename attempt.  Drop
796     destinations in the same collision chain that appear in the same def/use
797     chain, all as defs.  These chains will move together in a rename so
798     there's no point in keeping both in there.  */
799  tag_map.traverse <void *, single_dest_per_chain> (NULL);
800}
801
802
/* Tag collision avoidance pass for Falkor.  The pass runs in two phases for
   each loop; the first phase collects all loads that we consider as
   interesting for renaming into a tag-indexed map of lists.  The second phase
   renames the destination register of the loads in an attempt to spread out
   the loads into different tags.  */
void
execute_tag_collision_avoidance ()
{
  struct loop *loop;

  /* Set up the dataflow problems the pass relies on: use-def chains (used
     by IV_P) and note information.  */
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_UD_CHAIN);
  df_compute_regs_ever_live (true);
  df_note_add_problem ();
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Initialize regrename's chains, which drive the actual renames.  */
  regrename_init (true);
  regrename_analyze (NULL);

  /* Loop discovery needs up-to-date BB mappings and dominators.  */
  compute_bb_for_insn ();
  calculate_dominance_info (CDI_DOMINATORS);
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  /* Process loops innermost-first.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      tag_map_t tag_map (512);

      record_loads (tag_map, loop);
      avoid_collisions (tag_map);
      if (dump_file)
	{
	  fprintf (dump_file, "Loop %d: Completed rename.\n", loop->num);
	  tag_map.traverse <void *, dump_insn_list> (NULL);
	}
      /* Release the tag_insn_info objects allocated by RECORD_LOADS.  */
      tag_map.traverse <void *, free_insn_info> (NULL);
    }

  loop_optimizer_finalize ();
  free_dominance_info (CDI_DOMINATORS);
  regrename_finish ();
}
845
846
/* Static metadata describing the pass to the pass manager.  */
const pass_data pass_data_tag_collision_avoidance =
{
  RTL_PASS, /* type */
  "tag_collision_avoidance", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
859
860
class pass_tag_collision_avoidance : public rtl_opt_pass
{
public:
  pass_tag_collision_avoidance (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_tag_collision_avoidance, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when the selected tuning requests load register renaming and
     we are optimizing at -O2 or higher.  */
  virtual bool gate (function *)
    {
      return ((aarch64_tune_params.extra_tuning_flags
	       & AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS)
	      && optimize >= 2);
    }

  virtual unsigned int execute (function *)
    {
      execute_tag_collision_avoidance ();
      return 0;
    }

}; // class pass_tag_collision_avoidance
883
884
/* Create a new instance of the Falkor tag collision avoidance pass.  */
rtl_opt_pass *
make_pass_tag_collision_avoidance (gcc::context *ctxt)
{
  return new pass_tag_collision_avoidance (ctxt);
}
891