1/* Simulator cache routines for CGEN simulators (and maybe others).
2   Copyright (C) 1996, 1997, 1998, 2007 Free Software Foundation, Inc.
3   Contributed by Cygnus Support.
4
5This file is part of GDB, the GNU debugger.
6
7This program is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3 of the License, or
10(at your option) any later version.
11
12This program is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
19
20#define SCACHE_DEFINE_INLINE
21
22#include "sim-main.h"
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#include "libiberty.h"
27#include "sim-options.h"
28#include "sim-io.h"
29
30#define MAX(a,b) ((a) > (b) ? (a) : (b))
31
32/* Unused address.  */
33#define UNUSED_ADDR 0xffffffff
34
35/* Scache configuration parameters.
36   ??? Experiments to determine reasonable values is wip.
37   These are just guesses.  */
38
39/* Default number of scache elements.
40   The size of an element is typically 32-64 bytes, so the size of the
41   default scache will be between 512K and 1M bytes.  */
42#ifdef CONFIG_SIM_CACHE_SIZE
43#define SCACHE_DEFAULT_CACHE_SIZE CONFIG_SIM_CACHE_SIZE
44#else
45#define SCACHE_DEFAULT_CACHE_SIZE 16384
46#endif
47
48/* Minimum cache size.
49   The m32r port assumes a cache size of at least 2 so it can decode both 16
50   bit insns.  When compiling we need an extra for the chain entry.  And this
51   must be a multiple of 2.  Hence 4 is the minimum (though, for those with
52   featuritis or itchy pedantic bits, we could make this conditional on
53   WITH_SCACHE_PBB).  */
54#define MIN_SCACHE_SIZE 4
55
56/* Ratio of size of text section to size of scache.
57   When compiling, we don't want to flush the scache more than we have to
58   but we also don't want it to be exorbitantly(sp?) large.  So we pick a high
59   default value, then reduce it by the size of the program being simulated,
60   but we don't override any value specified on the command line.
61   If not specified on the command line, the size to use is computed as
62   max (MIN_SCACHE_SIZE,
63        min (DEFAULT_SCACHE_SIZE,
64             text_size / (base_insn_size * INSN_SCACHE_RATIO))).  */
65/* ??? Interesting idea but not currently used.  */
66#define INSN_SCACHE_RATIO 4
67
68/* Default maximum insn chain length.
69   The only reason for a maximum is so we can place a maximum size on the
70   profiling table.  Chain lengths are determined by cti's.
71   32 is a more reasonable number, but when profiling, the before/after
72   handlers take up that much more space.  The scache is filled from front to
73   back so all this determines is when the scache needs to be flushed.  */
74#define MAX_CHAIN_LENGTH 64
75
76/* Default maximum hash list length.  */
77#define MAX_HASH_CHAIN_LENGTH 4
78
79/* Minimum hash table size.  */
80#define MIN_HASH_CHAINS 32
81
82/* Ratio of number of scache elements to number of hash lists.
83   Since the user can only specify the size of the scache, we compute the
84   size of the hash table as
85   max (MIN_HASH_CHAINS, scache_size / SCACHE_HASH_RATIO).  */
86#define SCACHE_HASH_RATIO 8
87
88/* Hash a PC value.
89   FIXME: May wish to make the hashing architecture specific.
90   FIXME: revisit */
91#define HASH_PC(pc) (((pc) >> 2) + ((pc) >> 5))
92
93static MODULE_INIT_FN scache_init;
94static MODULE_UNINSTALL_FN scache_uninstall;
95
96static DECLARE_OPTION_HANDLER (scache_option_handler);
97
98#define OPTION_PROFILE_SCACHE	(OPTION_START + 0)
99
100static const OPTION scache_options[] = {
101  { {"scache-size", optional_argument, NULL, 'c'},
102      'c', "[SIZE]", "Specify size of simulator execution cache",
103      scache_option_handler },
104#if WITH_SCACHE_PBB
105  /* ??? It might be nice to allow the user to specify the size of the hash
106     table, the maximum hash list length, and the maximum chain length, but
107     for now that might be more akin to featuritis.  */
108#endif
109  { {"profile-scache", optional_argument, NULL, OPTION_PROFILE_SCACHE},
110      '\0', "on|off", "Perform simulator execution cache profiling",
111      scache_option_handler },
112  { {NULL, no_argument, NULL, 0}, '\0', NULL, NULL, NULL }
113};
114
115static SIM_RC
116scache_option_handler (SIM_DESC sd, sim_cpu *cpu, int opt,
117		       char *arg, int is_command)
118{
119  switch (opt)
120    {
121    case 'c' :
122      if (WITH_SCACHE)
123	{
124	  if (arg != NULL)
125	    {
126	      int n = strtol (arg, NULL, 0);
127	      if (n < MIN_SCACHE_SIZE)
128		{
129		  sim_io_eprintf (sd, "invalid scache size `%d', must be at least 4", n);
130		  return SIM_RC_FAIL;
131		}
132	      /* Ensure it's a multiple of 2.  */
133	      if ((n & (n - 1)) != 0)
134		{
135		  sim_io_eprintf (sd, "scache size `%d' not a multiple of 2\n", n);
136		  {
137		    /* round up to nearest multiple of 2 */
138		    int i;
139		    for (i = 1; i < n; i <<= 1)
140		      continue;
141		    n = i;
142		  }
143		  sim_io_eprintf (sd, "rounding scache size up to %d\n", n);
144		}
145	      if (cpu == NULL)
146		STATE_SCACHE_SIZE (sd) = n;
147	      else
148		CPU_SCACHE_SIZE (cpu) = n;
149	    }
150	  else
151	    {
152	      if (cpu == NULL)
153		STATE_SCACHE_SIZE (sd) = SCACHE_DEFAULT_CACHE_SIZE;
154	      else
155		CPU_SCACHE_SIZE (cpu) = SCACHE_DEFAULT_CACHE_SIZE;
156	    }
157	}
158      else
159	sim_io_eprintf (sd, "Simulator execution cache not enabled, `--scache-size' ignored\n");
160      break;
161
162    case OPTION_PROFILE_SCACHE :
163      if (WITH_SCACHE && WITH_PROFILE_SCACHE_P)
164	{
165	  /* FIXME: handle cpu != NULL.  */
166	  return sim_profile_set_option (sd, "-scache", PROFILE_SCACHE_IDX,
167					 arg);
168	}
169      else
170	sim_io_eprintf (sd, "Simulator cache profiling not compiled in, `--profile-scache' ignored\n");
171      break;
172    }
173
174  return SIM_RC_OK;
175}
176
177SIM_RC
178scache_install (SIM_DESC sd)
179{
180  sim_add_option_table (sd, NULL, scache_options);
181  sim_module_add_init_fn (sd, scache_init);
182  sim_module_add_uninstall_fn (sd, scache_uninstall);
183
184  /* This is the default, it may be overridden on the command line.  */
185  STATE_SCACHE_SIZE (sd) = WITH_SCACHE;
186
187  return SIM_RC_OK;
188}
189
190static SIM_RC
191scache_init (SIM_DESC sd)
192{
193  int c;
194
195  for (c = 0; c < MAX_NR_PROCESSORS; ++c)
196    {
197      SIM_CPU *cpu = STATE_CPU (sd, c);
198      int elm_size = IMP_PROPS_SCACHE_ELM_SIZE (MACH_IMP_PROPS (CPU_MACH (cpu)));
199
200      /* elm_size is 0 if the cpu doesn't not have scache support */
201      if (elm_size == 0)
202	{
203	  CPU_SCACHE_SIZE (cpu) = 0;
204	  CPU_SCACHE_CACHE (cpu) = NULL;
205	}
206      else
207	{
208	  if (CPU_SCACHE_SIZE (cpu) == 0)
209	    CPU_SCACHE_SIZE (cpu) = STATE_SCACHE_SIZE (sd);
210	  CPU_SCACHE_CACHE (cpu) =
211	    (SCACHE *) xmalloc (CPU_SCACHE_SIZE (cpu) * elm_size);
212#if WITH_SCACHE_PBB
213	  CPU_SCACHE_MAX_CHAIN_LENGTH (cpu) = MAX_CHAIN_LENGTH;
214	  CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu) = MAX_HASH_CHAIN_LENGTH;
215	  CPU_SCACHE_NUM_HASH_CHAINS (cpu) = MAX (MIN_HASH_CHAINS,
216						  CPU_SCACHE_SIZE (cpu)
217						  / SCACHE_HASH_RATIO);
218	  CPU_SCACHE_HASH_TABLE (cpu) =
219	    (SCACHE_MAP *) xmalloc (CPU_SCACHE_NUM_HASH_CHAINS (cpu)
220				    * CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu)
221				    * sizeof (SCACHE_MAP));
222	  CPU_SCACHE_PBB_BEGIN (cpu) = (SCACHE *) zalloc (elm_size);
223	  CPU_SCACHE_CHAIN_LENGTHS (cpu) =
224	    (unsigned long *) zalloc ((CPU_SCACHE_MAX_CHAIN_LENGTH (cpu) + 1)
225				      * sizeof (long));
226#endif
227	}
228    }
229
230  scache_flush (sd);
231
232  return SIM_RC_OK;
233}
234
235static void
236scache_uninstall (SIM_DESC sd)
237{
238  int c;
239
240  for (c = 0; c < MAX_NR_PROCESSORS; ++c)
241    {
242      SIM_CPU *cpu = STATE_CPU (sd, c);
243
244      if (CPU_SCACHE_CACHE (cpu) != NULL)
245	free (CPU_SCACHE_CACHE (cpu));
246#if WITH_SCACHE_PBB
247      if (CPU_SCACHE_HASH_TABLE (cpu) != NULL)
248	free (CPU_SCACHE_HASH_TABLE (cpu));
249      if (CPU_SCACHE_PBB_BEGIN (cpu) != NULL)
250	free (CPU_SCACHE_PBB_BEGIN (cpu));
251      if (CPU_SCACHE_CHAIN_LENGTHS (cpu) != NULL)
252	free (CPU_SCACHE_CHAIN_LENGTHS (cpu));
253#endif
254    }
255}
256
257void
258scache_flush (SIM_DESC sd)
259{
260  int c;
261
262  for (c = 0; c < MAX_NR_PROCESSORS; ++c)
263    {
264      SIM_CPU *cpu = STATE_CPU (sd, c);
265      scache_flush_cpu (cpu);
266    }
267}
268
269void
270scache_flush_cpu (SIM_CPU *cpu)
271{
272  int i,n;
273
274  /* Don't bother if cache not in use.  */
275  if (CPU_SCACHE_SIZE (cpu) == 0)
276    return;
277
278#if WITH_SCACHE_PBB
279  /* It's important that this be reasonably fast as this can be done when
280     the simulation is running.  */
281  CPU_SCACHE_NEXT_FREE (cpu) = CPU_SCACHE_CACHE (cpu);
282  n = CPU_SCACHE_NUM_HASH_CHAINS (cpu) * CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu);
283  /* ??? Might be faster to just set the first entry, then update the
284     "last entry" marker during allocation.  */
285  for (i = 0; i < n; ++i)
286    CPU_SCACHE_HASH_TABLE (cpu) [i] . pc = UNUSED_ADDR;
287#else
288  {
289    int elm_size = IMP_PROPS_SCACHE_ELM_SIZE (MACH_IMP_PROPS (CPU_MACH (cpu)));
290    SCACHE *sc;
291
292    /* Technically, this may not be necessary, but it helps debugging.  */
293    memset (CPU_SCACHE_CACHE (cpu), 0,
294	    CPU_SCACHE_SIZE (cpu) * elm_size);
295
296    for (i = 0, sc = CPU_SCACHE_CACHE (cpu); i < CPU_SCACHE_SIZE (cpu);
297	 ++i, sc = (SCACHE *) ((char *) sc + elm_size))
298      {
299	sc->argbuf.addr = UNUSED_ADDR;
300      }
301  }
302#endif
303}
304
305#if WITH_SCACHE_PBB
306
307/* Look up PC in the hash table of scache entry points.
308   Returns the entry or NULL if not found.  */
309
310SCACHE *
311scache_lookup (SIM_CPU *cpu, IADDR pc)
312{
313  /* FIXME: hash computation is wrong, doesn't take into account
314     NUM_HASH_CHAIN_ENTRIES.  A lot of the hash table will be unused!  */
315  unsigned int slot = HASH_PC (pc) & (CPU_SCACHE_NUM_HASH_CHAINS (cpu) - 1);
316  int i, max_i = CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu);
317  SCACHE_MAP *scm;
318
319  /* We don't update hit/miss statistics as this is only used when recording
320     branch target addresses.  */
321
322  scm = & CPU_SCACHE_HASH_TABLE (cpu) [slot];
323  for (i = 0; i < max_i && scm->pc != UNUSED_ADDR; ++i, ++scm)
324    {
325      if (scm->pc == pc)
326	return scm->sc;
327    }
328  return 0;
329}
330
331/* Look up PC and if not found create an entry for it.
332   If found the result is a pointer to the SCACHE entry.
333   If not found the result is NULL, and the address of a buffer of at least
334   N entries is stored in BUFP.
335   It's done this way so the caller can still distinguish found/not-found.
336   If the table is full, it is emptied to make room.
337   If the maximum length of a hash list is reached a random entry is thrown out
338   to make room.
339   ??? One might want to try to make this smarter, but let's see some
340   measurable benefit first.  */
341
342SCACHE *
343scache_lookup_or_alloc (SIM_CPU *cpu, IADDR pc, int n, SCACHE **bufp)
344{
345  /* FIXME: hash computation is wrong, doesn't take into account
346     NUM_HASH_CHAIN_ENTRIES.  A lot of the hash table will be unused!  */
347  unsigned int slot = HASH_PC (pc) & (CPU_SCACHE_NUM_HASH_CHAINS (cpu) - 1);
348  int i, max_i = CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu);
349  SCACHE_MAP *scm;
350  SCACHE *sc;
351
352  scm = & CPU_SCACHE_HASH_TABLE (cpu) [slot];
353  for (i = 0; i < max_i && scm->pc != UNUSED_ADDR; ++i, ++scm)
354    {
355      if (scm->pc == pc)
356	{
357	  PROFILE_COUNT_SCACHE_HIT (cpu);
358	  return scm->sc;
359	}
360    }
361  PROFILE_COUNT_SCACHE_MISS (cpu);
362
363  /* The address we want isn't cached.  Bummer.
364     If the hash chain we have for this address is full, throw out an entry
365     to make room.  */
366
367  if (i == max_i)
368    {
369      /* Rather than do something sophisticated like LRU, we just throw out
370	 a semi-random entry.  Let someone else have the joy of saying how
371	 wrong this is.  NEXT_FREE is the entry to throw out and cycles
372	 through all possibilities.  */
373      static int next_free = 0;
374
375      scm = & CPU_SCACHE_HASH_TABLE (cpu) [slot];
376      /* FIXME: This seems rather clumsy.  */
377      for (i = 0; i < next_free; ++i, ++scm)
378	continue;
379      ++next_free;
380      if (next_free == CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu))
381	next_free = 0;
382    }
383
384  /* At this point SCM points to the hash table entry to use.
385     Now make sure there's room in the cache.  */
386  /* FIXME: Kinda weird to use a next_free adjusted scm when cache is
387     flushed.  */
388
389  {
390    int elm_size = IMP_PROPS_SCACHE_ELM_SIZE (MACH_IMP_PROPS (CPU_MACH (cpu)));
391    int elms_used = (((char *) CPU_SCACHE_NEXT_FREE (cpu)
392		      - (char *) CPU_SCACHE_CACHE (cpu))
393		     / elm_size);
394    int elms_left = CPU_SCACHE_SIZE (cpu) - elms_used;
395
396    if (elms_left < n)
397      {
398	PROFILE_COUNT_SCACHE_FULL_FLUSH (cpu);
399	scache_flush_cpu (cpu);
400      }
401  }
402
403  sc = CPU_SCACHE_NEXT_FREE (cpu);
404  scm->pc = pc;
405  scm->sc = sc;
406
407  *bufp = sc;
408  return NULL;
409}
410
411#endif /* WITH_SCACHE_PBB */
412
413/* Print cache access statics for CPU.  */
414
415void
416scache_print_profile (SIM_CPU *cpu, int verbose)
417{
418  SIM_DESC sd = CPU_STATE (cpu);
419  unsigned long hits = CPU_SCACHE_HITS (cpu);
420  unsigned long misses = CPU_SCACHE_MISSES (cpu);
421  char buf[20];
422  unsigned long max_val;
423  unsigned long *lengths;
424  int i;
425
426  if (CPU_SCACHE_SIZE (cpu) == 0)
427    return;
428
429  sim_io_printf (sd, "Simulator Cache Statistics\n\n");
430
431  /* One could use PROFILE_LABEL_WIDTH here.  I chose not to.  */
432  sim_io_printf (sd, "  Cache size: %s\n",
433		 sim_add_commas (buf, sizeof (buf), CPU_SCACHE_SIZE (cpu)));
434  sim_io_printf (sd, "  Hits:       %s\n",
435		 sim_add_commas (buf, sizeof (buf), hits));
436  sim_io_printf (sd, "  Misses:     %s\n",
437		 sim_add_commas (buf, sizeof (buf), misses));
438  if (hits + misses != 0)
439    sim_io_printf (sd, "  Hit rate:   %.2f%%\n",
440		   ((double) hits / ((double) hits + (double) misses)) * 100);
441
442#if WITH_SCACHE_PBB
443  sim_io_printf (sd, "\n");
444  sim_io_printf (sd, "  Hash table size:       %s\n",
445		 sim_add_commas (buf, sizeof (buf), CPU_SCACHE_NUM_HASH_CHAINS (cpu)));
446  sim_io_printf (sd, "  Max hash list length:  %s\n",
447		 sim_add_commas (buf, sizeof (buf), CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu)));
448  sim_io_printf (sd, "  Max insn chain length: %s\n",
449		 sim_add_commas (buf, sizeof (buf), CPU_SCACHE_MAX_CHAIN_LENGTH (cpu)));
450  sim_io_printf (sd, "  Cache full flushes:    %s\n",
451		 sim_add_commas (buf, sizeof (buf), CPU_SCACHE_FULL_FLUSHES (cpu)));
452  sim_io_printf (sd, "\n");
453
454  if (verbose)
455    {
456      sim_io_printf (sd, "  Insn chain lengths:\n\n");
457      max_val = 0;
458      lengths = CPU_SCACHE_CHAIN_LENGTHS (cpu);
459      for (i = 1; i < CPU_SCACHE_MAX_CHAIN_LENGTH (cpu); ++i)
460	if (lengths[i] > max_val)
461	  max_val = lengths[i];
462      for (i = 1; i < CPU_SCACHE_MAX_CHAIN_LENGTH (cpu); ++i)
463	{
464	  sim_io_printf (sd, "  %2d: %*s: ",
465			 i,
466			 max_val < 10000 ? 5 : 10,
467			 sim_add_commas (buf, sizeof (buf), lengths[i]));
468	  sim_profile_print_bar (sd, PROFILE_HISTOGRAM_WIDTH,
469				 lengths[i], max_val);
470	  sim_io_printf (sd, "\n");
471	}
472      sim_io_printf (sd, "\n");
473    }
474#endif /* WITH_SCACHE_PBB */
475}
476