1/* Time routines for speed measurements.
2
3Copyright 1999-2004, 2010-2012 Free Software Foundation, Inc.
4
5This file is part of the GNU MP Library.
6
7The GNU MP Library is free software; you can redistribute it and/or modify
8it under the terms of either:
9
10  * the GNU Lesser General Public License as published by the Free
11    Software Foundation; either version 3 of the License, or (at your
12    option) any later version.
13
14or
15
16  * the GNU General Public License as published by the Free Software
17    Foundation; either version 2 of the License, or (at your option) any
18    later version.
19
20or both in parallel, as here.
21
22The GNU MP Library is distributed in the hope that it will be useful, but
23WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25for more details.
26
27You should have received copies of the GNU General Public License and the
28GNU Lesser General Public License along with the GNU MP Library.  If not,
29see https://www.gnu.org/licenses/.  */
30
31
32/* Usage:
33
34   The code in this file implements the lowest level of time measuring,
35   simple one-time measuring of time between two points.
36
37   void speed_starttime (void)
38   double speed_endtime (void)
39       Call speed_starttime to start measuring, and then call speed_endtime
40       when done.
41
42       speed_endtime returns the time taken, in seconds.  Or if the timebase
43       is in CPU cycles and the CPU frequency is unknown then speed_endtime
44       returns cycles.  Applications can identify the cycles return by
45       checking for speed_cycletime (described below) equal to 1.0.
46
47       If some sort of temporary glitch occurs then speed_endtime returns
48       0.0.  Currently this is for various cases where a negative time has
49       occurred.  This unfortunately occurs with getrusage on some systems,
50       and with the hppa cycle counter on hpux.
51
52   double speed_cycletime
53       The time in seconds for each CPU cycle.  For example on a 100 MHz CPU
54       this would be 1.0e-8.
55
56       If the CPU frequency is unknown, then speed_cycletime is either 0.0
57       or 1.0.  It's 0.0 when speed_endtime is returning seconds, or it's
58       1.0 when speed_endtime is returning cycles.
59
60       It may be noted that "speed_endtime() / speed_cycletime" gives a
61       measured time in cycles, irrespective of whether speed_endtime is
62       returning cycles or seconds.  (Assuming cycles can be had, ie. it's
63       either cycles already or the cpu frequency is known.  See also
64       speed_cycletime_need_cycles below.)
65
66   double speed_unittime
67       The unit of time measurement accuracy for the timing method in use.
68       This is in seconds or cycles, as per speed_endtime.
69
70   char speed_time_string[]
71       A null-terminated string describing the time method in use.
72
73   void speed_time_init (void)
74       Initialize time measuring.  speed_starttime() does this
75       automatically, so it's only needed if an application wants to inspect
76       the above global variables before making a measurement.
77
78   int speed_precision
79       The intended accuracy of time measurements.  speed_measure() in
80       common.c for instance runs target routines with enough repetitions so
81       it takes at least "speed_unittime * speed_precision" (this expression
82       works for both cycles or seconds from speed_endtime).
83
       A program can provide an option so the user can set speed_precision.
85       If speed_precision is zero when speed_time_init or speed_starttime
86       first run then it gets a default based on the measuring method
87       chosen.  (More precision for higher accuracy methods.)
88
89   void speed_cycletime_need_seconds (void)
90       Call this to demand that speed_endtime will return seconds, and not
91       cycles.  If only cycles are available then an error is printed and
92       the program exits.
93
94   void speed_cycletime_need_cycles (void)
95       Call this to demand that speed_cycletime is non-zero, so that
96       "speed_endtime() / speed_cycletime" will give times in cycles.
97
98
99
100   Notes:
101
102   Various combinations of cycle counter, read_real_time(), getrusage(),
103   gettimeofday() and times() can arise, according to which are available
104   and their precision.
105
106
107   Allowing speed_endtime() to return either seconds or cycles is only a
108   slight complication and makes it possible for the speed program to do
109   some sensible things without demanding the CPU frequency.  If seconds are
110   being measured then it can always print seconds, and if cycles are being
111   measured then it can always print them without needing to know how long
112   they are.  Also the tune program doesn't care at all what the units are.
113
114   GMP_CPU_FREQUENCY can always be set when the automated methods in freq.c
115   fail.  This will be needed if times in seconds are wanted but a cycle
116   counter is being used, or if times in cycles are wanted but getrusage or
117   another seconds based timer is in use.
118
119   If the measuring method uses a cycle counter but supplements it with
120   getrusage or the like, then knowing the CPU frequency is mandatory since
121   the code compares values from the two.
122
123
124   Not done:
125
126   Solaris gethrtime() seems no more than a slow way to access the Sparc V9
127   cycle counter.  gethrvtime() seems to be relevant only to light weight
128   processes, it doesn't for instance give nanosecond virtual time.  So
129   neither of these are used.
130
131
132   Bugs:
133
134   getrusage_microseconds_p is fundamentally flawed, getrusage and
135   gettimeofday can have resolutions other than clock ticks or microseconds,
136   for instance IRIX 5 has a tick of 10 ms but a getrusage of 1 ms.
137
138
139   Enhancements:
140
141   The SGI hardware counter has 64 bits on some machines, which could be
142   used when available.  But perhaps 32 bits is enough range, and then rely
143   on the getrusage supplement.
144
145   Maybe getrusage (or times) should be used as a supplement for any
146   wall-clock measuring method.  Currently a wall clock with a good range
147   (eg. a 64-bit cycle counter) is used without a supplement.
148
149   On PowerPC the timebase registers could be used, but would have to do
150   something to find out the speed.  On 6xx chips it's normally 1/4 bus
151   speed, on 4xx chips it's either that or an external clock.  Measuring
152   against gettimeofday might be ok.  */
153
154#include "config.h"
155
156#include <errno.h>
157#include <setjmp.h>
158#include <signal.h>
159#include <stddef.h>
160#include <stdio.h>
161#include <string.h>
162#include <stdlib.h> /* for getenv() */
163
164#if HAVE_FCNTL_H
165#include <fcntl.h>  /* for open() */
166#endif
167
168#if HAVE_STDINT_H
169#include <stdint.h> /* for uint64_t */
170#endif
171
172#if HAVE_UNISTD_H
173#include <unistd.h> /* for sysconf() */
174#endif
175
176#include <sys/types.h>
177
178#if TIME_WITH_SYS_TIME
179# include <sys/time.h>  /* for struct timeval */
180# include <time.h>
181#else
182# if HAVE_SYS_TIME_H
183#  include <sys/time.h>
184# else
185#  include <time.h>
186# endif
187#endif
188
189#if HAVE_SYS_MMAN_H
190#include <sys/mman.h>      /* for mmap() */
191#endif
192
193#if HAVE_SYS_RESOURCE_H
194#include <sys/resource.h>  /* for struct rusage */
195#endif
196
197#if HAVE_SYS_SYSSGI_H
198#include <sys/syssgi.h>    /* for syssgi() */
199#endif
200
201#if HAVE_SYS_SYSTEMCFG_H
202#include <sys/systemcfg.h> /* for RTC_POWER on AIX */
203#endif
204
205#if HAVE_SYS_TIMES_H
206#include <sys/times.h>  /* for times() and struct tms */
207#endif
208
209#include "gmp-impl.h"
210
211#include "speed.h"
212
213
/* Fallback for systems without strerror(): every use expands to a fixed
   placeholder string.  strerror is only used for some stuff on newish
   systems (error text in messages), so there's no need to have a proper
   replacement.  */
#if ! HAVE_STRERROR
#define strerror(n)  "<strerror not available>"
#endif
219
220
/* Public state, see the usage notes at the top of this file.  */
char    speed_time_string[256];  /* description of the time method in use */
int     speed_precision = 0;     /* 0 means a default is chosen at init */
double  speed_unittime;          /* accuracy unit, in seconds or cycles */
double  speed_cycletime = 0.0;   /* seconds per CPU cycle, when known */
225
226
/* M_2POWU is (INT_MAX+1)*2 as a double, which is the number of distinct
   "unsigned" values when unsigned has one more value bit than int.  It's
   built from INT_MAX so as not to rely on "unsigned" to "double"
   conversion, it's broken in SunOS 4 native cc */
#define M_2POWU   (((double) INT_MAX + 1.0) * 2.0)

/* 2^32 and 2^64 as doubles */
#define M_2POW32  4294967296.0
#define M_2POW64  (M_2POW32 * M_2POW32)
233
234
235/* Conditionals for the time functions available are done with normal C
236   code, which is a lot easier than wildly nested preprocessor directives.
237
238   The choice of what to use is partly made at run-time, according to
239   whether the cycle counter works and the measured accuracy of getrusage
240   and gettimeofday.
241
242   A routine that's not available won't be getting called, but is an abort()
243   to be sure it isn't called mistakenly.
244
245   It can be assumed that if a function exists then its data type will, but
246   if the function doesn't then the data type might or might not exist, so
247   the type can't be used unconditionally.  The "struct_rusage" etc macros
248   provide dummies when the respective function doesn't exist. */
249
250
/* have_cycles is the configured HAVE_SPEED_CYCLECOUNTER value, or 0 when
   that's not set.  In the latter case speed_cyclecounter() is turned into
   an assertion failure, to catch any mistaken call.  speed_time_init
   treats a value of 1 as a counter with 2^32 range and any other non-zero
   value as 2^64 range.  */
#if HAVE_SPEED_CYCLECOUNTER
static const int have_cycles = HAVE_SPEED_CYCLECOUNTER;
#else
static const int have_cycles = 0;
#define speed_cyclecounter(p)  ASSERT_FAIL (speed_cyclecounter not available)
#endif
257
/* "stck" returns ticks since 1 Jan 1900 00:00 GMT, where each tick is 2^-12
   microseconds.  Same #ifdefs here as in longlong.h.

   STCK(timestamp) stores the current value into "timestamp", which should
   be an stck_t.  Where the instruction isn't available have_stck and
   use_stck are 0, stck_t is a dummy type and STCK is an assertion
   failure.  */
#if defined (__GNUC__) && ! defined (NO_ASM)                            \
  && (defined (__i370__) || defined (__s390__) || defined (__mvs__))
static const int  have_stck = 1;
static const int  use_stck = 1;  /* always use when available */
typedef uint64_t  stck_t; /* gcc for s390 is quite new, always has uint64_t */
#define STCK(timestamp)                 \
  do {                                  \
    asm ("stck %0" : "=Q" (timestamp)); \
  } while (0)
#else
static const int  have_stck = 0;
static const int  use_stck = 0;
typedef unsigned long  stck_t;   /* dummy */
#define STCK(timestamp)  ASSERT_FAIL (stck instruction not available)
#endif
/* Length of one stck tick in seconds, ie. 1 / (4096 * 10^6).  */
#define STCK_PERIOD      (1.0 / 4096e6)   /* 2^-12 microseconds */
276
/* mftb

   MFTB(a) reads the PowerPC time base into a two element array, low word
   in a[0] and high word in a[1].  With gcc inline asm the upper half is
   read before and after the lower half and the read is repeated until the
   two upper values agree, so the pair stored is consistent.  Without gcc
   asm, mftb_function (not defined in this file) is called instead.  On
   other CPUs have_mftb is 0 and MFTB zeros a[] and fails an assertion.

   Enhancement: On 64-bit chips mftb gives a 64-bit value, no need for mftbu
   and a loop (see powerpc64.asm).  */
#if HAVE_HOST_CPU_FAMILY_powerpc
static const int  have_mftb = 1;
#if defined (__GNUC__) && ! defined (NO_ASM)
#define MFTB(a)                         \
  do {                                  \
    unsigned  __h1, __l, __h2;          \
    do {                                \
      asm volatile ("mftbu %0\n"        \
		    "mftb  %1\n"        \
		    "mftbu %2"          \
		    : "=r" (__h1),      \
		      "=r" (__l),       \
		      "=r" (__h2));     \
    } while (__h1 != __h2);             \
    a[0] = __l;                         \
    a[1] = __h1;                        \
  } while (0)
#else
#define MFTB(a)   mftb_function (a)
#endif
#else /* ! powerpc */
static const int  have_mftb = 0;
#define MFTB(a)                         \
  do {                                  \
    a[0] = 0;                           \
    a[1] = 0;                           \
    ASSERT_FAIL (mftb not available);   \
  } while (0)
#endif
309
/* The SGI counter method needs both syssgi() and mmap(), see sgi_works_p.
   Unicos 10.X has syssgi(), but not mmap(). */
#if HAVE_SYSSGI && HAVE_MMAP
static const int  have_sgi = 1;
#else
static const int  have_sgi = 0;
#endif
316
/* read_real_time().  When it's not available the two functions become
   assertion failures, and stand-ins are provided for the RTC_POWER and
   RTC_POWER_PC constants and for timebasestruct_t, so code referring to
   them still compiles.  */
#if HAVE_READ_REAL_TIME
static const int have_rrt = 1;
#else
static const int have_rrt = 0;
#define read_real_time(t,s)     ASSERT_FAIL (read_real_time not available)
#define time_base_to_time(t,s)  ASSERT_FAIL (time_base_to_time not available)
#define RTC_POWER     1
#define RTC_POWER_PC  2
#define timebasestruct_t   struct timebasestruct_dummy
struct timebasestruct_dummy {
  int             flag;
  unsigned int    tb_high;
  unsigned int    tb_low;
};
#endif
332
/* clock_gettime() and clock_getres().  struct_timespec is the real struct
   timespec when clock_gettime exists, otherwise a dummy.  The replacement
   macros fail an assertion and evaluate to -1, so they can still be used
   in an "if" condition.  */
#if HAVE_CLOCK_GETTIME
static const int have_cgt = 1;
#define struct_timespec  struct timespec
#else
static const int have_cgt = 0;
#define struct_timespec       struct timespec_dummy
#define clock_gettime(id,ts)  (ASSERT_FAIL (clock_gettime not available), -1)
#define clock_getres(id,ts)   (ASSERT_FAIL (clock_getres not available), -1)
#endif

/* getrusage().  struct_rusage is struct rusage or a dummy.  */
#if HAVE_GETRUSAGE
static const int have_grus = 1;
#define struct_rusage   struct rusage
#else
static const int have_grus = 0;
#define getrusage(n,ru)  ASSERT_FAIL (getrusage not available)
#define struct_rusage    struct rusage_dummy
#endif

/* gettimeofday().  struct_timeval is struct timeval or a dummy.  */
#if HAVE_GETTIMEOFDAY
static const int have_gtod = 1;
#define struct_timeval   struct timeval
#else
static const int have_gtod = 0;
#define gettimeofday(tv,tz)  ASSERT_FAIL (gettimeofday not available)
#define struct_timeval   struct timeval_dummy
#endif

/* times().  struct_tms is struct tms or a dummy.  */
#if HAVE_TIMES
static const int have_times = 1;
#define struct_tms   struct tms
#else
static const int have_times = 0;
#define times(tms)   ASSERT_FAIL (times not available)
#define struct_tms   struct tms_dummy
#endif
369
/* Stand-in types, selected by the struct_xxx macros when the corresponding
   function doesn't exist.  They are declared unconditionally and carry
   only a minimal set of members.  rusage_dummy uses struct_timeval, so its
   ru_utime is the real struct timeval when gettimeofday exists and
   timeval_dummy otherwise.  */
struct tms_dummy {
  long  tms_utime;
};
struct timeval_dummy {
  long  tv_sec;
  long  tv_usec;
};
struct rusage_dummy {
  struct_timeval ru_utime;
};
struct timespec_dummy {
  long  tv_sec;
  long  tv_nsec;
};
384
/* Flags for which measuring methods are in use.  speed_time_init sets
   these according to what's available and what works at run-time.  */
static int  use_cycles;
static int  use_mftb;
static int  use_sgi;
static int  use_rrt;
static int  use_cgt;
static int  use_gtod;
static int  use_grus;
static int  use_times;
static int  use_tick_boundary;

/* One starting value per measuring method.
   NOTE(review): presumably filled in by speed_starttime, which is not in
   this part of the file -- confirm.  */
static unsigned         start_cycles[2];
static stck_t           start_stck;
static unsigned         start_mftb[2];
static unsigned         start_sgi;
static timebasestruct_t start_rrt;
static struct_timespec  start_cgt;
static struct_rusage    start_grus;
static struct_timeval   start_gtod;
static struct_tms       start_times;

/* cycles_limit is set by speed_time_init from the cycle counter range and
   is only used if a supplementary method is chosen there.  The
   xxx_unittime variables hold the measuring unit of each method; sgi
   (from syssgi), cgt (from clock_getres) and grus (1.0e-6) are set in
   seconds.  */
static double  cycles_limit = 1e100;
static double  mftb_unittime;
static double  sgi_unittime;
static double  cgt_unittime;
static double  grus_unittime;
static double  gtod_unittime;
static double  times_unittime;

/* for RTC_POWER format, ie. seconds and nanoseconds */
#define TIMEBASESTRUCT_SECS(t)  ((t)->tb_high + (t)->tb_low * 1e-9)
415
416
/* Return a string representing a time in seconds, nicely formatted with a
   unit of ns, us, ms or s and about 4 significant figures.
   Eg. "10.25ms".

   The string is in a static buffer which is overwritten by each call, so
   it must be used or copied before calling again.  Formatting is bounded
   by the buffer size, so a huge T gives a truncated string rather than
   overrunning the buffer.  */
char *
unittime_string (double t)
{
  static char  buf[128];

  const char  *unit;
  int         prec;

  /* choose units and scale */
  if (t < 1e-6)
    {
      t *= 1e9;
      unit = "ns";
    }
  else if (t < 1e-3)
    {
      t *= 1e6;
      unit = "us";
    }
  else if (t < 1.0)
    {
      t *= 1e3;
      unit = "ms";
    }
  else
    unit = "s";

  /* want 4 significant figures */
  if (t < 1.0)
    prec = 4;
  else if (t < 10.0)
    prec = 3;
  else if (t < 100.0)
    prec = 2;
  else
    prec = 1;

  /* "%f" of a large double can need over 300 digits, hence the bound */
  snprintf (buf, sizeof (buf), "%.*f%s", prec, t, unit);
  return buf;
}
450
451
/* Jump target used to escape from a SIGILL raised while cycles_works_p is
   trying speed_cyclecounter().  */
static jmp_buf  cycles_works_buf;

/* SIGILL handler installed by cycles_works_p.  It doesn't return to the
   faulting code, but instead jumps back to the setjmp in cycles_works_p,
   making that setjmp return 1.  The signal number is not used.  */
static RETSIGTYPE
cycles_works_handler (int sig)
{
  longjmp (cycles_works_buf, 1);
}
459
460int
461cycles_works_p (void)
462{
463  static int  result = -1;
464
465  if (result != -1)
466    goto done;
467
468  /* FIXME: On linux, the cycle counter is not saved and restored over
469   * context switches, making it almost useless for precise cputime
470   * measurements. When available, it's better to use clock_gettime,
471   * which seems to have reasonable accuracy (tested on x86_32,
472   * linux-2.6.26, glibc-2.7). However, there are also some linux
473   * systems where clock_gettime is broken in one way or the other,
474   * like CLOCK_PROCESS_CPUTIME_ID not implemented (easy case) or
475   * kind-of implemented but broken (needs code to detect that), and
476   * on those systems a wall-clock cycle counter is the least bad
477   * fallback.
478   *
479   * So we need some code to disable the cycle counter on some but not
480   * all linux systems. */
481#ifdef SIGILL
482  {
483    RETSIGTYPE (*old_handler) (int);
484    unsigned  cycles[2];
485
486    old_handler = signal (SIGILL, cycles_works_handler);
487    if (old_handler == SIG_ERR)
488      {
489	if (speed_option_verbose)
490	  printf ("cycles_works_p(): SIGILL not supported, assuming speed_cyclecounter() works\n");
491	goto yes;
492      }
493    if (setjmp (cycles_works_buf))
494      {
495	if (speed_option_verbose)
496	  printf ("cycles_works_p(): SIGILL during speed_cyclecounter(), so doesn't work\n");
497	result = 0;
498	goto done;
499      }
500    speed_cyclecounter (cycles);
501    signal (SIGILL, old_handler);
502    if (speed_option_verbose)
503      printf ("cycles_works_p(): speed_cyclecounter() works\n");
504  }
505#else
506
507  if (speed_option_verbose)
508    printf ("cycles_works_p(): SIGILL not defined, assuming speed_cyclecounter() works\n");
509  goto yes;
510#endif
511
512 yes:
513  result = 1;
514
515 done:
516  return result;
517}
518
519
/* Return the number of clock ticks per second.  sysconf(_SC_CLK_TCK) is
   preferred where sysconf exists, with the CLK_TCK macro as the fallback.
   If neither gives an answer the program is aborted.  The value is
   remembered, so the lookup and its messages happen only once.  */
long
clk_tck (void)
{
  static long  ticks = -1L;   /* -1 while not yet determined */

  if (ticks == -1L)
    {
#if HAVE_SYSCONF
      ticks = sysconf (_SC_CLK_TCK);
      if (ticks == -1L)
        fprintf (stderr,
                 "sysconf(_SC_CLK_TCK) not working, using CLK_TCK instead\n");
      else
        {
          if (speed_option_verbose)
            printf ("sysconf(_SC_CLK_TCK) is %ld per second\n", ticks);
          return ticks;
        }
#endif

#ifdef CLK_TCK
      ticks = CLK_TCK;
      if (speed_option_verbose)
        printf ("CLK_TCK is %ld per second\n", ticks);
#else
      fprintf (stderr, "CLK_TCK not defined, cannot continue\n");
      abort ();
#endif
    }

  return ticks;
}
552
553
/* If two times can be observed less than half a clock tick apart, then
   assume "get" is microsecond accurate.

   MICROSECONDS_P expands to a complete function body, returning 1 for
   microsecond accurate or 0 for only clock tick accurate, with the answer
   cached in a static variable.  Its parameters are

       name    string identifying the time function in messages
       type    type of the variables holding a time
       get     get(t) stores the current time in variable t
       sec     sec(t) is the seconds field of t
       usec    usec(t) is the microseconds field of t

   Two times only 1 microsecond apart are not believed, since some kernels
   take it upon themselves to ensure gettimeofday doesn't return the same
   value twice, for the benefit of applications using it for a timestamp.
   This is obviously very stupid given the speed of CPUs these days.

   Making 100*"reps" many calls to noop_1() is designed to waste some CPU,
   with a view to getting measurements 2 microseconds (or more) apart.
   "reps" starts at zero, goes to one and is then doubled until such a
   period is seen.

   The outer loop "attempts" are just to allow for any random nonsense or
   system load upsetting the measurements (ie. making two successive calls
   to "get" come out as a longer interval than normal).

   Bugs:

   The assumption that any interval less than a half tick implies
   microsecond resolution is obviously fairly rash, the true resolution
   could be anything between a microsecond and that half tick.  Perhaps
   something special would have to be done on a system where this is the
   case, since there's no obvious reliable way to detect it
   automatically.  */

#define MICROSECONDS_P(name, type, get, sec, usec)                      \
  {                                                                     \
    static int  result = -1;                                            \
    type      st, et;                                                   \
    long      dt, half_tick;                                            \
    unsigned  attempt, reps, i, j;                                      \
									\
    if (result != -1)                                                   \
      return result;                                                    \
									\
    result = 0;                                                         \
    half_tick = (1000000L / clk_tck ()) / 2;                            \
									\
    for (attempt = 0; attempt < 5; attempt++)                           \
      {                                                                 \
	reps = 0;                                                       \
	for (;;)                                                        \
	  {                                                             \
	    get (st);                                                   \
	    for (i = 0; i < reps; i++)                                  \
	      for (j = 0; j < 100; j++)                                 \
		noop_1 (CNST_LIMB(0));                                  \
	    get (et);                                                   \
									\
	    dt = (sec(et)-sec(st))*1000000L + usec(et)-usec(st);        \
									\
	    if (speed_option_verbose >= 2)                              \
	      printf ("%s attempt=%u, reps=%u, dt=%ld\n",               \
		      name, attempt, reps, dt);                         \
									\
	    if (dt >= 2)                                                \
	      break;                                                    \
									\
	    reps = (reps == 0 ? 1 : 2*reps);                            \
	    if (reps == 0)                                              \
	      break;  /* uint overflow, not normal */                   \
	  }                                                             \
									\
	if (dt < half_tick)                                             \
	  {                                                             \
	    result = 1;                                                 \
	    break;                                                      \
	  }                                                             \
      }                                                                 \
									\
    if (speed_option_verbose)                                           \
      {                                                                 \
	if (result)                                                     \
	  printf ("%s is microsecond accurate\n", name);                \
	else                                                            \
	  printf ("%s is only %s clock tick accurate\n",                \
		  name, unittime_string (1.0/clk_tck()));               \
      }                                                                 \
    return result;                                                      \
  }
634
635
/* Return 1 if gettimeofday looks microsecond accurate, or 0 if it seems
   only clock tick accurate.  The test is MICROSECONDS_P, which caches its
   answer in a static variable.

   The three helper macros adapt struct timeval to MICROSECONDS_P.  The
   same definitions are repeated in freq_measure_mftb_one, so they must be
   kept identical there.  */
int
gettimeofday_microseconds_p (void)
{
#define call_gettimeofday(t)   gettimeofday (&(t), NULL)
#define timeval_tv_sec(t)      ((t).tv_sec)
#define timeval_tv_usec(t)     ((t).tv_usec)
  MICROSECONDS_P ("gettimeofday", struct_timeval,
		  call_gettimeofday, timeval_tv_sec, timeval_tv_usec);
}
645
/* Return 1 if the ru_utime from getrusage looks microsecond accurate, or 0
   if it seems only clock tick accurate.  The test is MICROSECONDS_P, which
   caches its answer in a static variable.  getrusage is called with 0 as
   its first argument.  See "Bugs" at the top of this file about the
   limitations of this test.  */
int
getrusage_microseconds_p (void)
{
#define call_getrusage(t)   getrusage (0, &(t))
#define rusage_tv_sec(t)    ((t).ru_utime.tv_sec)
#define rusage_tv_usec(t)   ((t).ru_utime.tv_usec)
  MICROSECONDS_P ("getrusage", struct_rusage,
		  call_getrusage, rusage_tv_sec, rusage_tv_usec);
}
655
656/* Test whether getrusage goes backwards, return non-zero if it does
657   (suggesting it's flawed).
658
659   On a macintosh m68040-unknown-netbsd1.4.1 getrusage looks like it's
660   microsecond accurate, but has been seen remaining unchanged after many
661   microseconds have elapsed.  It also regularly goes backwards by 1000 to
662   5000 usecs, this has been seen after between 500 and 4000 attempts taking
663   perhaps 0.03 seconds.  We consider this too broken for good measuring.
664   We used to have configure pretend getrusage didn't exist on this system,
665   but a runtime test should be more reliable, since we imagine the problem
666   is not confined to just this exact system tuple.  */
667
668int
669getrusage_backwards_p (void)
670{
671  static int result = -1;
672  struct rusage  start, prev, next;
673  long  d;
674  int   i;
675
676  if (result != -1)
677    return result;
678
679  getrusage (0, &start);
680  memcpy (&next, &start, sizeof (next));
681
682  result = 0;
683  i = 0;
684  for (;;)
685    {
686      memcpy (&prev, &next, sizeof (prev));
687      getrusage (0, &next);
688
689      if (next.ru_utime.tv_sec < prev.ru_utime.tv_sec
690	  || (next.ru_utime.tv_sec == prev.ru_utime.tv_sec
691	      && next.ru_utime.tv_usec < prev.ru_utime.tv_usec))
692	{
693	  if (speed_option_verbose)
694	    printf ("getrusage went backwards (attempt %d: %ld.%06ld -> %ld.%06ld)\n",
695		    i,
696		    (long) prev.ru_utime.tv_sec, (long) prev.ru_utime.tv_usec,
697		    (long) next.ru_utime.tv_sec, (long) next.ru_utime.tv_usec);
698	  result = 1;
699	  break;
700	}
701
702      /* minimum 1000 attempts, then stop after either 0.1 seconds or 50000
703	 attempts, whichever comes first */
704      d = 1000000 * (next.ru_utime.tv_sec - start.ru_utime.tv_sec)
705	+ (next.ru_utime.tv_usec - start.ru_utime.tv_usec);
706      i++;
707      if (i > 50000 || (i > 1000 && d > 100000))
708	break;
709    }
710
711  return result;
712}
713
/* CGT_ID is the clock to use with clock_gettime and clock_getres:
   CLOCK_PROCESS_CPUTIME_ID if it's defined, otherwise CLOCK_VIRTUAL if
   that's defined.  have_cgt_id says whether either was found.  When
   neither was, CGT_ID fails an assertion and evaluates to -1.

   CLOCK_PROCESS_CPUTIME_ID looks like it's going to be in a future version
   of glibc (some time post 2.2).

   CLOCK_VIRTUAL is process time, available in BSD systems (though sometimes
   defined, but returning -1 for an error).  */

#ifdef CLOCK_PROCESS_CPUTIME_ID
# define CGT_ID        CLOCK_PROCESS_CPUTIME_ID
#else
# ifdef CLOCK_VIRTUAL
#  define CGT_ID       CLOCK_VIRTUAL
# endif
#endif
#ifdef CGT_ID
const int  have_cgt_id = 1;
#else
const int  have_cgt_id = 0;
# define CGT_ID       (ASSERT_FAIL (CGT_ID not determined), -1)
#endif

/* Number of iterations of the delay loop in cgt_works_p.  */
#define CGT_DELAY_COUNT 1000
735
736int
737cgt_works_p (void)
738{
739  static int  result = -1;
740  struct_timespec  unit;
741
742  if (! have_cgt)
743    return 0;
744
745  if (! have_cgt_id)
746    {
747      if (speed_option_verbose)
748	printf ("clock_gettime don't know what ID to use\n");
749      result = 0;
750      return result;
751    }
752
753  if (result != -1)
754    return result;
755
756  /* trial run to see if it works */
757  if (clock_gettime (CGT_ID, &unit) != 0)
758    {
759      if (speed_option_verbose)
760	printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
761      result = 0;
762      return result;
763    }
764
765  /* get the resolution */
766  if (clock_getres (CGT_ID, &unit) != 0)
767    {
768      if (speed_option_verbose)
769	printf ("clock_getres id=%d error: %s\n", CGT_ID, strerror (errno));
770      result = 0;
771      return result;
772    }
773
774  cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9;
775  if (speed_option_verbose)
776    printf ("clock_gettime is %s accurate\n", unittime_string (cgt_unittime));
777
778  if (cgt_unittime < 10e-9)
779    {
780      /* Do we believe this? */
781      struct timespec start, end;
782      static volatile int counter;
783      double duration;
784      if (clock_gettime (CGT_ID, &start))
785	{
786	  if (speed_option_verbose)
787	    printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
788	  result = 0;
789	  return result;
790	}
791      /* Loop of at least 1000 memory accesses, ought to take at
792	 least 100 ns*/
793      for (counter = 0; counter < CGT_DELAY_COUNT; counter++)
794	;
795      if (clock_gettime (CGT_ID, &end))
796	{
797	  if (speed_option_verbose)
798	    printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
799	  result = 0;
800	  return result;
801	}
802      duration = (end.tv_sec + end.tv_nsec * 1e-9
803		  - start.tv_sec - start.tv_nsec * 1e-9);
804      if (speed_option_verbose)
805	printf ("delay loop of %d rounds took %s (according to clock_gettime)\n",
806		CGT_DELAY_COUNT, unittime_string (duration));
807      if (duration < 100e-9)
808	{
809	  if (speed_option_verbose)
810	    printf ("clock_gettime id=%d not believable\n", CGT_ID);
811	  result = 0;
812	  return result;
813	}
814    }
815  result = 1;
816  return result;
817}
818
819
/* One measurement for freq_measure, passed to it by mftb_works_p.  The
   body comes from FREQ_MEASURE_ONE (not defined in this file), which is
   given gettimeofday and MFTB as the two time sources.
   NOTE(review): presumably this returns the mftb period in seconds, or
   -1.0 on failure, as mftb_works_p treats the freq_measure result that
   way -- confirm against FREQ_MEASURE_ONE.

   The helper macros repeat the definitions in gettimeofday_microseconds_p
   and must be kept identical to those.  */
static double
freq_measure_mftb_one (void)
{
#define call_gettimeofday(t)   gettimeofday (&(t), NULL)
#define timeval_tv_sec(t)      ((t).tv_sec)
#define timeval_tv_usec(t)     ((t).tv_usec)
  FREQ_MEASURE_ONE ("mftb", struct_timeval,
		    call_gettimeofday, MFTB,
		    timeval_tv_sec, timeval_tv_usec);
}
830
831
/* Jump target used to escape from a SIGILL raised while mftb_works_p is
   trying MFTB.  */
static jmp_buf  mftb_works_buf;

/* SIGILL handler installed by mftb_works_p.  It jumps back to the setjmp
   in mftb_works_p, making that setjmp return 1.  The signal number is not
   used.  */
static RETSIGTYPE
mftb_works_handler (int sig)
{
  longjmp (mftb_works_buf, 1);
}
839
840int
841mftb_works_p (void)
842{
843  unsigned   a[2];
844  RETSIGTYPE (*old_handler) (int);
845  double     cycletime;
846
847  /* suppress a warning about a[] unused */
848  a[0] = 0;
849
850  if (! have_mftb)
851    return 0;
852
853#ifdef SIGILL
854  old_handler = signal (SIGILL, mftb_works_handler);
855  if (old_handler == SIG_ERR)
856    {
857      if (speed_option_verbose)
858	printf ("mftb_works_p(): SIGILL not supported, assuming mftb works\n");
859      return 1;
860    }
861  if (setjmp (mftb_works_buf))
862    {
863      if (speed_option_verbose)
864	printf ("mftb_works_p(): SIGILL during mftb, so doesn't work\n");
865      return 0;
866    }
867  MFTB (a);
868  signal (SIGILL, old_handler);
869  if (speed_option_verbose)
870    printf ("mftb_works_p(): mftb works\n");
871#else
872
873  if (speed_option_verbose)
874    printf ("mftb_works_p(): SIGILL not defined, assuming mftb works\n");
875#endif
876
877#if ! HAVE_GETTIMEOFDAY
878  if (speed_option_verbose)
879    printf ("mftb_works_p(): no gettimeofday available to measure mftb\n");
880  return 0;
881#endif
882
883  /* The time base is normally 1/4 of the bus speed on 6xx and 7xx chips, on
884     other chips it can be driven from an external clock. */
885  cycletime = freq_measure ("mftb", freq_measure_mftb_one);
886  if (cycletime == -1.0)
887    {
888      if (speed_option_verbose)
889	printf ("mftb_works_p(): cannot measure mftb period\n");
890      return 0;
891    }
892
893  mftb_unittime = cycletime;
894  return 1;
895}
896
897
/* Address of the least significant 32 bits of the memory mapped SGI cycle
   counter, set by a successful sgi_works_p.  */
volatile unsigned  *sgi_addr;

/* Return 1 if the SGI hardware cycle counter can be used, or 0 if not.
   The answer is determined on the first call and cached.

   syssgi is asked for the physical address and period of the counter, then
   the page containing it is mapped from /dev/mmem.  On success
   sgi_unittime is the counter period in seconds and sgi_addr points at the
   low 32 bits of the counter.

   The /dev/mmem descriptor is closed once mmap has been attempted, since
   a mapping doesn't need the descriptor kept open.

   Without both syssgi() and mmap() the return is always 0.  */
int
sgi_works_p (void)
{
#if HAVE_SYSSGI && HAVE_MMAP
  static int  result = -1;

  size_t          pagesize, offset;
  __psunsigned_t  phys, physpage;
  void            *virtpage;
  unsigned        period_picoseconds;
  int             size, fd, mmap_errno;

  if (result != -1)
    return result;

  phys = syssgi (SGI_QUERY_CYCLECNTR, &period_picoseconds);
  if (phys == (__psunsigned_t) -1)
    {
      /* ENODEV is the error when a counter is not available */
      if (speed_option_verbose)
        printf ("syssgi SGI_QUERY_CYCLECNTR error: %s\n", strerror (errno));
      result = 0;
      return result;
    }
  sgi_unittime = period_picoseconds * 1e-12;

  /* "size" is the counter width in bits.

     IRIX 5 doesn't have SGI_CYCLECNTR_SIZE, assume 32 bits in that case.
     Challenge/ONYX hardware has a 64 bit counter, but there seems no
     obvious way to identify that without SGI_CYCLECNTR_SIZE.  */
#ifdef SGI_CYCLECNTR_SIZE
  size = syssgi (SGI_CYCLECNTR_SIZE);
  if (size == -1)
    {
      if (speed_option_verbose)
        {
          printf ("syssgi SGI_CYCLECNTR_SIZE error: %s\n", strerror (errno));
          printf ("    will assume size==32\n");
        }
      size = 32;
    }
#else
  size = 32;
#endif

  if (size < 32)
    {
      printf ("syssgi SGI_CYCLECNTR_SIZE gives %d, expected 32 or 64\n", size);
      result = 0;
      return result;
    }

  pagesize = getpagesize();
  offset = (size_t) phys & (pagesize-1);
  physpage = phys - offset;

  /* shouldn't cross over a page boundary */
  ASSERT_ALWAYS (offset + size/8 <= pagesize);

  fd = open("/dev/mmem", O_RDONLY);
  if (fd == -1)
    {
      if (speed_option_verbose)
        printf ("open /dev/mmem: %s\n", strerror (errno));
      result = 0;
      return result;
    }

  virtpage = mmap (0, pagesize, PROT_READ, MAP_PRIVATE, fd, (off_t) physpage);
  mmap_errno = errno;  /* close() below might change errno */

  /* The descriptor isn't needed any more, whether or not mmap worked.  */
  close (fd);

  if (virtpage == (void *) -1)
    {
      if (speed_option_verbose)
        printf ("mmap /dev/mmem: %s\n", strerror (mmap_errno));
      result = 0;
      return result;
    }

  /* address of least significant 4 bytes, knowing mips is big endian */
  sgi_addr = (unsigned *) ((char *) virtpage + offset
                           + size/8 - sizeof(unsigned));
  result = 1;
  return result;

#else /* ! (HAVE_SYSSGI && HAVE_MMAP) */
  return 0;
#endif
}
986
987
988#define DEFAULT(var,n)  \
989  do {                  \
990    if (! (var))        \
991      (var) = (n);      \
992  } while (0)
993
994void
995speed_time_init (void)
996{
997  double supplement_unittime = 0.0;
998
999  static int  speed_time_initialized = 0;
1000  if (speed_time_initialized)
1001    return;
1002  speed_time_initialized = 1;
1003
1004  speed_cycletime_init ();
1005
1006  if (!speed_option_cycles_broken && have_cycles && cycles_works_p ())
1007    {
1008      use_cycles = 1;
1009      DEFAULT (speed_cycletime, 1.0);
1010      speed_unittime = speed_cycletime;
1011      DEFAULT (speed_precision, 10000);
1012      strcpy (speed_time_string, "CPU cycle counter");
1013
1014      /* only used if a supplementary method is chosen below */
1015      cycles_limit = (have_cycles == 1 ? M_2POW32 : M_2POW64) / 2.0
1016	* speed_cycletime;
1017
1018      if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
1019	{
1020	  /* this is a good combination */
1021	  use_grus = 1;
1022	  supplement_unittime = grus_unittime = 1.0e-6;
1023	  strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond getrusage()");
1024	}
1025      else if (have_cycles == 1)
1026	{
1027	  /* When speed_cyclecounter has a limited range, look for something
1028	     to supplement it. */
1029	  if (have_gtod && gettimeofday_microseconds_p())
1030	    {
1031	      use_gtod = 1;
1032	      supplement_unittime = gtod_unittime = 1.0e-6;
1033	      strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond gettimeofday()");
1034	    }
1035	  else if (have_grus)
1036	    {
1037	      use_grus = 1;
1038	      supplement_unittime = grus_unittime = 1.0 / (double) clk_tck ();
1039	      sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick getrusage()", unittime_string (supplement_unittime));
1040	    }
1041	  else if (have_times)
1042	    {
1043	      use_times = 1;
1044	      supplement_unittime = times_unittime = 1.0 / (double) clk_tck ();
1045	      sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick times()", unittime_string (supplement_unittime));
1046	    }
1047	  else if (have_gtod)
1048	    {
1049	      use_gtod = 1;
1050	      supplement_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
1051	      sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick gettimeofday()", unittime_string (supplement_unittime));
1052	    }
1053	  else
1054	    {
1055	      fprintf (stderr, "WARNING: cycle counter is 32 bits and there's no other functions.\n");
1056	      fprintf (stderr, "    Wraparounds may produce bad results on long measurements.\n");
1057	    }
1058	}
1059
1060      if (use_grus || use_times || use_gtod)
1061	{
1062	  /* must know cycle period to compare cycles to other measuring
1063	     (via cycles_limit) */
1064	  speed_cycletime_need_seconds ();
1065
1066	  if (speed_precision * supplement_unittime > cycles_limit)
1067	    {
1068	      fprintf (stderr, "WARNING: requested precision can't always be achieved due to limited range\n");
1069	      fprintf (stderr, "    cycle counter and limited precision supplemental method\n");
1070	      fprintf (stderr, "    (%s)\n", speed_time_string);
1071	    }
1072	}
1073    }
1074  else if (have_stck)
1075    {
1076      strcpy (speed_time_string, "STCK timestamp");
1077      /* stck is in units of 2^-12 microseconds, which is very likely higher
1078	 resolution than a cpu cycle */
1079      if (speed_cycletime == 0.0)
1080	speed_cycletime_fail
1081	  ("Need to know CPU frequency for effective stck unit");
1082      speed_unittime = MAX (speed_cycletime, STCK_PERIOD);
1083      DEFAULT (speed_precision, 10000);
1084    }
1085  else if (have_mftb && mftb_works_p ())
1086    {
1087      use_mftb = 1;
1088      DEFAULT (speed_precision, 10000);
1089      speed_unittime = mftb_unittime;
1090      sprintf (speed_time_string, "mftb counter (%s)",
1091	       unittime_string (speed_unittime));
1092    }
1093  else if (have_sgi && sgi_works_p ())
1094    {
1095      use_sgi = 1;
1096      DEFAULT (speed_precision, 10000);
1097      speed_unittime = sgi_unittime;
1098      sprintf (speed_time_string, "syssgi() mmap counter (%s), supplemented by millisecond getrusage()",
1099	       unittime_string (speed_unittime));
1100      /* supplemented with getrusage, which we assume to have 1ms resolution */
1101      use_grus = 1;
1102      supplement_unittime = 1e-3;
1103    }
1104  else if (have_rrt)
1105    {
1106      timebasestruct_t  t;
1107      use_rrt = 1;
1108      DEFAULT (speed_precision, 10000);
1109      read_real_time (&t, sizeof(t));
1110      switch (t.flag) {
1111      case RTC_POWER:
1112	/* FIXME: What's the actual RTC resolution? */
1113	speed_unittime = 1e-7;
1114	strcpy (speed_time_string, "read_real_time() power nanoseconds");
1115	break;
1116      case RTC_POWER_PC:
1117	t.tb_high = 1;
1118	t.tb_low = 0;
1119	time_base_to_time (&t, sizeof(t));
1120	speed_unittime = TIMEBASESTRUCT_SECS(&t) / M_2POW32;
1121	sprintf (speed_time_string, "%s read_real_time() powerpc ticks",
1122		 unittime_string (speed_unittime));
1123	break;
1124      default:
1125	fprintf (stderr, "ERROR: Unrecognised timebasestruct_t flag=%d\n",
1126		 t.flag);
1127	abort ();
1128      }
1129    }
1130  else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5e-6)
1131    {
1132      /* use clock_gettime if microsecond or better resolution */
1133    choose_cgt:
1134      use_cgt = 1;
1135      speed_unittime = cgt_unittime;
1136      DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000));
1137      strcpy (speed_time_string, "microsecond accurate clock_gettime()");
1138    }
1139  else if (have_times && clk_tck() > 1000000)
1140    {
1141      /* Cray vector systems have times() which is clock cycle resolution
1142	 (eg. 450 MHz).  */
1143      DEFAULT (speed_precision, 10000);
1144      goto choose_times;
1145    }
1146  else if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
1147    {
1148      use_grus = 1;
1149      speed_unittime = grus_unittime = 1.0e-6;
1150      DEFAULT (speed_precision, 1000);
1151      strcpy (speed_time_string, "microsecond accurate getrusage()");
1152    }
1153  else if (have_gtod && gettimeofday_microseconds_p())
1154    {
1155      use_gtod = 1;
1156      speed_unittime = gtod_unittime = 1.0e-6;
1157      DEFAULT (speed_precision, 1000);
1158      strcpy (speed_time_string, "microsecond accurate gettimeofday()");
1159    }
1160  else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5/clk_tck())
1161    {
1162      /* use clock_gettime if 1 tick or better resolution */
1163      goto choose_cgt;
1164    }
1165  else if (have_times)
1166    {
1167      use_tick_boundary = 1;
1168      DEFAULT (speed_precision, 200);
1169    choose_times:
1170      use_times = 1;
1171      speed_unittime = times_unittime = 1.0 / (double) clk_tck ();
1172      sprintf (speed_time_string, "%s clock tick times()",
1173	       unittime_string (speed_unittime));
1174    }
1175  else if (have_grus)
1176    {
1177      use_grus = 1;
1178      use_tick_boundary = 1;
1179      speed_unittime = grus_unittime = 1.0 / (double) clk_tck ();
1180      DEFAULT (speed_precision, 200);
1181      sprintf (speed_time_string, "%s clock tick getrusage()\n",
1182	       unittime_string (speed_unittime));
1183    }
1184  else if (have_gtod)
1185    {
1186      use_gtod = 1;
1187      use_tick_boundary = 1;
1188      speed_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
1189      DEFAULT (speed_precision, 200);
1190      sprintf (speed_time_string, "%s clock tick gettimeofday()",
1191	       unittime_string (speed_unittime));
1192    }
1193  else
1194    {
1195      fprintf (stderr, "No time measuring method available\n");
1196      fprintf (stderr, "None of: speed_cyclecounter(), STCK(), getrusage(), gettimeofday(), times()\n");
1197      abort ();
1198    }
1199
1200  if (speed_option_verbose)
1201    {
1202      printf ("speed_time_init: %s\n", speed_time_string);
1203      printf ("    speed_precision     %d\n", speed_precision);
1204      printf ("    speed_unittime      %.2g\n", speed_unittime);
1205      if (supplement_unittime)
1206	printf ("    supplement_unittime %.2g\n", supplement_unittime);
1207      printf ("    use_tick_boundary   %d\n", use_tick_boundary);
1208      if (have_cycles)
1209	printf ("    cycles_limit        %.2g seconds\n", cycles_limit);
1210    }
1211}
1212
1213
1214
1215/* Burn up CPU until a clock tick boundary, for greater accuracy.  Set the
1216   corresponding "start_foo" appropriately too. */
1217
1218void
1219grus_tick_boundary (void)
1220{
1221  struct_rusage  prev;
1222  getrusage (0, &prev);
1223  do {
1224    getrusage (0, &start_grus);
1225  } while (start_grus.ru_utime.tv_usec == prev.ru_utime.tv_usec);
1226}
1227
1228void
1229gtod_tick_boundary (void)
1230{
1231  struct_timeval  prev;
1232  gettimeofday (&prev, NULL);
1233  do {
1234    gettimeofday (&start_gtod, NULL);
1235  } while (start_gtod.tv_usec == prev.tv_usec);
1236}
1237
1238void
1239times_tick_boundary (void)
1240{
1241  struct_tms  prev;
1242  times (&prev);
1243  do
1244    times (&start_times);
1245  while (start_times.tms_utime == prev.tms_utime);
1246}
1247
1248
1249/* "have_" values are tested to let unused code go dead.  */
1250
1251void
1252speed_starttime (void)
1253{
1254  speed_time_init ();
1255
1256  if (have_grus && use_grus)
1257    {
1258      if (use_tick_boundary)
1259	grus_tick_boundary ();
1260      else
1261	getrusage (0, &start_grus);
1262    }
1263
1264  if (have_gtod && use_gtod)
1265    {
1266      if (use_tick_boundary)
1267	gtod_tick_boundary ();
1268      else
1269	gettimeofday (&start_gtod, NULL);
1270    }
1271
1272  if (have_times && use_times)
1273    {
1274      if (use_tick_boundary)
1275	times_tick_boundary ();
1276      else
1277	times (&start_times);
1278    }
1279
1280  if (have_cgt && use_cgt)
1281    clock_gettime (CGT_ID, &start_cgt);
1282
1283  if (have_rrt && use_rrt)
1284    read_real_time (&start_rrt, sizeof(start_rrt));
1285
1286  if (have_sgi && use_sgi)
1287    start_sgi = *sgi_addr;
1288
1289  if (have_mftb && use_mftb)
1290    MFTB (start_mftb);
1291
1292  if (have_stck && use_stck)
1293    STCK (start_stck);
1294
1295  /* Cycles sampled last for maximum accuracy. */
1296  if (have_cycles && use_cycles)
1297    speed_cyclecounter (start_cycles);
1298}
1299
1300
1301/* Calculate the difference between two cycle counter samples, as a "double"
1302   counter of cycles.
1303
1304   The start and end values are allowed to cancel in integers in case the
1305   counter values are bigger than the 53 bits that normally fit in a double.
1306
1307   This works even if speed_cyclecounter() puts a value bigger than 32-bits
1308   in the low word (the high word always gets a 2**32 multiplier though). */
1309
1310double
1311speed_cyclecounter_diff (const unsigned end[2], const unsigned start[2])
1312{
1313  unsigned  d;
1314  double    t;
1315
1316  if (have_cycles == 1)
1317    {
1318      t = (end[0] - start[0]);
1319    }
1320  else
1321    {
1322      d = end[0] - start[0];
1323      t = d - (d > end[0] ? M_2POWU : 0.0);
1324      t += (end[1] - start[1]) * M_2POW32;
1325    }
1326  return t;
1327}
1328
1329
1330double
1331speed_mftb_diff (const unsigned end[2], const unsigned start[2])
1332{
1333  unsigned  d;
1334  double    t;
1335
1336  d = end[0] - start[0];
1337  t = (double) d - (d > end[0] ? M_2POW32 : 0.0);
1338  t += (end[1] - start[1]) * M_2POW32;
1339  return t;
1340}
1341
1342
1343/* Calculate the difference between "start" and "end" using fields "sec" and
1344   "psec", where each "psec" is a "punit" of a second.
1345
1346   The seconds parts are allowed to cancel before being combined with the
1347   psec parts, in case a simple "sec+psec*punit" exceeds the precision of a
1348   double.
1349
1350   Total time is only calculated in a "double" since an integer count of
1351   psecs might overflow.  2^32 microseconds is only a bit over an hour, or
1352   2^32 nanoseconds only about 4 seconds.
1353
1354   The casts to "long" are for the benefit of timebasestruct_t, where the
1355   fields are only "unsigned int", but we want a signed difference.  */
1356
1357#define DIFF_SECS_ROUTINE(sec, psec, punit)                     \
1358  {                                                             \
1359    long  sec_diff, psec_diff;                                  \
1360    sec_diff = (long) end->sec - (long) start->sec;             \
1361    psec_diff = (long) end->psec - (long) start->psec;          \
1362    return (double) sec_diff + punit * (double) psec_diff;      \
1363  }
1364
1365double
1366timeval_diff_secs (const struct_timeval *end, const struct_timeval *start)
1367{
1368  DIFF_SECS_ROUTINE (tv_sec, tv_usec, 1e-6);
1369}
1370
1371double
1372rusage_diff_secs (const struct_rusage *end, const struct_rusage *start)
1373{
1374  DIFF_SECS_ROUTINE (ru_utime.tv_sec, ru_utime.tv_usec, 1e-6);
1375}
1376
1377double
1378timespec_diff_secs (const struct_timespec *end, const struct_timespec *start)
1379{
1380  DIFF_SECS_ROUTINE (tv_sec, tv_nsec, 1e-9);
1381}
1382
1383/* This is for use after time_base_to_time, ie. for seconds and nanoseconds. */
1384double
1385timebasestruct_diff_secs (const timebasestruct_t *end,
1386			  const timebasestruct_t *start)
1387{
1388  DIFF_SECS_ROUTINE (tb_high, tb_low, 1e-9);
1389}
1390
1391
1392double
1393speed_endtime (void)
1394{
1395#define END_USE(name,value)                             \
1396  do {                                                  \
1397    if (speed_option_verbose >= 3)                      \
1398      printf ("speed_endtime(): used %s\n", name);      \
1399    result = value;                                     \
1400    goto done;                                          \
1401  } while (0)
1402
1403#define END_ENOUGH(name,value)                                          \
1404  do {                                                                  \
1405    if (speed_option_verbose >= 3)                                      \
1406      printf ("speed_endtime(): %s gives enough precision\n", name);    \
1407    result = value;                                                     \
1408    goto done;                                                          \
1409  } while (0)
1410
1411#define END_EXCEED(name,value)                                            \
1412  do {                                                                    \
1413    if (speed_option_verbose >= 3)                                        \
1414      printf ("speed_endtime(): cycle counter limit exceeded, used %s\n", \
1415	      name);                                                      \
1416    result = value;                                                       \
1417    goto done;                                                            \
1418  } while (0)
1419
1420  unsigned          end_cycles[2];
1421  stck_t            end_stck;
1422  unsigned          end_mftb[2];
1423  unsigned          end_sgi;
1424  timebasestruct_t  end_rrt;
1425  struct_timespec   end_cgt;
1426  struct_timeval    end_gtod;
1427  struct_rusage     end_grus;
1428  struct_tms        end_times;
1429  double            t_gtod, t_grus, t_times, t_cgt;
1430  double            t_rrt, t_sgi, t_mftb, t_stck, t_cycles;
1431  double            result;
1432
1433  /* Cycles sampled first for maximum accuracy.
1434     "have_" values tested to let unused code go dead.  */
1435
1436  if (have_cycles && use_cycles)  speed_cyclecounter (end_cycles);
1437  if (have_stck   && use_stck)    STCK (end_stck);
1438  if (have_mftb   && use_mftb)    MFTB (end_mftb);
1439  if (have_sgi    && use_sgi)     end_sgi = *sgi_addr;
1440  if (have_rrt    && use_rrt)     read_real_time (&end_rrt, sizeof(end_rrt));
1441  if (have_cgt    && use_cgt)     clock_gettime (CGT_ID, &end_cgt);
1442  if (have_gtod   && use_gtod)    gettimeofday (&end_gtod, NULL);
1443  if (have_grus   && use_grus)    getrusage (0, &end_grus);
1444  if (have_times  && use_times)   times (&end_times);
1445
1446  result = -1.0;
1447
1448  if (speed_option_verbose >= 4)
1449    {
1450      printf ("speed_endtime():\n");
1451      if (use_cycles)
1452	printf ("   cycles  0x%X,0x%X -> 0x%X,0x%X\n",
1453		start_cycles[1], start_cycles[0],
1454		end_cycles[1], end_cycles[0]);
1455
1456      if (use_stck)
1457	printf ("   stck  0x%lX -> 0x%lX\n", start_stck, end_stck);
1458
1459      if (use_mftb)
1460	printf ("   mftb  0x%X,%08X -> 0x%X,%08X\n",
1461		start_mftb[1], start_mftb[0],
1462		end_mftb[1], end_mftb[0]);
1463
1464      if (use_sgi)
1465	printf ("   sgi  0x%X -> 0x%X\n", start_sgi, end_sgi);
1466
1467      if (use_rrt)
1468	printf ("   read_real_time  (%d)%u,%u -> (%d)%u,%u\n",
1469		start_rrt.flag, start_rrt.tb_high, start_rrt.tb_low,
1470		end_rrt.flag, end_rrt.tb_high, end_rrt.tb_low);
1471
1472      if (use_cgt)
1473	printf ("   clock_gettime  %ld.%09ld -> %ld.%09ld\n",
1474		(long) start_cgt.tv_sec, (long) start_cgt.tv_nsec,
1475		(long) end_cgt.tv_sec, (long) end_cgt.tv_nsec);
1476
1477      if (use_gtod)
1478	printf ("   gettimeofday  %ld.%06ld -> %ld.%06ld\n",
1479		(long) start_gtod.tv_sec,
1480		(long) start_gtod.tv_usec,
1481		(long) end_gtod.tv_sec,
1482		(long) end_gtod.tv_usec);
1483
1484      if (use_grus)
1485	printf ("   getrusage  %ld.%06ld -> %ld.%06ld\n",
1486		(long) start_grus.ru_utime.tv_sec,
1487		(long) start_grus.ru_utime.tv_usec,
1488		(long) end_grus.ru_utime.tv_sec,
1489		(long) end_grus.ru_utime.tv_usec);
1490
1491      if (use_times)
1492	printf ("   times  %ld -> %ld\n",
1493		start_times.tms_utime, end_times.tms_utime);
1494    }
1495
1496  if (use_rrt)
1497    {
1498      time_base_to_time (&start_rrt, sizeof(start_rrt));
1499      time_base_to_time (&end_rrt, sizeof(end_rrt));
1500      t_rrt = timebasestruct_diff_secs (&end_rrt, &start_rrt);
1501      END_USE ("read_real_time()", t_rrt);
1502    }
1503
1504  if (use_cgt)
1505    {
1506      t_cgt = timespec_diff_secs (&end_cgt, &start_cgt);
1507      END_USE ("clock_gettime()", t_cgt);
1508    }
1509
1510  if (use_grus)
1511    {
1512      t_grus = rusage_diff_secs (&end_grus, &start_grus);
1513
1514      /* Use getrusage() if the cycle counter limit would be exceeded, or if
1515	 it provides enough accuracy already. */
1516      if (use_cycles)
1517	{
1518	  if (t_grus >= speed_precision*grus_unittime)
1519	    END_ENOUGH ("getrusage()", t_grus);
1520	  if (t_grus >= cycles_limit)
1521	    END_EXCEED ("getrusage()", t_grus);
1522	}
1523    }
1524
1525  if (use_times)
1526    {
1527      t_times = (end_times.tms_utime - start_times.tms_utime) * times_unittime;
1528
1529      /* Use times() if the cycle counter limit would be exceeded, or if
1530	 it provides enough accuracy already. */
1531      if (use_cycles)
1532	{
1533	  if (t_times >= speed_precision*times_unittime)
1534	    END_ENOUGH ("times()", t_times);
1535	  if (t_times >= cycles_limit)
1536	    END_EXCEED ("times()", t_times);
1537	}
1538    }
1539
1540  if (use_gtod)
1541    {
1542      t_gtod = timeval_diff_secs (&end_gtod, &start_gtod);
1543
1544      /* Use gettimeofday() if it measured a value bigger than the cycle
1545	 counter can handle.  */
1546      if (use_cycles)
1547	{
1548	  if (t_gtod >= cycles_limit)
1549	    END_EXCEED ("gettimeofday()", t_gtod);
1550	}
1551    }
1552
1553  if (use_mftb)
1554    {
1555      t_mftb = speed_mftb_diff (end_mftb, start_mftb) * mftb_unittime;
1556      END_USE ("mftb", t_mftb);
1557    }
1558
1559  if (use_stck)
1560    {
1561      t_stck = (end_stck - start_stck) * STCK_PERIOD;
1562      END_USE ("stck", t_stck);
1563    }
1564
1565  if (use_sgi)
1566    {
1567      t_sgi = (end_sgi - start_sgi) * sgi_unittime;
1568      END_USE ("SGI hardware counter", t_sgi);
1569    }
1570
1571  if (use_cycles)
1572    {
1573      t_cycles = speed_cyclecounter_diff (end_cycles, start_cycles)
1574	* speed_cycletime;
1575      END_USE ("cycle counter", t_cycles);
1576    }
1577
1578  if (use_grus && getrusage_microseconds_p())
1579    END_USE ("getrusage()", t_grus);
1580
1581  if (use_gtod && gettimeofday_microseconds_p())
1582    END_USE ("gettimeofday()", t_gtod);
1583
1584  if (use_times)  END_USE ("times()",        t_times);
1585  if (use_grus)   END_USE ("getrusage()",    t_grus);
1586  if (use_gtod)   END_USE ("gettimeofday()", t_gtod);
1587
1588  fprintf (stderr, "speed_endtime(): oops, no time method available\n");
1589  abort ();
1590
1591 done:
1592  if (result < 0.0)
1593    {
1594      if (speed_option_verbose >= 2)
1595	fprintf (stderr, "speed_endtime(): warning, treating negative time as zero: %.9f\n", result);
1596      result = 0.0;
1597    }
1598  return result;
1599}
1600