/* Time routines for speed measurements.
2
3Copyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
4
5This file is part of the GNU MP Library.
6
7The GNU MP Library is free software; you can redistribute it and/or modify
8it under the terms of the GNU Lesser General Public License as published by
9the Free Software Foundation; either version 3 of the License, or (at your
10option) any later version.
11
12The GNU MP Library is distributed in the hope that it will be useful, but
13WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15License for more details.
16
17You should have received a copy of the GNU Lesser General Public License
18along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
19
20
21/* Usage:
22
23   The code in this file implements the lowest level of time measuring,
24   simple one-time measuring of time between two points.
25
26   void speed_starttime (void)
27   double speed_endtime (void)
28       Call speed_starttime to start measuring, and then call speed_endtime
29       when done.
30
31       speed_endtime returns the time taken, in seconds.  Or if the timebase
32       is in CPU cycles and the CPU frequency is unknown then speed_endtime
33       returns cycles.  Applications can identify the cycles return by
34       checking for speed_cycletime (described below) equal to 1.0.
35
36       If some sort of temporary glitch occurs then speed_endtime returns
37       0.0.  Currently this is for various cases where a negative time has
38       occurred.  This unfortunately occurs with getrusage on some systems,
39       and with the hppa cycle counter on hpux.
40
41   double speed_cycletime
42       The time in seconds for each CPU cycle.  For example on a 100 MHz CPU
43       this would be 1.0e-8.
44
45       If the CPU frequency is unknown, then speed_cycletime is either 0.0
46       or 1.0.  It's 0.0 when speed_endtime is returning seconds, or it's
47       1.0 when speed_endtime is returning cycles.
48
49       It may be noted that "speed_endtime() / speed_cycletime" gives a
50       measured time in cycles, irrespective of whether speed_endtime is
51       returning cycles or seconds.  (Assuming cycles can be had, ie. it's
52       either cycles already or the cpu frequency is known.  See also
53       speed_cycletime_need_cycles below.)
54
55   double speed_unittime
56       The unit of time measurement accuracy for the timing method in use.
57       This is in seconds or cycles, as per speed_endtime.
58
59   char speed_time_string[]
60       A null-terminated string describing the time method in use.
61
62   void speed_time_init (void)
63       Initialize time measuring.  speed_starttime() does this
64       automatically, so it's only needed if an application wants to inspect
65       the above global variables before making a measurement.
66
67   int speed_precision
68       The intended accuracy of time measurements.  speed_measure() in
69       common.c for instance runs target routines with enough repetitions so
70       it takes at least "speed_unittime * speed_precision" (this expression
71       works for both cycles or seconds from speed_endtime).
72
       A program can provide an option for the user to set speed_precision.
74       If speed_precision is zero when speed_time_init or speed_starttime
75       first run then it gets a default based on the measuring method
76       chosen.  (More precision for higher accuracy methods.)
77
78   void speed_cycletime_need_seconds (void)
79       Call this to demand that speed_endtime will return seconds, and not
80       cycles.  If only cycles are available then an error is printed and
81       the program exits.
82
83   void speed_cycletime_need_cycles (void)
84       Call this to demand that speed_cycletime is non-zero, so that
85       "speed_endtime() / speed_cycletime" will give times in cycles.
86
87
88
89   Notes:
90
91   Various combinations of cycle counter, read_real_time(), getrusage(),
92   gettimeofday() and times() can arise, according to which are available
93   and their precision.
94
95
96   Allowing speed_endtime() to return either seconds or cycles is only a
97   slight complication and makes it possible for the speed program to do
98   some sensible things without demanding the CPU frequency.  If seconds are
99   being measured then it can always print seconds, and if cycles are being
100   measured then it can always print them without needing to know how long
101   they are.  Also the tune program doesn't care at all what the units are.
102
103   GMP_CPU_FREQUENCY can always be set when the automated methods in freq.c
104   fail.  This will be needed if times in seconds are wanted but a cycle
105   counter is being used, or if times in cycles are wanted but getrusage or
106   another seconds based timer is in use.
107
108   If the measuring method uses a cycle counter but supplements it with
109   getrusage or the like, then knowing the CPU frequency is mandatory since
110   the code compares values from the two.
111
112
113   Not done:
114
115   Solaris gethrtime() seems no more than a slow way to access the Sparc V9
116   cycle counter.  gethrvtime() seems to be relevant only to light weight
117   processes, it doesn't for instance give nanosecond virtual time.  So
118   neither of these are used.
119
120
121   Bugs:
122
123   getrusage_microseconds_p is fundamentally flawed, getrusage and
124   gettimeofday can have resolutions other than clock ticks or microseconds,
125   for instance IRIX 5 has a tick of 10 ms but a getrusage of 1 ms.
126
127
128   Enhancements:
129
130   The SGI hardware counter has 64 bits on some machines, which could be
131   used when available.  But perhaps 32 bits is enough range, and then rely
132   on the getrusage supplement.
133
134   Maybe getrusage (or times) should be used as a supplement for any
135   wall-clock measuring method.  Currently a wall clock with a good range
136   (eg. a 64-bit cycle counter) is used without a supplement.
137
138   On PowerPC the timebase registers could be used, but would have to do
139   something to find out the speed.  On 6xx chips it's normally 1/4 bus
140   speed, on 4xx chips it's either that or an external clock.  Measuring
141   against gettimeofday might be ok.  */
142
143
144#include "config.h"
145
146#include <errno.h>
147#include <setjmp.h>
148#include <signal.h>
149#include <stddef.h>
150#include <stdio.h>
151#include <string.h>
152#include <stdlib.h> /* for getenv() */
153
154#if HAVE_FCNTL_H
155#include <fcntl.h>  /* for open() */
156#endif
157
158#if HAVE_STDINT_H
159#include <stdint.h> /* for uint64_t */
160#endif
161
162#if HAVE_UNISTD_H
163#include <unistd.h> /* for sysconf() */
164#endif
165
166#include <sys/types.h>
167
168#if TIME_WITH_SYS_TIME
169# include <sys/time.h>  /* for struct timeval */
170# include <time.h>
171#else
172# if HAVE_SYS_TIME_H
173#  include <sys/time.h>
174# else
175#  include <time.h>
176# endif
177#endif
178
179#if HAVE_SYS_MMAN_H
180#include <sys/mman.h>      /* for mmap() */
181#endif
182
183#if HAVE_SYS_RESOURCE_H
184#include <sys/resource.h>  /* for struct rusage */
185#endif
186
187#if HAVE_SYS_SYSSGI_H
188#include <sys/syssgi.h>    /* for syssgi() */
189#endif
190
191#if HAVE_SYS_SYSTEMCFG_H
192#include <sys/systemcfg.h> /* for RTC_POWER on AIX */
193#endif
194
195#if HAVE_SYS_TIMES_H
196#include <sys/times.h>  /* for times() and struct tms */
197#endif
198
199#include "gmp.h"
200#include "gmp-impl.h"
201
202#include "speed.h"
203
204
/* Fallback for systems without strerror(): every error number gives the
   same fixed placeholder string.  strerror is only used for some stuff on
   newish systems, no need to have a proper replacement.  */
#if ! HAVE_STRERROR
#define strerror(n)  "<strerror not available>"
#endif
210
211
/* Public variables, described in the "Usage" comment at the top of this
   file.  */
char    speed_time_string[256];  /* description of the time method in use */
int     speed_precision = 0;     /* 0 means pick a default at init time */
double  speed_unittime;          /* measuring resolution, seconds or cycles */
double  speed_cycletime = 0.0;   /* seconds per CPU cycle; the 0.0 and 1.0
				    special values are explained in the
				    usage comment */
216
217
/* M_2POWU is (INT_MAX+1)*2 as a double, which is UINT_MAX+1 provided
   unsigned has exactly one more value bit than int.  It's built from
   INT_MAX because we don't rely on "unsigned" to "double" conversion, it's
   broken in SunOS 4 native cc */
#define M_2POWU   (((double) INT_MAX + 1.0) * 2.0)

#define M_2POW32  4294967296.0             /* 2^32 */
#define M_2POW64  (M_2POW32 * M_2POW32)    /* 2^64 */
224
225
226/* Conditionals for the time functions available are done with normal C
227   code, which is a lot easier than wildly nested preprocessor directives.
228
229   The choice of what to use is partly made at run-time, according to
230   whether the cycle counter works and the measured accuracy of getrusage
231   and gettimeofday.
232
233   A routine that's not available won't be getting called, but is an abort()
234   to be sure it isn't called mistakenly.
235
236   It can be assumed that if a function exists then its data type will, but
237   if the function doesn't then the data type might or might not exist, so
238   the type can't be used unconditionally.  The "struct_rusage" etc macros
239   provide dummies when the respective function doesn't exist. */
240
241
/* have_cycles takes the value of HAVE_SPEED_CYCLECOUNTER when that's
   non-zero (speed_time_init treats 1 as a counter with only a 32-bit
   range).  Otherwise it's 0 and speed_cyclecounter() is turned into an
   assertion failure, so a mistaken call gets noticed.  */
#if HAVE_SPEED_CYCLECOUNTER
static const int have_cycles = HAVE_SPEED_CYCLECOUNTER;
#else
static const int have_cycles = 0;
#define speed_cyclecounter(p)  ASSERT_FAIL (speed_cyclecounter not available)
#endif
248
/* "stck" returns ticks since 1 Jan 1900 00:00 GMT, where each tick is 2^-12
   microseconds.  Same #ifdefs here as in longlong.h.

   have_stck says whether the instruction can be used, and use_stck whether
   it is used (always, when available).  Without it STCK() is an assertion
   failure and stck_t is only a dummy so declarations still compile.

   The asm is "volatile" because it has no inputs, so gcc would otherwise
   be entitled to treat two executions as giving the same result and
   combine, move or drop them.  The MFTB asm below is volatile for the same
   reason.  */
#if defined (__GNUC__) && ! defined (NO_ASM)                            \
  && (defined (__i370__) || defined (__s390__) || defined (__mvs__))
static const int  have_stck = 1;
static const int  use_stck = 1;  /* always use when available */
typedef uint64_t  stck_t; /* gcc for s390 is quite new, always has uint64_t */
#define STCK(timestamp)                          \
  do {                                           \
    asm volatile ("stck %0" : "=m" (timestamp)); \
  } while (0)
#else
static const int  have_stck = 0;
static const int  use_stck = 0;
typedef unsigned long  stck_t;   /* dummy */
#define STCK(timestamp)  ASSERT_FAIL (stck instruction not available)
#endif
#define STCK_PERIOD      (1.0 / 4096e6)   /* 2^-12 microseconds */
267
/* mftb
   MFTB(a) stores a time base reading into the two-element array "a", low
   word in a[0] and high word in a[1].

   In the gcc asm form the upper word is read before and after the lower
   word, and the whole read is repeated until both upper reads agree, so
   the two halves belong together.  Without gcc asm the job is handed to
   mftb_function().  On a non-powerpc build have_mftb is 0 and MFTB zeros
   a[] and then fails an assertion.

   Enhancement: On 64-bit chips mftb gives a 64-bit value, no need for mftbu
   and a loop (see powerpc64.asm).  */
#if HAVE_HOST_CPU_FAMILY_powerpc
static const int  have_mftb = 1;
#if defined (__GNUC__) && ! defined (NO_ASM)
#define MFTB(a)                         \
  do {                                  \
    unsigned  __h1, __l, __h2;          \
    do {                                \
      asm volatile ("mftbu %0\n"        \
		    "mftb  %1\n"        \
		    "mftbu %2"          \
		    : "=r" (__h1),      \
		      "=r" (__l),       \
		      "=r" (__h2));     \
    } while (__h1 != __h2);             \
    a[0] = __l;                         \
    a[1] = __h1;                        \
  } while (0)
#else
#define MFTB(a)   mftb_function (a)
#endif
#else /* ! powerpc */
static const int  have_mftb = 0;
#define MFTB(a)                         \
  do {                                  \
    a[0] = 0;                           \
    a[1] = 0;                           \
    ASSERT_FAIL (mftb not available);   \
  } while (0)
#endif
300
/* have_sgi is 1 only when both syssgi() and mmap() exist, since
   sgi_works_p() needs the pair of them.
   Unicos 10.X has syssgi(), but not mmap(). */
#if HAVE_SYSSGI && HAVE_MMAP
static const int  have_sgi = 1;
#else
static const int  have_sgi = 0;
#endif
307
/* read_real_time().  When it's not available have_rrt is 0, the two
   functions become assertion failures, and stand-ins are provided for the
   RTC_POWER constants and for timebasestruct_t so that code and
   declarations using them still compile.  */
#if HAVE_READ_REAL_TIME
static const int have_rrt = 1;
#else
static const int have_rrt = 0;
#define read_real_time(t,s)     ASSERT_FAIL (read_real_time not available)
#define time_base_to_time(t,s)  ASSERT_FAIL (time_base_to_time not available)
#define RTC_POWER     1
#define RTC_POWER_PC  2
#define timebasestruct_t   struct timebasestruct_dummy
/* dummy with just the fields referred to in this file */
struct timebasestruct_dummy {
  int             flag;
  unsigned int    tb_high;
  unsigned int    tb_low;
};
#endif
323
/* clock_gettime().  struct_timespec is the real struct timespec when the
   function exists, otherwise a dummy.  Without the function, clock_gettime
   and clock_getres fail an assertion; the ", -1" gives them a value so
   they can still be written inside a comparison.  */
#if HAVE_CLOCK_GETTIME
static const int have_cgt = 1;
#define struct_timespec  struct timespec
#else
static const int have_cgt = 0;
#define struct_timespec       struct timespec_dummy
#define clock_gettime(id,ts)  (ASSERT_FAIL (clock_gettime not available), -1)
#define clock_getres(id,ts)   (ASSERT_FAIL (clock_getres not available), -1)
#endif
333
/* getrusage().  struct_rusage is the real struct rusage when the function
   exists, otherwise a dummy, and getrusage itself becomes an assertion
   failure.  */
#if HAVE_GETRUSAGE
static const int have_grus = 1;
#define struct_rusage   struct rusage
#else
static const int have_grus = 0;
#define getrusage(n,ru)  ASSERT_FAIL (getrusage not available)
#define struct_rusage    struct rusage_dummy
#endif
342
/* gettimeofday().  struct_timeval is the real struct timeval when the
   function exists, otherwise a dummy, and gettimeofday itself becomes an
   assertion failure.  */
#if HAVE_GETTIMEOFDAY
static const int have_gtod = 1;
#define struct_timeval   struct timeval
#else
static const int have_gtod = 0;
#define gettimeofday(tv,tz)  ASSERT_FAIL (gettimeofday not available)
#define struct_timeval   struct timeval_dummy
#endif
351
/* times().  struct_tms is the real struct tms when the function exists,
   otherwise a dummy, and times itself becomes an assertion failure.  */
#if HAVE_TIMES
static const int have_times = 1;
#define struct_tms   struct tms
#else
static const int have_times = 0;
#define times(tms)   ASSERT_FAIL (times not available)
#define struct_tms   struct tms_dummy
#endif
360
/* Dummy structures standing in for the system ones, selected through the
   struct_tms, struct_timeval, struct_rusage and struct_timespec macros
   above when the corresponding function doesn't exist.  They are always
   declared, used or not.  */
struct tms_dummy {
  long  tms_utime;
};
struct timeval_dummy {
  long  tv_sec;
  long  tv_usec;
};
struct rusage_dummy {
  struct_timeval ru_utime;  /* real or dummy timeval, whichever is in use */
};
struct timespec_dummy {
  long  tv_sec;
  long  tv_nsec;
};
375
/* Flags for the measuring methods in use.  speed_time_init sets use_cycles,
   use_grus, use_gtod and use_times in the part of it visible here.
   NOTE(review): the role of use_tick_boundary isn't visible in this part of
   the file -- confirm against its users.  */
static int  use_cycles;
static int  use_mftb;
static int  use_sgi;
static int  use_rrt;
static int  use_cgt;
static int  use_gtod;
static int  use_grus;
static int  use_times;
static int  use_tick_boundary;

/* One starting value per method, presumably recorded by speed_starttime
   (which is not in this part of the file -- confirm).  */
static unsigned         start_cycles[2];
static stck_t           start_stck;
static unsigned         start_mftb[2];
static unsigned         start_sgi;
static timebasestruct_t start_rrt;
static struct_timespec  start_cgt;
static struct_rusage    start_grus;
static struct_timeval   start_gtod;
static struct_tms       start_times;

/* cycles_limit is set by speed_time_init to half the cycle counter range
   multiplied by speed_cycletime, and is only used if a supplementary method
   is chosen.  The xxx_unittime values are the resolution of each method.  */
static double  cycles_limit = 1e100;
static double  mftb_unittime;
static double  sgi_unittime;
static double  cgt_unittime;
static double  grus_unittime;
static double  gtod_unittime;
static double  times_unittime;

/* for RTC_POWER format, ie. seconds and nanoseconds */
#define TIMEBASESTRUCT_SECS(t)  ((t)->tb_high + (t)->tb_low * 1e-9)
406
407
/* Format a time "t", given in seconds, as a short human readable string
   such as "10.25ms".

   The value is scaled to nanoseconds, microseconds, milliseconds or left
   as seconds according to its size, then printed with however many
   decimals give about 4 significant figures.

   The return is a pointer to a static buffer, overwritten by the next
   call.  */
char *
unittime_string (double t)
{
  static char  buf[128];

  const char  *suffix = "s";
  int         decimals = 1;

  /* scale into the largest unit that keeps the value below 1000 or so */
  if (t < 1e-6)
    {
      suffix = "ns";
      t *= 1e9;
    }
  else if (t < 1e-3)
    {
      suffix = "us";
      t *= 1e6;
    }
  else if (t < 1.0)
    {
      suffix = "ms";
      t *= 1e3;
    }

  /* fewer decimals as more digits appear ahead of the point */
  if (t < 1.0)
    decimals = 4;
  else if (t < 10.0)
    decimals = 3;
  else if (t < 100.0)
    decimals = 2;

  sprintf (buf, "%.*f%s", decimals, t, suffix);
  return buf;
}
441
442
/* Jump target set up by cycles_works_p() before it tries
   speed_cyclecounter().  */
static jmp_buf  cycles_works_buf;

/* Signal handler installed for SIGILL by cycles_works_p().  It jumps back
   to the setjmp there, making that setjmp return 1.  "sig" is not used.  */
static RETSIGTYPE
cycles_works_handler (int sig)
{
  longjmp (cycles_works_buf, 1);
}
450
451int
452cycles_works_p (void)
453{
454  static int  result = -1;
455
456  if (result != -1)
457    goto done;
458
459#ifdef SIGILL
460  {
461    RETSIGTYPE (*old_handler) __GMP_PROTO ((int));
462    unsigned  cycles[2];
463
464    old_handler = signal (SIGILL, cycles_works_handler);
465    if (old_handler == SIG_ERR)
466      {
467	if (speed_option_verbose)
468	  printf ("cycles_works_p(): SIGILL not supported, assuming speed_cyclecounter() works\n");
469	goto yes;
470      }
471    if (setjmp (cycles_works_buf))
472      {
473	if (speed_option_verbose)
474	  printf ("cycles_works_p(): SIGILL during speed_cyclecounter(), so doesn't work\n");
475	result = 0;
476	goto done;
477      }
478    speed_cyclecounter (cycles);
479    signal (SIGILL, old_handler);
480    if (speed_option_verbose)
481      printf ("cycles_works_p(): speed_cyclecounter() works\n");
482  }
483#else
484
485  if (speed_option_verbose)
486    printf ("cycles_works_p(): SIGILL not defined, assuming speed_cyclecounter() works\n");
487  goto yes;
488#endif
489
490 yes:
491  result = 1;
492
493 done:
494  return result;
495}
496
497
/* Return the number of clock ticks per second.

   sysconf(_SC_CLK_TCK) is preferred where sysconf exists, with the CLK_TCK
   macro as the fallback.  The value is established on the first call and
   remembered after that.  If neither source is usable a message is printed
   and the program aborts.  */
long
clk_tck (void)
{
  static long  ticks = -1L;

  if (ticks == -1L)
    {
#if HAVE_SYSCONF
      ticks = sysconf (_SC_CLK_TCK);
      if (ticks != -1L)
	{
	  if (speed_option_verbose)
	    printf ("sysconf(_SC_CLK_TCK) is %ld per second\n", ticks);
	  return ticks;
	}

      fprintf (stderr,
	       "sysconf(_SC_CLK_TCK) not working, using CLK_TCK instead\n");
#endif

#ifdef CLK_TCK
      ticks = CLK_TCK;
      if (speed_option_verbose)
	printf ("CLK_TCK is %ld per second\n", ticks);
#else
      fprintf (stderr, "CLK_TCK not defined, cannot continue\n");
      abort ();
#endif
    }

  return ticks;
}
530
531
532/* If two times can be observed less than half a clock tick apart, then
533   assume "get" is microsecond accurate.
534
535   Two times only 1 microsecond apart are not believed, since some kernels
536   take it upon themselves to ensure gettimeofday doesn't return the same
537   value twice, for the benefit of applications using it for a timestamp.
538   This is obviously very stupid given the speed of CPUs these days.
539
540   Making "reps" many calls to noop_1() is designed to waste some CPU, with
541   a view to getting measurements 2 microseconds (or more) apart.  "reps" is
542   increased progressively until such a period is seen.
543
544   The outer loop "attempts" are just to allow for any random nonsense or
545   system load upsetting the measurements (ie. making two successive calls
546   to "get" come out as a longer interval than normal).
547
548   Bugs:
549
550   The assumption that any interval less than a half tick implies
551   microsecond resolution is obviously fairly rash, the true resolution
552   could be anything between a microsecond and that half tick.  Perhaps
553   something special would have to be done on a system where this is the
554   case, since there's no obvious reliable way to detect it
555   automatically.  */
556
/* MICROSECONDS_P expands to the entire body of a function returning int,
   braces and "return" statements included.

     name      string naming the time function, used in messages
     type      type of a variable holding one reading
     get(t)    stores the current time in variable t
     sec(t)    the seconds part of reading t
     usec(t)   the microseconds part of reading t

   The generated function gives 1 if, within 5 attempts, the interval that
   ended the inner loop (normally the first one of 2 microseconds or more)
   was less than half a clock tick, and 0 otherwise.  The answer is kept in
   a static and returned directly on later calls.  */
#define MICROSECONDS_P(name, type, get, sec, usec)                      \
  {                                                                     \
    static int  result = -1;                                            \
    type      st, et;                                                   \
    long      dt, half_tick;                                            \
    unsigned  attempt, reps, i, j;                                      \
									\
    if (result != -1)                                                   \
      return result;                                                    \
									\
    result = 0;                                                         \
    half_tick = (1000000L / clk_tck ()) / 2;                            \
									\
    for (attempt = 0; attempt < 5; attempt++)                           \
      {                                                                 \
	reps = 0;                                                       \
	for (;;)                                                        \
	  {                                                             \
	    get (st);                                                   \
	    for (i = 0; i < reps; i++)                                  \
	      for (j = 0; j < 100; j++)                                 \
		noop_1 (CNST_LIMB(0));                                  \
	    get (et);                                                   \
									\
	    dt = (sec(et)-sec(st))*1000000L + usec(et)-usec(st);        \
									\
	    if (speed_option_verbose >= 2)                              \
	      printf ("%s attempt=%u, reps=%u, dt=%ld\n",               \
		      name, attempt, reps, dt);                         \
									\
	    if (dt >= 2)                                                \
	      break;                                                    \
									\
	    reps = (reps == 0 ? 1 : 2*reps);                            \
	    if (reps == 0)                                              \
	      break;  /* uint overflow, not normal */                   \
	  }                                                             \
									\
	if (dt < half_tick)                                             \
	  {                                                             \
	    result = 1;                                                 \
	    break;                                                      \
	  }                                                             \
      }                                                                 \
									\
    if (speed_option_verbose)                                           \
      {                                                                 \
	if (result)                                                     \
	  printf ("%s is microsecond accurate\n", name);                \
	else                                                            \
	  printf ("%s is only %s clock tick accurate\n",                \
		  name, unittime_string (1.0/clk_tck()));               \
      }                                                                 \
    return result;                                                      \
  }
612
613
/* Return 1 if gettimeofday() looks to be microsecond accurate, or 0 if it
   seems to be only clock tick accurate.  The three helper macros adapt
   gettimeofday and its timeval to MICROSECONDS_P, which supplies the whole
   function body, "return" included.  */
int
gettimeofday_microseconds_p (void)
{
#define call_gettimeofday(t)   gettimeofday (&(t), NULL)
#define timeval_tv_sec(t)      ((t).tv_sec)
#define timeval_tv_usec(t)     ((t).tv_usec)
  MICROSECONDS_P ("gettimeofday", struct_timeval,
		  call_gettimeofday, timeval_tv_sec, timeval_tv_usec);
}
623
/* Return 1 if the ru_utime from getrusage() looks to be microsecond
   accurate, or 0 if it seems to be only clock tick accurate.  The three
   helper macros adapt getrusage and its rusage to MICROSECONDS_P, which
   supplies the whole function body, "return" included.  */
int
getrusage_microseconds_p (void)
{
#define call_getrusage(t)   getrusage (0, &(t))
#define rusage_tv_sec(t)    ((t).ru_utime.tv_sec)
#define rusage_tv_usec(t)   ((t).ru_utime.tv_usec)
  MICROSECONDS_P ("getrusage", struct_rusage,
		  call_getrusage, rusage_tv_sec, rusage_tv_usec);
}
633
634/* Test whether getrusage goes backwards, return non-zero if it does
635   (suggesting it's flawed).
636
637   On a macintosh m68040-unknown-netbsd1.4.1 getrusage looks like it's
638   microsecond accurate, but has been seen remaining unchanged after many
639   microseconds have elapsed.  It also regularly goes backwards by 1000 to
640   5000 usecs, this has been seen after between 500 and 4000 attempts taking
641   perhaps 0.03 seconds.  We consider this too broken for good measuring.
642   We used to have configure pretend getrusage didn't exist on this system,
643   but a runtime test should be more reliable, since we imagine the problem
644   is not confined to just this exact system tuple.  */
645
646int
647getrusage_backwards_p (void)
648{
649  static int result = -1;
650  struct rusage  start, prev, next;
651  long  d;
652  int   i;
653
654  if (result != -1)
655    return result;
656
657  getrusage (0, &start);
658  memcpy (&next, &start, sizeof (next));
659
660  result = 0;
661  i = 0;
662  for (;;)
663    {
664      memcpy (&prev, &next, sizeof (prev));
665      getrusage (0, &next);
666
667      if (next.ru_utime.tv_sec < prev.ru_utime.tv_sec
668	  || (next.ru_utime.tv_sec == prev.ru_utime.tv_sec
669	      && next.ru_utime.tv_usec < prev.ru_utime.tv_usec))
670	{
671	  if (speed_option_verbose)
672	    printf ("getrusage went backwards (attempt %d: %ld.%06ld -> %ld.%06ld)\n",
673		    i,
674		    prev.ru_utime.tv_sec, prev.ru_utime.tv_usec,
675		    next.ru_utime.tv_sec, next.ru_utime.tv_usec);
676	  result = 1;
677	  break;
678	}
679
680      /* minimum 1000 attempts, then stop after either 0.1 seconds or 50000
681	 attempts, whichever comes first */
682      d = 1000000 * (next.ru_utime.tv_sec - start.ru_utime.tv_sec)
683	+ (next.ru_utime.tv_usec - start.ru_utime.tv_usec);
684      i++;
685      if (i > 50000 || (i > 1000 && d > 100000))
686	break;
687    }
688
689  return result;
690}
691
/* CGT_ID is the clock id given to clock_gettime and clock_getres, being
   CLOCK_PROCESS_CPUTIME_ID if that's defined, or failing that
   CLOCK_VIRTUAL.  With neither, have_cgt_id is 0 and CGT_ID becomes an
   assertion failure (with a -1 value so it can still be written as a
   function argument).

   CLOCK_PROCESS_CPUTIME_ID looks like it's going to be in a future version
   of glibc (some time post 2.2).

   CLOCK_VIRTUAL is process time, available in BSD systems (though sometimes
   defined, but returning -1 for an error).  */

#ifdef CLOCK_PROCESS_CPUTIME_ID
# define CGT_ID        CLOCK_PROCESS_CPUTIME_ID
#else
# ifdef CLOCK_VIRTUAL
#  define CGT_ID       CLOCK_VIRTUAL
# endif
#endif
#ifdef CGT_ID
const int  have_cgt_id = 1;
#else
const int  have_cgt_id = 0;
# define CGT_ID       (ASSERT_FAIL (CGT_ID not determined), -1)
#endif
711
712int
713cgt_works_p (void)
714{
715  static int  result = -1;
716  struct_timespec  unit;
717
718  if (! have_cgt)
719    return 0;
720
721  if (! have_cgt_id)
722    {
723      if (speed_option_verbose)
724	printf ("clock_gettime don't know what ID to use\n");
725      result = 0;
726      return result;
727    }
728
729  if (result != -1)
730    return result;
731
732  /* trial run to see if it works */
733  if (clock_gettime (CGT_ID, &unit) != 0)
734    {
735      if (speed_option_verbose)
736	printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
737      result = 0;
738      return result;
739    }
740
741  /* get the resolution */
742  if (clock_getres (CGT_ID, &unit) != 0)
743    {
744      if (speed_option_verbose)
745	printf ("clock_getres id=%d error: %s\n", CGT_ID, strerror (errno));
746      result = 0;
747      return result;
748    }
749
750  cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9;
751  printf ("clock_gettime is %s accurate\n",
752	  unittime_string (cgt_unittime));
753  result = 1;
754  return result;
755}
756
757
/* One measurement of mftb against gettimeofday, handed to freq_measure()
   by mftb_works_p().  The body comes entirely from the FREQ_MEASURE_ONE
   macro, which is defined outside this file.
   NOTE(review): presumably the return is the measured period of one mftb
   tick in seconds, since mftb_works_p stores freq_measure's result in
   mftb_unittime -- confirm against FREQ_MEASURE_ONE.

   The three helper macros repeat the definitions used in
   gettimeofday_microseconds_p, identically.  */
static double
freq_measure_mftb_one (void)
{
#define call_gettimeofday(t)   gettimeofday (&(t), NULL)
#define timeval_tv_sec(t)      ((t).tv_sec)
#define timeval_tv_usec(t)     ((t).tv_usec)
  FREQ_MEASURE_ONE ("mftb", struct_timeval,
		    call_gettimeofday, MFTB,
		    timeval_tv_sec, timeval_tv_usec);
}
768
769
/* Jump target set up by mftb_works_p() before it tries MFTB.  */
static jmp_buf  mftb_works_buf;

/* Signal handler installed for SIGILL by mftb_works_p().  It jumps back to
   the setjmp there, making that setjmp return 1.  "sig" is not used.  */
static RETSIGTYPE
mftb_works_handler (int sig)
{
  longjmp (mftb_works_buf, 1);
}
777
778int
779mftb_works_p (void)
780{
781  unsigned   a[2];
782  RETSIGTYPE (*old_handler) __GMP_PROTO ((int));
783  double     cycletime;
784
785  /* suppress a warning about a[] unused */
786  a[0] = 0;
787
788  if (! have_mftb)
789    return 0;
790
791#ifdef SIGILL
792  old_handler = signal (SIGILL, mftb_works_handler);
793  if (old_handler == SIG_ERR)
794    {
795      if (speed_option_verbose)
796	printf ("mftb_works_p(): SIGILL not supported, assuming mftb works\n");
797      return 1;
798    }
799  if (setjmp (mftb_works_buf))
800    {
801      if (speed_option_verbose)
802	printf ("mftb_works_p(): SIGILL during mftb, so doesn't work\n");
803      return 0;
804    }
805  MFTB (a);
806  signal (SIGILL, old_handler);
807  if (speed_option_verbose)
808    printf ("mftb_works_p(): mftb works\n");
809#else
810
811  if (speed_option_verbose)
812    printf ("mftb_works_p(): SIGILL not defined, assuming mftb works\n");
813#endif
814
815#if ! HAVE_GETTIMEOFDAY
816  if (speed_option_verbose)
817    printf ("mftb_works_p(): no gettimeofday available to measure mftb\n");
818  return 0;
819#endif
820
821  /* The time base is normally 1/4 of the bus speed on 6xx and 7xx chips, on
822     other chips it can be driven from an external clock. */
823  cycletime = freq_measure ("mftb", freq_measure_mftb_one);
824  if (cycletime == -1.0)
825    {
826      if (speed_option_verbose)
827	printf ("mftb_works_p(): cannot measure mftb period\n");
828      return 0;
829    }
830
831  mftb_unittime = cycletime;
832  return 1;
833}
834
835
/* Address of the low 32 bits of the SGI hardware cycle counter, set up by
   sgi_works_p() when it succeeds.  */
volatile unsigned  *sgi_addr;

/* Return 1 if the SGI hardware cycle counter has been mapped into memory
   and can be read through sgi_addr, or 0 if not.  On success sgi_unittime
   holds the counter period in seconds.

   The counter is located with syssgi(SGI_QUERY_CYCLECNTR) and the page
   containing it is mapped read-only from /dev/mmem.  0 is returned if the
   query fails, the reported counter size is below 32 bits, or the open or
   mmap fails.  Without both syssgi() and mmap() the answer is always 0.

   The answer is kept in a static for later calls.

   The /dev/mmem descriptor is closed once the mmap has been attempted,
   since the mapping doesn't need it kept open.  */
int
sgi_works_p (void)
{
#if HAVE_SYSSGI && HAVE_MMAP
  static int  result = -1;

  size_t          pagesize, offset;
  __psunsigned_t  phys, physpage;
  void            *virtpage;
  unsigned        period_picoseconds;
  int             size, fd;

  if (result != -1)
    return result;

  phys = syssgi (SGI_QUERY_CYCLECNTR, &period_picoseconds);
  if (phys == (__psunsigned_t) -1)
    {
      /* ENODEV is the error when a counter is not available */
      if (speed_option_verbose)
	printf ("syssgi SGI_QUERY_CYCLECNTR error: %s\n", strerror (errno));
      result = 0;
      return result;
    }
  sgi_unittime = period_picoseconds * 1e-12;

  /* IRIX 5 doesn't have SGI_CYCLECNTR_SIZE, assume 32 bits in that case.
     Challenge/ONYX hardware has a 64 bit byte counter, but there seems no
     obvious way to identify that without SGI_CYCLECNTR_SIZE.  */
#ifdef SGI_CYCLECNTR_SIZE
  size = syssgi (SGI_CYCLECNTR_SIZE);
  if (size == -1)
    {
      if (speed_option_verbose)
	{
	  printf ("syssgi SGI_CYCLECNTR_SIZE error: %s\n", strerror (errno));
	  /* size is in bits, so the message says 32 to match the value */
	  printf ("    will assume size==32\n");
	}
      size = 32;
    }
#else
  size = 32;
#endif

  if (size < 32)
    {
      printf ("syssgi SGI_CYCLECNTR_SIZE gives %d, expected 32 or 64\n", size);
      result = 0;
      return result;
    }

  pagesize = getpagesize();
  offset = (size_t) phys & (pagesize-1);
  physpage = phys - offset;

  /* shouldn't cross over a page boundary */
  ASSERT_ALWAYS (offset + size/8 <= pagesize);

  fd = open("/dev/mmem", O_RDONLY);
  if (fd == -1)
    {
      if (speed_option_verbose)
	printf ("open /dev/mmem: %s\n", strerror (errno));
      result = 0;
      return result;
    }

  virtpage = mmap (0, pagesize, PROT_READ, MAP_PRIVATE, fd, (off_t) physpage);
  if (virtpage == (void *) -1)
    {
      /* print before closing, so close can't disturb errno */
      if (speed_option_verbose)
	printf ("mmap /dev/mmem: %s\n", strerror (errno));
      close (fd);
      result = 0;
      return result;
    }

  /* the mapping remains after the descriptor is closed */
  close (fd);

  /* address of least significant 4 bytes, knowing mips is big endian */
  sgi_addr = (unsigned *) ((char *) virtpage + offset
			   + size/8 - sizeof(unsigned));
  result = 1;
  return result;

#else /* ! (HAVE_SYSSGI && HAVE_MMAP) */
  return 0;
#endif
}
924
925
/* Set "var" to "n" if var is currently zero, otherwise leave it alone.
   This lets a value already chosen (for instance by the user) take
   precedence.  "var" is evaluated more than once.  */
#define DEFAULT(var,n)  \
  do {                  \
    if (! (var))        \
      (var) = (n);      \
  } while (0)
931
932void
933speed_time_init (void)
934{
935  double supplement_unittime = 0.0;
936
937  static int  speed_time_initialized = 0;
938  if (speed_time_initialized)
939    return;
940  speed_time_initialized = 1;
941
942  speed_cycletime_init ();
943
944  if (have_cycles && cycles_works_p ())
945    {
946      use_cycles = 1;
947      DEFAULT (speed_cycletime, 1.0);
948      speed_unittime = speed_cycletime;
949      DEFAULT (speed_precision, 10000);
950      strcpy (speed_time_string, "CPU cycle counter");
951
952      /* only used if a supplementary method is chosen below */
953      cycles_limit = (have_cycles == 1 ? M_2POW32 : M_2POW64) / 2.0
954	* speed_cycletime;
955
956      if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
957	{
958	  /* this is a good combination */
959	  use_grus = 1;
960	  supplement_unittime = grus_unittime = 1.0e-6;
961	  strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond getrusage()");
962	}
963      else if (have_cycles == 1)
964	{
965	  /* When speed_cyclecounter has a limited range, look for something
966	     to supplement it. */
967	  if (have_gtod && gettimeofday_microseconds_p())
968	    {
969	      use_gtod = 1;
970	      supplement_unittime = gtod_unittime = 1.0e-6;
971	      strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond gettimeofday()");
972	    }
973	  else if (have_grus)
974	    {
975	      use_grus = 1;
976	      supplement_unittime = grus_unittime = 1.0 / (double) clk_tck ();
977	      sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick getrusage()", unittime_string (supplement_unittime));
978	    }
979	  else if (have_times)
980	    {
981	      use_times = 1;
982	      supplement_unittime = times_unittime = 1.0 / (double) clk_tck ();
983	      sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick times()", unittime_string (supplement_unittime));
984	    }
985	  else if (have_gtod)
986	    {
987	      use_gtod = 1;
988	      supplement_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
989	      sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick gettimeofday()", unittime_string (supplement_unittime));
990	    }
991	  else
992	    {
993	      fprintf (stderr, "WARNING: cycle counter is 32 bits and there's no other functions.\n");
994	      fprintf (stderr, "    Wraparounds may produce bad results on long measurements.\n");
995	    }
996	}
997
998      if (use_grus || use_times || use_gtod)
999	{
1000	  /* must know cycle period to compare cycles to other measuring
1001	     (via cycles_limit) */
1002	  speed_cycletime_need_seconds ();
1003
1004	  if (speed_precision * supplement_unittime > cycles_limit)
1005	    {
1006	      fprintf (stderr, "WARNING: requested precision can't always be achieved due to limited range\n");
1007	      fprintf (stderr, "    cycle counter and limited precision supplemental method\n");
1008	      fprintf (stderr, "    (%s)\n", speed_time_string);
1009	    }
1010	}
1011    }
1012  else if (have_stck)
1013    {
1014      strcpy (speed_time_string, "STCK timestamp");
1015      /* stck is in units of 2^-12 microseconds, which is very likely higher
1016	 resolution than a cpu cycle */
1017      if (speed_cycletime == 0.0)
1018	speed_cycletime_fail
1019	  ("Need to know CPU frequency for effective stck unit");
1020      speed_unittime = MAX (speed_cycletime, STCK_PERIOD);
1021      DEFAULT (speed_precision, 10000);
1022    }
1023  else if (have_mftb && mftb_works_p ())
1024    {
1025      use_mftb = 1;
1026      DEFAULT (speed_precision, 10000);
1027      speed_unittime = mftb_unittime;
1028      sprintf (speed_time_string, "mftb counter (%s)",
1029	       unittime_string (speed_unittime));
1030    }
1031  else if (have_sgi && sgi_works_p ())
1032    {
1033      use_sgi = 1;
1034      DEFAULT (speed_precision, 10000);
1035      speed_unittime = sgi_unittime;
1036      sprintf (speed_time_string, "syssgi() mmap counter (%s), supplemented by millisecond getrusage()",
1037	       unittime_string (speed_unittime));
1038      /* supplemented with getrusage, which we assume to have 1ms resolution */
1039      use_grus = 1;
1040      supplement_unittime = 1e-3;
1041    }
1042  else if (have_rrt)
1043    {
1044      timebasestruct_t  t;
1045      use_rrt = 1;
1046      DEFAULT (speed_precision, 10000);
1047      read_real_time (&t, sizeof(t));
1048      switch (t.flag) {
1049      case RTC_POWER:
1050	/* FIXME: What's the actual RTC resolution? */
1051	speed_unittime = 1e-7;
1052	strcpy (speed_time_string, "read_real_time() power nanoseconds");
1053	break;
1054      case RTC_POWER_PC:
1055	t.tb_high = 1;
1056	t.tb_low = 0;
1057	time_base_to_time (&t, sizeof(t));
1058	speed_unittime = TIMEBASESTRUCT_SECS(&t) / M_2POW32;
1059	sprintf (speed_time_string, "%s read_real_time() powerpc ticks",
1060		 unittime_string (speed_unittime));
1061	break;
1062      default:
1063	fprintf (stderr, "ERROR: Unrecognised timebasestruct_t flag=%d\n",
1064		 t.flag);
1065	abort ();
1066      }
1067    }
1068  else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5e-6)
1069    {
1070      /* use clock_gettime if microsecond or better resolution */
1071    choose_cgt:
1072      use_cgt = 1;
1073      speed_unittime = cgt_unittime;
1074      DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000));
1075      strcpy (speed_time_string, "microsecond accurate getrusage()");
1076    }
1077  else if (have_times && clk_tck() > 1000000)
1078    {
1079      /* Cray vector systems have times() which is clock cycle resolution
1080	 (eg. 450 MHz).  */
1081      DEFAULT (speed_precision, 10000);
1082      goto choose_times;
1083    }
1084  else if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
1085    {
1086      use_grus = 1;
1087      speed_unittime = grus_unittime = 1.0e-6;
1088      DEFAULT (speed_precision, 1000);
1089      strcpy (speed_time_string, "microsecond accurate getrusage()");
1090    }
1091  else if (have_gtod && gettimeofday_microseconds_p())
1092    {
1093      use_gtod = 1;
1094      speed_unittime = gtod_unittime = 1.0e-6;
1095      DEFAULT (speed_precision, 1000);
1096      strcpy (speed_time_string, "microsecond accurate gettimeofday()");
1097    }
1098  else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5/clk_tck())
1099    {
1100      /* use clock_gettime if 1 tick or better resolution */
1101      goto choose_cgt;
1102    }
1103  else if (have_times)
1104    {
1105      use_tick_boundary = 1;
1106      DEFAULT (speed_precision, 200);
1107    choose_times:
1108      use_times = 1;
1109      speed_unittime = times_unittime = 1.0 / (double) clk_tck ();
1110      sprintf (speed_time_string, "%s clock tick times()",
1111	       unittime_string (speed_unittime));
1112    }
1113  else if (have_grus)
1114    {
1115      use_grus = 1;
1116      use_tick_boundary = 1;
1117      speed_unittime = grus_unittime = 1.0 / (double) clk_tck ();
1118      DEFAULT (speed_precision, 200);
1119      sprintf (speed_time_string, "%s clock tick getrusage()\n",
1120	       unittime_string (speed_unittime));
1121    }
1122  else if (have_gtod)
1123    {
1124      use_gtod = 1;
1125      use_tick_boundary = 1;
1126      speed_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
1127      DEFAULT (speed_precision, 200);
1128      sprintf (speed_time_string, "%s clock tick gettimeofday()",
1129	       unittime_string (speed_unittime));
1130    }
1131  else
1132    {
1133      fprintf (stderr, "No time measuring method available\n");
1134      fprintf (stderr, "None of: speed_cyclecounter(), STCK(), getrusage(), gettimeofday(), times()\n");
1135      abort ();
1136    }
1137
1138  if (speed_option_verbose)
1139    {
1140      printf ("speed_time_init: %s\n", speed_time_string);
1141      printf ("    speed_precision     %d\n", speed_precision);
1142      printf ("    speed_unittime      %.2g\n", speed_unittime);
1143      if (supplement_unittime)
1144	printf ("    supplement_unittime %.2g\n", supplement_unittime);
1145      printf ("    use_tick_boundary   %d\n", use_tick_boundary);
1146      if (have_cycles)
1147	printf ("    cycles_limit        %.2g seconds\n", cycles_limit);
1148    }
1149}
1150
1151
1152
1153/* Burn up CPU until a clock tick boundary, for greater accuracy.  Set the
1154   corresponding "start_foo" appropriately too. */
1155
1156void
1157grus_tick_boundary (void)
1158{
1159  struct_rusage  prev;
1160  getrusage (0, &prev);
1161  do {
1162    getrusage (0, &start_grus);
1163  } while (start_grus.ru_utime.tv_usec == prev.ru_utime.tv_usec);
1164}
1165
1166void
1167gtod_tick_boundary (void)
1168{
1169  struct_timeval  prev;
1170  gettimeofday (&prev, NULL);
1171  do {
1172    gettimeofday (&start_gtod, NULL);
1173  } while (start_gtod.tv_usec == prev.tv_usec);
1174}
1175
1176void
1177times_tick_boundary (void)
1178{
1179  struct_tms  prev;
1180  times (&prev);
1181  do
1182    times (&start_times);
1183  while (start_times.tms_utime == prev.tms_utime);
1184}
1185
1186
1187/* "have_" values are tested to let unused code go dead.  */
1188
1189void
1190speed_starttime (void)
1191{
1192  speed_time_init ();
1193
1194  if (have_grus && use_grus)
1195    {
1196      if (use_tick_boundary)
1197	grus_tick_boundary ();
1198      else
1199	getrusage (0, &start_grus);
1200    }
1201
1202  if (have_gtod && use_gtod)
1203    {
1204      if (use_tick_boundary)
1205	gtod_tick_boundary ();
1206      else
1207	gettimeofday (&start_gtod, NULL);
1208    }
1209
1210  if (have_times && use_times)
1211    {
1212      if (use_tick_boundary)
1213	times_tick_boundary ();
1214      else
1215	times (&start_times);
1216    }
1217
1218  if (have_cgt && use_cgt)
1219    clock_gettime (CGT_ID, &start_cgt);
1220
1221  if (have_rrt && use_rrt)
1222    read_real_time (&start_rrt, sizeof(start_rrt));
1223
1224  if (have_sgi && use_sgi)
1225    start_sgi = *sgi_addr;
1226
1227  if (have_mftb && use_mftb)
1228    MFTB (start_mftb);
1229
1230  if (have_stck && use_stck)
1231    STCK (start_stck);
1232
1233  /* Cycles sampled last for maximum accuracy. */
1234  if (have_cycles && use_cycles)
1235    speed_cyclecounter (start_cycles);
1236}
1237
1238
1239/* Calculate the difference between two cycle counter samples, as a "double"
1240   counter of cycles.
1241
1242   The start and end values are allowed to cancel in integers in case the
1243   counter values are bigger than the 53 bits that normally fit in a double.
1244
1245   This works even if speed_cyclecounter() puts a value bigger than 32-bits
1246   in the low word (the high word always gets a 2**32 multiplier though). */
1247
1248double
1249speed_cyclecounter_diff (const unsigned end[2], const unsigned start[2])
1250{
1251  unsigned  d;
1252  double    t;
1253
1254  if (have_cycles == 1)
1255    {
1256      t = (end[0] - start[0]);
1257    }
1258  else
1259    {
1260      d = end[0] - start[0];
1261      t = d - (d > end[0] ? M_2POWU : 0.0);
1262      t += (end[1] - start[1]) * M_2POW32;
1263    }
1264  return t;
1265}
1266
1267
1268double
1269speed_mftb_diff (const unsigned end[2], const unsigned start[2])
1270{
1271  unsigned  d;
1272  double    t;
1273
1274  d = end[0] - start[0];
1275  t = (double) d - (d > end[0] ? M_2POW32 : 0.0);
1276  t += (end[1] - start[1]) * M_2POW32;
1277  return t;
1278}
1279
1280
1281/* Calculate the difference between "start" and "end" using fields "sec" and
1282   "psec", where each "psec" is a "punit" of a second.
1283
1284   The seconds parts are allowed to cancel before being combined with the
1285   psec parts, in case a simple "sec+psec*punit" exceeds the precision of a
1286   double.
1287
1288   Total time is only calculated in a "double" since an integer count of
1289   psecs might overflow.  2^32 microseconds is only a bit over an hour, or
1290   2^32 nanoseconds only about 4 seconds.
1291
1292   The casts to "long" are for the benefit of timebasestruct_t, where the
1293   fields are only "unsigned int", but we want a signed difference.  */
1294
/* Expands to a complete function body.  The enclosing function must have
   pointer parameters named "end" and "start".  */
#define DIFF_SECS_ROUTINE(sec, psec, punit)                             \
  {                                                                     \
    long  whole_delta = (long) end->sec - (long) start->sec;            \
    long  part_delta = (long) end->psec - (long) start->psec;           \
    return (double) whole_delta + punit * (double) part_delta;          \
  }
1302
1303double
1304timeval_diff_secs (const struct_timeval *end, const struct_timeval *start)
1305{
1306  DIFF_SECS_ROUTINE (tv_sec, tv_usec, 1e-6);
1307}
1308
1309double
1310rusage_diff_secs (const struct_rusage *end, const struct_rusage *start)
1311{
1312  DIFF_SECS_ROUTINE (ru_utime.tv_sec, ru_utime.tv_usec, 1e-6);
1313}
1314
1315double
1316timespec_diff_secs (const struct_timespec *end, const struct_timespec *start)
1317{
1318  DIFF_SECS_ROUTINE (tv_sec, tv_nsec, 1e-9);
1319}
1320
1321/* This is for use after time_base_to_time, ie. for seconds and nanoseconds. */
1322double
1323timebasestruct_diff_secs (const timebasestruct_t *end,
1324			  const timebasestruct_t *start)
1325{
1326  DIFF_SECS_ROUTINE (tb_high, tb_low, 1e-9);
1327}
1328
1329
1330double
1331speed_endtime (void)
1332{
1333#define END_USE(name,value)                             \
1334  do {                                                  \
1335    if (speed_option_verbose >= 3)                      \
1336      printf ("speed_endtime(): used %s\n", name);      \
1337    result = value;                                     \
1338    goto done;                                          \
1339  } while (0)
1340
1341#define END_ENOUGH(name,value)                                          \
1342  do {                                                                  \
1343    if (speed_option_verbose >= 3)                                      \
1344      printf ("speed_endtime(): %s gives enough precision\n", name);    \
1345    result = value;                                                     \
1346    goto done;                                                          \
1347  } while (0)
1348
1349#define END_EXCEED(name,value)                                            \
1350  do {                                                                    \
1351    if (speed_option_verbose >= 3)                                        \
1352      printf ("speed_endtime(): cycle counter limit exceeded, used %s\n", \
1353	      name);                                                      \
1354    result = value;                                                       \
1355    goto done;                                                            \
1356  } while (0)
1357
1358  unsigned          end_cycles[2];
1359  stck_t            end_stck;
1360  unsigned          end_mftb[2];
1361  unsigned          end_sgi;
1362  timebasestruct_t  end_rrt;
1363  struct_timespec   end_cgt;
1364  struct_timeval    end_gtod;
1365  struct_rusage     end_grus;
1366  struct_tms        end_times;
1367  double            t_gtod, t_grus, t_times, t_cgt;
1368  double            t_rrt, t_sgi, t_mftb, t_stck, t_cycles;
1369  double            result;
1370
1371  /* Cycles sampled first for maximum accuracy.
1372     "have_" values tested to let unused code go dead.  */
1373
1374  if (have_cycles && use_cycles)  speed_cyclecounter (end_cycles);
1375  if (have_stck   && use_stck)    STCK (end_stck);
1376  if (have_mftb   && use_mftb)    MFTB (end_mftb);
1377  if (have_sgi    && use_sgi)     end_sgi = *sgi_addr;
1378  if (have_rrt    && use_rrt)     read_real_time (&end_rrt, sizeof(end_rrt));
1379  if (have_cgt    && use_cgt)     clock_gettime (CGT_ID, &end_cgt);
1380  if (have_gtod   && use_gtod)    gettimeofday (&end_gtod, NULL);
1381  if (have_grus   && use_grus)    getrusage (0, &end_grus);
1382  if (have_times  && use_times)   times (&end_times);
1383
1384  result = -1.0;
1385
1386  if (speed_option_verbose >= 4)
1387    {
1388      printf ("speed_endtime():\n");
1389      if (use_cycles)
1390	printf ("   cycles  0x%X,0x%X -> 0x%X,0x%X\n",
1391		start_cycles[1], start_cycles[0],
1392		end_cycles[1], end_cycles[0]);
1393
1394      if (use_stck)
1395	printf ("   stck  0x%lX -> 0x%lX\n", start_stck, end_stck);
1396
1397      if (use_mftb)
1398	printf ("   mftb  0x%X,%08X -> 0x%X,%08X\n",
1399		start_mftb[1], start_mftb[0],
1400		end_mftb[1], end_mftb[0]);
1401
1402      if (use_sgi)
1403	printf ("   sgi  0x%X -> 0x%X\n", start_sgi, end_sgi);
1404
1405      if (use_rrt)
1406	printf ("   read_real_time  (%d)%u,%u -> (%d)%u,%u\n",
1407		start_rrt.flag, start_rrt.tb_high, start_rrt.tb_low,
1408		end_rrt.flag, end_rrt.tb_high, end_rrt.tb_low);
1409
1410      if (use_cgt)
1411	printf ("   clock_gettime  %ld.%09ld -> %ld.%09ld\n",
1412		start_cgt.tv_sec, start_cgt.tv_nsec,
1413		end_cgt.tv_sec, end_cgt.tv_nsec);
1414
1415      if (use_gtod)
1416	printf ("   gettimeofday  %ld.%06ld -> %ld.%06ld\n",
1417		start_gtod.tv_sec, start_gtod.tv_usec,
1418		end_gtod.tv_sec, end_gtod.tv_usec);
1419
1420      if (use_grus)
1421	printf ("   getrusage  %ld.%06ld -> %ld.%06ld\n",
1422		start_grus.ru_utime.tv_sec, start_grus.ru_utime.tv_usec,
1423		end_grus.ru_utime.tv_sec, end_grus.ru_utime.tv_usec);
1424
1425      if (use_times)
1426	printf ("   times  %ld -> %ld\n",
1427		start_times.tms_utime, end_times.tms_utime);
1428    }
1429
1430  if (use_rrt)
1431    {
1432      time_base_to_time (&start_rrt, sizeof(start_rrt));
1433      time_base_to_time (&end_rrt, sizeof(end_rrt));
1434      t_rrt = timebasestruct_diff_secs (&end_rrt, &start_rrt);
1435      END_USE ("read_real_time()", t_rrt);
1436    }
1437
1438  if (use_cgt)
1439    {
1440      t_cgt = timespec_diff_secs (&end_cgt, &start_cgt);
1441      END_USE ("clock_gettime()", t_cgt);
1442    }
1443
1444  if (use_grus)
1445    {
1446      t_grus = rusage_diff_secs (&end_grus, &start_grus);
1447
1448      /* Use getrusage() if the cycle counter limit would be exceeded, or if
1449	 it provides enough accuracy already. */
1450      if (use_cycles)
1451	{
1452	  if (t_grus >= speed_precision*grus_unittime)
1453	    END_ENOUGH ("getrusage()", t_grus);
1454	  if (t_grus >= cycles_limit)
1455	    END_EXCEED ("getrusage()", t_grus);
1456	}
1457    }
1458
1459  if (use_times)
1460    {
1461      t_times = (end_times.tms_utime - start_times.tms_utime) * times_unittime;
1462
1463      /* Use times() if the cycle counter limit would be exceeded, or if
1464	 it provides enough accuracy already. */
1465      if (use_cycles)
1466	{
1467	  if (t_times >= speed_precision*times_unittime)
1468	    END_ENOUGH ("times()", t_times);
1469	  if (t_times >= cycles_limit)
1470	    END_EXCEED ("times()", t_times);
1471	}
1472    }
1473
1474  if (use_gtod)
1475    {
1476      t_gtod = timeval_diff_secs (&end_gtod, &start_gtod);
1477
1478      /* Use gettimeofday() if it measured a value bigger than the cycle
1479	 counter can handle.  */
1480      if (use_cycles)
1481	{
1482	  if (t_gtod >= cycles_limit)
1483	    END_EXCEED ("gettimeofday()", t_gtod);
1484	}
1485    }
1486
1487  if (use_mftb)
1488    {
1489      t_mftb = speed_mftb_diff (end_mftb, start_mftb) * mftb_unittime;
1490      END_USE ("mftb", t_mftb);
1491    }
1492
1493  if (use_stck)
1494    {
1495      t_stck = (end_stck - start_stck) * STCK_PERIOD;
1496      END_USE ("stck", t_stck);
1497    }
1498
1499  if (use_sgi)
1500    {
1501      t_sgi = (end_sgi - start_sgi) * sgi_unittime;
1502      END_USE ("SGI hardware counter", t_sgi);
1503    }
1504
1505  if (use_cycles)
1506    {
1507      t_cycles = speed_cyclecounter_diff (end_cycles, start_cycles)
1508	* speed_cycletime;
1509      END_USE ("cycle counter", t_cycles);
1510    }
1511
1512  if (use_grus && getrusage_microseconds_p())
1513    END_USE ("getrusage()", t_grus);
1514
1515  if (use_gtod && gettimeofday_microseconds_p())
1516    END_USE ("gettimeofday()", t_gtod);
1517
1518  if (use_times)  END_USE ("times()",        t_times);
1519  if (use_grus)   END_USE ("getrusage()",    t_grus);
1520  if (use_gtod)   END_USE ("gettimeofday()", t_gtod);
1521
1522  fprintf (stderr, "speed_endtime(): oops, no time method available\n");
1523  abort ();
1524
1525 done:
1526  if (result < 0.0)
1527    {
1528      if (speed_option_verbose >= 2)
1529	fprintf (stderr, "speed_endtime(): warning, treating negative time as zero: %.9f\n", result);
1530      result = 0.0;
1531    }
1532  return result;
1533}
1534