1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms
5 * of the Common Development and Distribution License
6 * (the "License").  You may not use this file except
7 * in compliance with the License.
8 *
9 * You can obtain a copy of the license at
10 * src/OPENSOLARIS.LICENSE
11 * or http://www.opensolaris.org/os/licensing.
12 * See the License for the specific language governing
13 * permissions and limitations under the License.
14 *
15 * When distributing Covered Code, include this CDDL
16 * HEADER in each file and include the License file at
17 * usr/src/OPENSOLARIS.LICENSE.  If applicable,
18 * add the following below this CDDL HEADER, with the
19 * fields enclosed by brackets "[]" replaced with your
20 * own identifying information: Portions Copyright [yyyy]
21 * [name of copyright owner]
22 *
23 * CDDL HEADER END
24 */
25
26/*
27 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
28 * Use is subject to license terms.
29 */
30
31/*
32 * benchmarking routines
33 */
34
35#include <sys/types.h>
36#include <sys/time.h>
37#include <sys/ipc.h>
38#include <sys/sem.h>
39#include <sys/mman.h>
40#include <sys/wait.h>
41#include <ctype.h>
42#include <string.h>
43#include <strings.h>
44#include <signal.h>
45#include <stdio.h>
46#include <unistd.h>
47#include <stdlib.h>
48#include <poll.h>
49#include <pthread.h>
50#include <dlfcn.h>
51#include <errno.h>
52#include <sys/resource.h>
53#include <math.h>
54#include <limits.h>
55
56#ifdef	__sun
57#include <sys/elf.h>
58#endif
59
60#include "libmicro.h"
61
62
63#if defined(__APPLE__)
64#include <mach/mach_time.h>
65
66long long
67gethrtime(void)
68{
69   long long        elapsed;
70   static long long        start;
71   static mach_timebase_info_data_t    sTimebaseInfo = { 0, 0 };
72
73   // If this is the first time we've run, get the timebase.
74   // We can use denom == 0 to indicate that sTimebaseInfo is
75   // uninitialised because it makes no sense to have a zero
76   // denominator in a fraction.
77
78   if ( sTimebaseInfo.denom == 0 ) {
79       (void) mach_timebase_info(&sTimebaseInfo);
80		start = mach_absolute_time();
81   }
82
83   elapsed = mach_absolute_time() - start;
84
85   // Convert to nanoseconds.
86	// return (elapsed * (long long)sTimebaseInfo.numer)/(long long)sTimebaseInfo.denom;
87
88	// Provided the final result is representable in 64 bits the following maneuver will
89	// deliver that result without intermediate overflow.
90	if (sTimebaseInfo.denom == sTimebaseInfo.numer)
91		return elapsed;
92	else if (sTimebaseInfo.denom == 1)
93		return elapsed * (long long)sTimebaseInfo.numer;
94	else {
95       // Decompose elapsed = eta32 * 2^32 + eps32:
96       long long eta32 = elapsed >> 32;
97       long long eps32 = elapsed & 0x00000000ffffffffLL;
98
99       long long numer = sTimebaseInfo.numer, denom = sTimebaseInfo.denom;
100
101       // Form product of elapsed64 (decomposed) and numer:
102       long long mu64 = numer * eta32;
103       long long lambda64 = numer * eps32;
104
105       // Divide the constituents by denom:
106       long long q32 = mu64/denom;
107       long long r32 = mu64 - (q32 * denom); // mu64 % denom
108
109       return (q32 << 32) + ((r32 << 32) + lambda64)/denom;
110	}
111}
112
113#endif
114
115/*
116 * user visible globals
117 */
118
119int				lm_argc = 0;
120char **				lm_argv = NULL;
121
122int				lm_opt1;
123int				lm_optA;
124int				lm_optB;
125int				lm_optC = 100;
126int				lm_optD;
127int				lm_optE;
128int				lm_optH;
129int				lm_optI;
130int				lm_optL = 0;
131int				lm_optM = 0;
132char				*lm_optN;
133int				lm_optP;
134int				lm_optS;
135int				lm_optT;
136int				lm_optW;
137
138int				lm_def1 = 0;
139int				lm_defB = 0; /* use lm_nsecs_per_op */
140int				lm_defD = 10;
141int				lm_defH = 0;
142char				*lm_defN = NULL;
143int				lm_defP = 1;
144
145int				lm_defS = 0;
146int				lm_defT = 1;
147
148/*
149 * default on fast platform, should be overridden by individual
150 * benchmarks if significantly wrong in either direction.
151 */
152
153int				lm_nsecs_per_op = 5;
154
155char				*lm_procpath;
156char				lm_procname[STRSIZE];
157char				lm_usage[STRSIZE];
158char				lm_optstr[STRSIZE];
159char				lm_header[STRSIZE];
160size_t				lm_tsdsize = 0;
161
162
163/*
164 *  Globals we do not export to the user
165 */
166
167static barrier_t		*lm_barrier;
168static pid_t			*pids = NULL;
169static pthread_t		*tids = NULL;
170static int			pindex = -1;
171static void			*tsdseg = NULL;
172static size_t			tsdsize = 0;
173
174#ifdef USE_RDTSC
175static long long		lm_hz = 0;
176#endif
177
178
179/*
180 * Forward references
181 */
182
183static void 		worker_process();
184static void 		usage();
185static void 		print_stats(barrier_t *);
186static void 		print_histo(barrier_t *);
187static int 		remove_outliers(double *, int, stats_t *);
188static long long	nsecs_overhead;
189static long long	nsecs_resolution;
190static long long	get_nsecs_overhead();
191static int		crunch_stats(double *, int, stats_t *);
192static void 		compute_stats(barrier_t *);
193/*
194 * main routine; renamed in this file to allow linking with other
195 * files
196 */
197
198int
199actual_main(int argc, char *argv[])
200{
201	int			i;
202	int			opt;
203	extern char		*optarg;
204	char			*tmp;
205	char			optstr[256];
206	barrier_t		*b;
207	long long		startnsecs = getnsecs();
208
209#ifdef USE_RDTSC
210	if (getenv("LIBMICRO_HZ") == NULL) {
211		(void) printf("LIBMICRO_HZ needed but not set\n");
212		exit(1);
213	}
214	lm_hz = strtoll(getenv("LIBMICRO_HZ"), NULL, 10);
215#endif
216
217	lm_argc = argc;
218	lm_argv = argv;
219
220	/* before we do anything */
221	(void) benchmark_init();
222
223
224	nsecs_overhead = get_nsecs_overhead();
225	nsecs_resolution = get_nsecs_resolution();
226
227	/*
228	 * Set defaults
229	 */
230
231	lm_opt1	= lm_def1;
232	lm_optB	= lm_defB;
233	lm_optD	= lm_defD;
234	lm_optH	= lm_defH;
235	lm_optN	= lm_defN;
236	lm_optP	= lm_defP;
237
238	lm_optS	= lm_defS;
239	lm_optT	= lm_defT;
240
241	/*
242	 * squirrel away the path to the current
243	 * binary in a way that works on both
244	 * Linux and Solaris
245	 */
246
247	if (*argv[0] == '/') {
248		lm_procpath = strdup(argv[0]);
249		*strrchr(lm_procpath, '/') = 0;
250	} else {
251		char path[1024];
252		(void) getcwd(path, 1024);
253		(void) strcat(path, "/");
254		(void) strcat(path, argv[0]);
255		*strrchr(path, '/') = 0;
256		lm_procpath = strdup(path);
257	}
258
259	/*
260	 * name of binary
261	 */
262
263	if ((tmp = strrchr(argv[0], '/')) == NULL)
264		(void) strcpy(lm_procname, argv[0]);
265	else
266		(void) strcpy(lm_procname, tmp + 1);
267
268	if (lm_optN == NULL) {
269		lm_optN = lm_procname;
270	}
271
272	/*
273	 * Parse command line arguments
274	 */
275
276	(void) sprintf(optstr, "1AB:C:D:EHI:LMN:P:RST:VW?%s", lm_optstr);
277	while ((opt = getopt(argc, argv, optstr)) != -1) {
278		switch (opt) {
279		case '1':
280			lm_opt1 = 1;
281			break;
282		case 'A':
283			lm_optA = 1;
284			break;
285		case 'B':
286			lm_optB = sizetoint(optarg);
287			break;
288		case 'C':
289			lm_optC = sizetoint(optarg);
290			break;
291		case 'D':
292			lm_optD = sizetoint(optarg);
293			break;
294		case 'E':
295			lm_optE = 1;
296			break;
297		case 'H':
298			lm_optH = 1;
299			break;
300		case 'I':
301			lm_optI = sizetoint(optarg);
302			break;
303		case 'L':
304			lm_optL = 1;
305			break;
306		case 'M':
307			lm_optM = 1;
308			break;
309		case 'N':
310			lm_optN = optarg;
311			break;
312		case 'P':
313			lm_optP = sizetoint(optarg);
314			break;
315		case 'S':
316			lm_optS = 1;
317			break;
318		case 'T':
319			lm_optT = sizetoint(optarg);
320			break;
321		case 'V':
322			(void) printf("%s\n", LIBMICRO_VERSION);
323			exit(0);
324			break;
325		case 'W':
326			lm_optW = 1;
327			lm_optS = 1;
328			break;
329		case '?':
330			usage();
331			exit(0);
332			break;
333		default:
334			if (benchmark_optswitch(opt, optarg) == -1) {
335				usage();
336				exit(0);
337			}
338		}
339	}
340
341	/* deal with implicit and overriding options */
342	if (lm_opt1 && lm_optP > 1) {
343		lm_optP = 1;
344		(void) printf("warning: -1 overrides -P\n");
345	}
346
347	if (lm_optE) {
348		(void) fprintf(stderr, "Running:%20s", lm_optN);
349		(void) fflush(stderr);
350	}
351
352	if (lm_optB == 0) {
353		/*
354		 * neither benchmark or user has specified the number
355		 * of cnts/sample, so use computed value
356		 */
357		if (lm_optI)
358			lm_nsecs_per_op = lm_optI;
359#define BLOCK_TOCK_DURATION 10000 /* number of raw timer "tocks" ideally comprising a block of work */
360		lm_optB = nsecs_resolution * BLOCK_TOCK_DURATION / lm_nsecs_per_op;
361		if (lm_optB == 0)
362			lm_optB = 1;
363	}
364
365	/*
366	 * now that the options are set
367	 */
368
369	if (benchmark_initrun() == -1) {
370		exit(1);
371	}
372
373	/* allocate dynamic data */
374	pids = (pid_t *)malloc(lm_optP * sizeof (pid_t));
375	if (pids == NULL) {
376		perror("malloc(pids)");
377		exit(1);
378	}
379	tids = (pthread_t *)malloc(lm_optT * sizeof (pthread_t));
380	if (tids == NULL) {
381		perror("malloc(tids)");
382		exit(1);
383	}
384
385	/* check that the case defines lm_tsdsize before proceeding */
386	if (lm_tsdsize == (size_t)-1) {
387		(void) fprintf(stderr, "error in benchmark_init: "
388		    "lm_tsdsize not set\n");
389		exit(1);
390	}
391
392	/* round up tsdsize to nearest 128 to eliminate false sharing */
393	tsdsize = ((lm_tsdsize + 127) / 128) * 128;
394
395	/* allocate sufficient TSD for each thread in each process */
396	tsdseg = (void *)mmap(NULL, lm_optT * lm_optP * tsdsize + 8192,
397	    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0L);
398	if (tsdseg == NULL) {
399		perror("mmap(tsd)");
400		exit(1);
401	}
402
403	/* initialise worker synchronisation */
404	b = barrier_create(lm_optT * lm_optP, DATASIZE);
405	if (b == NULL) {
406		perror("barrier_create()");
407		exit(1);
408	}
409	lm_barrier = b;
410	b->ba_flag = 1;
411
412	/* need this here so that parent and children can call exit() */
413	(void) fflush(stdout);
414	(void) fflush(stderr);
415
416	/* when we started and when to stop */
417
418	b->ba_starttime = getnsecs();
419	b->ba_deadline = (long long) (b->ba_starttime + (lm_optD * 1000000LL));
420
421	/* do the work */
422	if (lm_opt1) {
423		/* single process, non-fork mode */
424		pindex = 0;
425		worker_process();
426	} else {
427		/* create worker processes */
428		for (i = 0; i < lm_optP; i++) {
429			pids[i] = fork();
430
431			switch (pids[i]) {
432			case 0:
433				pindex = i;
434				worker_process();
435				exit(0);
436				break;
437			case -1:
438				perror("fork");
439				exit(1);
440				break;
441			default:
442				continue;
443			}
444		}
445
446		/* wait for worker processes */
447		for (i = 0; i < lm_optP; i++) {
448			if (pids[i] > 0) {
449				(void) waitpid(pids[i], NULL, 0);
450			}
451		}
452	}
453
454	b->ba_endtime = getnsecs();
455
456	/* compute results */
457
458	compute_stats(b);
459
460	/* print arguments benchmark was invoked with ? */
461	if (lm_optL) {
462		int l;
463		(void) printf("# %s ", argv[0]);
464		for (l = 1; l < argc; l++) {
465			(void) printf("%s ", argv[l]);
466		}
467		(void) printf("\n");
468	}
469
470	/* print result header (unless suppressed) */
471	if (!lm_optH) {
472		(void) printf("%12s %3s %3s %12s %12s %8s %8s %s\n",
473		    "", "prc", "thr",
474		    "usecs/call",
475		    "samples", "errors", "cnt/samp", lm_header);
476	}
477
478	/* print result */
479
480	(void) printf("%-12s %3d %3d %12.5f %12d %8lld %8d %s\n",
481	    lm_optN, lm_optP, lm_optT,
482	    (lm_optM?b->ba_corrected.st_mean:b->ba_corrected.st_median),
483	    b->ba_batches, b->ba_errors, lm_optB,
484	    benchmark_result());
485
486	if (lm_optS) {
487		print_stats(b);
488	}
489
490	/* just incase something goes awry */
491	(void) fflush(stdout);
492	(void) fflush(stderr);
493
494	/* cleanup by stages */
495	(void) benchmark_finirun();
496	(void) barrier_destroy(b);
497	(void) benchmark_fini();
498
499	if (lm_optE) {
500		(void) fprintf(stderr, " for %12.5f seconds\n",
501		    (double)(getnsecs() - startnsecs) /
502		    1.e9);
503		(void) fflush(stderr);
504	}
505	return (0);
506}
507
508void *
509worker_thread(void *arg)
510{
511	result_t		r;
512	long long 		last_sleep = 0;
513	long long		t;
514
515	r.re_errors = benchmark_initworker(arg);
516
517	while (lm_barrier->ba_flag) {
518		r.re_count = 0;
519		r.re_errors += benchmark_initbatch(arg);
520
521		/* sync to clock */
522
523		if (lm_optA && ((t = getnsecs()) - last_sleep) > 75000000LL) {
524			(void) poll(0, 0, 10);
525			last_sleep = t;
526		}
527		/* wait for it ... */
528		(void) barrier_queue(lm_barrier, NULL);
529
530		/* time the test */
531		r.re_t0 = getnsecs();
532		(void) benchmark(arg, &r);
533		r.re_t1 = getnsecs();
534
535		/* time to stop? */
536		if (r.re_t1 > lm_barrier->ba_deadline &&
537		    (!lm_optC || lm_optC < lm_barrier->ba_batches)) {
538			lm_barrier->ba_flag = 0;
539		}
540
541		/* record results and sync */
542		(void) barrier_queue(lm_barrier, &r);
543
544		(void) benchmark_finibatch(arg);
545
546		r.re_errors = 0;
547	}
548
549	(void) benchmark_finiworker(arg);
550
551	return (0);
552}
553
554void
555worker_process()
556{
557	int			i;
558	void			*tsd;
559
560	for (i = 1; i < lm_optT; i++) {
561		tsd = gettsd(pindex, i);
562		if (pthread_create(&tids[i], NULL, worker_thread, tsd) != 0) {
563			perror("pthread_create");
564			exit(1);
565		}
566	}
567
568	tsd = gettsd(pindex, 0);
569	(void) worker_thread(tsd);
570
571	for (i = 1; i < lm_optT; i++) {
572		(void) pthread_join(tids[i], NULL);
573	}
574}
575
576void
577usage()
578{
579	(void) printf(
580	    "usage: %s\n"
581	    "       [-1] (single process; overrides -P > 1)\n"
582	    "       [-A] (align with clock)\n"
583	    "       [-B batch-size (default %d)]\n"
584	    "       [-C minimum number of samples (default 0)]\n"
585	    "       [-D duration in msecs (default %ds)]\n"
586	    "       [-E (echo name to stderr)]\n"
587	    "       [-H] (suppress headers)\n"
588	    "       [-I] nsecs per op (used to compute batch size)"
589	    "       [-L] (print argument line)\n"
590	    "       [-M] (reports mean rather than median)\n"
591	    "       [-N test-name (default '%s')]\n"
592	    "       [-P processes (default %d)]\n"
593	    "       [-S] (print detailed stats)\n"
594	    "       [-T threads (default %d)]\n"
595	    "       [-V] (print the libMicro version and exit)\n"
596	    "       [-W] (flag possible benchmark problems)\n"
597	    "%s\n",
598	    lm_procname,
599	    lm_defB, lm_defD, lm_procname, lm_defP, lm_defT,
600	    lm_usage);
601}
602
603void
604print_warnings(barrier_t *b)
605{
606	int head = 0;
607	int increase;
608
609	if (b->ba_quant) {
610		if (!head++) {
611			(void) printf("#\n# WARNINGS\n");
612		}
613		increase = (int)(floor((nsecs_resolution * 100.0) /
614		    ((double)lm_optB * b->ba_corrected.st_median * 1000.0)) +
615		    1.0);
616		(void) printf("#     Quantization error likely;"
617		    "increase batch size (-B option) %dX to avoid.\n",
618		    increase);
619	}
620
621	/*
622	 * XXX should warn on median != mean by a lot
623	 */
624
625	if (b->ba_errors) {
626		if (!head++) {
627			(void) printf("#\n# WARNINGS\n");
628		}
629		(void) printf("#     Errors occured during benchmark.\n");
630	}
631}
632
633void
634print_stats(barrier_t *b)
635{
636	(void) printf("#\n");
637	(void) printf("# STATISTICS         %12s          %12s\n",
638	    "usecs/call (raw)",
639	    "usecs/call (outliers removed)");
640
641	if (b->ba_count == 0) {
642		(void) printf("zero samples\n");
643		return;
644	}
645
646	(void) printf("#                    min %12.5f            %12.5f\n",
647	    b->ba_raw.st_min,
648	    b->ba_corrected.st_min);
649
650	(void) printf("#                    max %12.5f            %12.5f\n",
651	    b->ba_raw.st_max,
652	    b->ba_corrected.st_max);
653	(void) printf("#                   mean %12.5f            %12.5f\n",
654	    b->ba_raw.st_mean,
655	    b->ba_corrected.st_mean);
656	(void) printf("#                 median %12.5f            %12.5f\n",
657	    b->ba_raw.st_median,
658	    b->ba_corrected.st_median);
659	(void) printf("#                 stddev %12.5f            %12.5f\n",
660	    b->ba_raw.st_stddev,
661	    b->ba_corrected.st_stddev);
662	(void) printf("#         standard error %12.5f            %12.5f\n",
663	    b->ba_raw.st_stderr,
664	    b->ba_corrected.st_stderr);
665	(void) printf("#   99%% confidence level %12.5f            %12.5f\n",
666	    b->ba_raw.st_99confidence,
667	    b->ba_corrected.st_99confidence);
668	(void) printf("#                   skew %12.5f            %12.5f\n",
669	    b->ba_raw.st_skew,
670	    b->ba_corrected.st_skew);
671	(void) printf("#               kurtosis %12.5f            %12.5f\n",
672	    b->ba_raw.st_kurtosis,
673	    b->ba_corrected.st_kurtosis);
674
675	(void) printf("#       time correlation %12.5f            %12.5f\n",
676	    b->ba_raw.st_timecorr,
677	    b->ba_corrected.st_timecorr);
678	(void) printf("#\n");
679
680	(void) printf("#           elasped time %12.5f\n", (b->ba_endtime -
681	    b->ba_starttime) / 1.0e9);
682	(void) printf("#      number of samples %12d\n",   b->ba_batches);
683	(void) printf("#     number of outliers %12d\n", b->ba_outliers);
684	(void) printf("#      getnsecs overhead %12d\n", (int)nsecs_overhead);
685
686	(void) printf("#\n");
687	(void) printf("# DISTRIBUTION\n");
688
689	print_histo(b);
690
691	if (lm_optW) {
692		print_warnings(b);
693	}
694}
695
696void
697update_stats(barrier_t *b, result_t *r)
698{
699	double			time;
700	double			nsecs_per_call;
701
702	if (b->ba_waiters == 0) {
703		/* first thread only */
704		b->ba_t0 = r->re_t0;
705		b->ba_t1 = r->re_t1;
706		b->ba_count0 = 0;
707		b->ba_errors0 = 0;
708	} else {
709		/* all but first thread */
710		if (r->re_t0 < b->ba_t0) {
711			b->ba_t0 = r->re_t0;
712		}
713		if (r->re_t1 > b->ba_t1) {
714			b->ba_t1 = r->re_t1;
715		}
716	}
717
718	b->ba_count0  += r->re_count;
719	b->ba_errors0 += r->re_errors;
720
721	if (b->ba_waiters == b->ba_hwm - 1) {
722		/* last thread only */
723
724
725		time = (double)b->ba_t1 - (double)b->ba_t0 -
726		    (double)nsecs_overhead;
727
728		if (time < 100 * nsecs_resolution)
729			b->ba_quant++;
730
731		/*
732		 * normalize by procs * threads if not -U
733		 */
734
735		nsecs_per_call = time / (double)b->ba_count0 *
736		    (double)(lm_optT * lm_optP);
737
738		b->ba_count  += b->ba_count0;
739		b->ba_errors += b->ba_errors0;
740
741		b->ba_data[b->ba_batches % b->ba_datasize] =
742		    nsecs_per_call;
743
744		b->ba_batches++;
745	}
746}
747
748#ifdef USE_SEMOP
749barrier_t *
750barrier_create(int hwm, int datasize)
751{
752	struct sembuf		s[1];
753	barrier_t		*b;
754
755	/*LINTED*/
756	b = (barrier_t *)mmap(NULL,
757	    sizeof (barrier_t) + (datasize - 1) * sizeof (double),
758	    PROT_READ | PROT_WRITE,
759	    MAP_SHARED | MAP_ANON, -1, 0L);
760	if (b == (barrier_t *)MAP_FAILED) {
761		return (NULL);
762	}
763	b->ba_datasize = datasize;
764
765	b->ba_flag  = 0;
766	b->ba_hwm   = hwm;
767	b->ba_semid = semget(IPC_PRIVATE, 3, 0600);
768	if (b->ba_semid == -1) {
769		(void) munmap((void *)b, sizeof (barrier_t));
770		return (NULL);
771	}
772
773	/* [hwm - 1, 0, 0] */
774	s[0].sem_num = 0;
775	s[0].sem_op  = hwm - 1;
776	s[0].sem_flg = 0;
777	if (semop(b->ba_semid, s, 1) == -1) {
778		perror("semop(1)");
779		(void) semctl(b->ba_semid, 0, IPC_RMID);
780		(void) munmap((void *)b, sizeof (barrier_t));
781		return (NULL);
782	}
783
784	b->ba_waiters = 0;
785	b->ba_phase = 0;
786
787	b->ba_count = 0;
788	b->ba_errors = 0;
789
790	return (b);
791}
792
793int
794barrier_destroy(barrier_t *b)
795{
796	(void) semctl(b->ba_semid, 0, IPC_RMID);
797	(void) munmap((void *)b, sizeof (barrier_t));
798
799	return (0);
800}
801
802int
803barrier_queue(barrier_t *b, result_t *r)
804{
805	struct sembuf		s[2];
806
807	/*
808	 * {s0(-(hwm-1))}
809	 * if ! nowait {s1(-(hwm-1))}
810	 *   (all other threads)
811	 *   update shared stats
812	 *   {s0(hwm-1), s1(1)}
813	 *   {s0(1), s2(-1)}
814	 * else
815	 *   (last thread)
816	 *   update shared stats
817	 *   {s2(hwm-1)}
818	 */
819
820	s[0].sem_num = 0;
821	s[0].sem_op  = -(b->ba_hwm - 1);
822	s[0].sem_flg = 0;
823	if (semop(b->ba_semid, s, 1) == -1) {
824		perror("semop(2)");
825		return (-1);
826	}
827
828	s[0].sem_num = 1;
829	s[0].sem_op  = -(b->ba_hwm - 1);
830	s[0].sem_flg = IPC_NOWAIT;
831	if (semop(b->ba_semid, s, 1) == -1) {
832		if (errno != EAGAIN) {
833			perror("semop(3)");
834			return (-1);
835		}
836
837		/* all but the last thread */
838
839		if (r != NULL) {
840			update_stats(b, r);
841		}
842
843		b->ba_waiters++;
844
845		s[0].sem_num = 0;
846		s[0].sem_op  = b->ba_hwm - 1;
847		s[0].sem_flg = 0;
848		s[1].sem_num = 1;
849		s[1].sem_op  = 1;
850		s[1].sem_flg = 0;
851		if (semop(b->ba_semid, s, 2) == -1) {
852			perror("semop(4)");
853			return (-1);
854		}
855
856		s[0].sem_num = 0;
857		s[0].sem_op  = 1;
858		s[0].sem_flg = 0;
859		s[1].sem_num = 2;
860		s[1].sem_op  = -1;
861		s[1].sem_flg = 0;
862		if (semop(b->ba_semid, s, 2) == -1) {
863			perror("semop(5)");
864			return (-1);
865		}
866
867	} else {
868		/* the last thread */
869
870		if (r != NULL) {
871			update_stats(b, r);
872		}
873
874		b->ba_waiters = 0;
875		b->ba_phase++;
876
877		s[0].sem_num = 2;
878		s[0].sem_op  = b->ba_hwm - 1;
879		s[0].sem_flg = 0;
880		if (semop(b->ba_semid, s, 1) == -1) {
881			perror("semop(6)");
882			return (-1);
883		}
884	}
885
886	return (0);
887}
888
889#else /* USE_SEMOP */
890
891barrier_t *
892barrier_create(int hwm, int datasize)
893{
894	pthread_mutexattr_t	attr;
895	pthread_condattr_t	cattr;
896	barrier_t		*b;
897
898	/*LINTED*/
899	b = (barrier_t *)mmap(NULL,
900	    sizeof (barrier_t) + (datasize - 1) * sizeof (double),
901	    PROT_READ | PROT_WRITE,
902	    MAP_SHARED | MAP_ANON, -1, 0L);
903	if (b == (barrier_t *)MAP_FAILED) {
904		return (NULL);
905	}
906	b->ba_datasize = datasize;
907
908	b->ba_hwm = hwm;
909	b->ba_flag  = 0;
910
911	(void) pthread_mutexattr_init(&attr);
912	(void) pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
913
914	(void) pthread_condattr_init(&cattr);
915	(void) pthread_condattr_setpshared(&cattr, PTHREAD_PROCESS_SHARED);
916
917	(void) pthread_mutex_init(&b->ba_lock, &attr);
918	(void) pthread_cond_init(&b->ba_cv, &cattr);
919
920	b->ba_waiters = 0;
921	b->ba_phase = 0;
922
923	b->ba_count = 0;
924	b->ba_errors = 0;
925
926	return (b);
927}
928
929int
930barrier_destroy(barrier_t *b)
931{
932	(void) munmap((void *)b, sizeof (barrier_t));
933
934	return (0);
935}
936
937int
938barrier_queue(barrier_t *b, result_t *r)
939{
940	int			phase;
941
942	(void) pthread_mutex_lock(&b->ba_lock);
943
944	if (r != NULL) {
945		update_stats(b, r);
946	}
947
948	phase = b->ba_phase;
949
950	b->ba_waiters++;
951	if (b->ba_hwm == b->ba_waiters) {
952		b->ba_waiters = 0;
953		b->ba_phase++;
954		(void) pthread_cond_broadcast(&b->ba_cv);
955	}
956
957	while (b->ba_phase == phase) {
958		(void) pthread_cond_wait(&b->ba_cv, &b->ba_lock);
959	}
960
961	(void) pthread_mutex_unlock(&b->ba_lock);
962	return (0);
963}
964#endif /* USE_SEMOP */
965
966int
967gettindex()
968{
969	int			i;
970
971	if (tids == NULL) {
972		return (-1);
973	}
974
975	for (i = 1; i < lm_optT; i++) {
976		if (pthread_self() == tids[i]) {
977			return (i);
978		}
979	}
980
981	return (0);
982}
983
984int
985getpindex()
986{
987	return (pindex);
988}
989
990void *
991gettsd(int p, int t)
992{
993	if ((p < 0) || (p >= lm_optP) || (t < 0) || (t >= lm_optT))
994		return (NULL);
995
996	return ((void *)((unsigned long)tsdseg +
997	    (((p * lm_optT) + t) * tsdsize)));
998}
999
1000#if defined(__APPLE__)
1001int
1002gettsdindex(void *arg){
1003        /*
1004         * gettindex() can race with pthread_create() filling in tids[].
1005         * This is an alternative approach to finding the calling thread's tsd in t
1006sdseg
1007         */
1008        return tsdsize ? ((unsigned long)arg - (unsigned long)tsdseg)/tsdsize : 0;
1009}
1010#endif /* __APPLE__ */
1011
1012#ifdef USE_GETHRTIME
1013long long
1014getnsecs()
1015{
1016	return (gethrtime());
1017}
1018
1019long long
1020getusecs()
1021{
1022	return (gethrtime() / 1000);
1023}
1024
1025#elif USE_RDTSC /* USE_GETHRTIME */
1026
1027__inline__ long long
1028rdtsc(void)
1029{
1030	unsigned long long x;
1031	__asm__ volatile(".byte 0x0f, 0x31" : "=A" (x));
1032	return (x);
1033}
1034
1035long long
1036getusecs()
1037{
1038	return (rdtsc() * 1000000 / lm_hz);
1039}
1040
1041long long
1042getnsecs()
1043{
1044	return (rdtsc() * 1000000000 / lm_hz);
1045}
1046
1047#else /* USE_GETHRTIME */
1048
1049long long
1050getusecs()
1051{
1052	struct timeval		tv;
1053
1054	(void) gettimeofday(&tv, NULL);
1055
1056	return ((long long)tv.tv_sec * 1000000LL + (long long) tv.tv_usec);
1057}
1058
1059long long
1060getnsecs()
1061{
1062	struct timeval		tv;
1063
1064	(void) gettimeofday(&tv, NULL);
1065
1066	return ((long long)tv.tv_sec * 1000000000LL +
1067	    (long long) tv.tv_usec * 1000LL);
1068}
1069
1070#endif /* USE_GETHRTIME */
1071
1072int
1073setfdlimit(int limit)
1074{
1075	struct rlimit rlimit;
1076
1077	if (getrlimit(RLIMIT_NOFILE, &rlimit) < 0) {
1078		perror("getrlimit");
1079		exit(1);
1080	}
1081
1082	if (rlimit.rlim_cur > limit)
1083		return (0); /* no worries */
1084
1085	rlimit.rlim_cur = limit;
1086
1087	if (rlimit.rlim_max < limit)
1088		rlimit.rlim_max = limit;
1089
1090	if (setrlimit(RLIMIT_NOFILE, &rlimit) < 0) {
1091		perror("setrlimit");
1092		exit(3);
1093	}
1094
1095	return (0);
1096}
1097
1098
1099#define	KILOBYTE		1024
1100#define	MEGABYTE		(KILOBYTE * KILOBYTE)
1101#define	GIGABYTE		(KILOBYTE * MEGABYTE)
1102
1103long long
1104sizetoll(const char *arg)
1105{
1106	int			len = strlen(arg);
1107	int			i;
1108	long long		mult = 1;
1109
1110	if (len && isalpha(arg[len - 1])) {
1111		switch (arg[len - 1]) {
1112
1113		case 'k':
1114		case 'K':
1115			mult = KILOBYTE;
1116			break;
1117		case 'm':
1118		case 'M':
1119			mult = MEGABYTE;
1120			break;
1121		case 'g':
1122		case 'G':
1123			mult = GIGABYTE;
1124			break;
1125		default:
1126			return (-1);
1127		}
1128
1129		for (i = 0; i < len - 1; i++)
1130			if (!isdigit(arg[i]))
1131				return (-1);
1132	}
1133
1134	return (mult * strtoll(arg, NULL, 10));
1135}
1136
1137int
1138sizetoint(const char *arg)
1139{
1140	int			len = strlen(arg);
1141	int			i;
1142	long long		mult = 1;
1143
1144	if (len && isalpha(arg[len - 1])) {
1145		switch (arg[len - 1]) {
1146
1147		case 'k':
1148		case 'K':
1149			mult = KILOBYTE;
1150			break;
1151		case 'm':
1152		case 'M':
1153			mult = MEGABYTE;
1154			break;
1155		case 'g':
1156		case 'G':
1157			mult = GIGABYTE;
1158			break;
1159		default:
1160			return (-1);
1161		}
1162
1163		for (i = 0; i < len - 1; i++)
1164			if (!isdigit(arg[i]))
1165				return (-1);
1166	}
1167
1168	return (mult * atoi(arg));
1169}
1170
1171static void
1172print_bar(long count, long total)
1173{
1174	int			i;
1175
1176	(void) putchar_unlocked(count ? '*' : ' ');
1177	for (i = 1; i < (32 * count) / total; i++)
1178		(void) putchar_unlocked('*');
1179	for (; i < 32; i++)
1180		(void) putchar_unlocked(' ');
1181}
1182
1183static int
1184doublecmp(const void *p1, const void *p2)
1185{
1186	double a = *((double *)p1);
1187	double b = *((double *)p2);
1188
1189	if (a > b)
1190		return (1);
1191	if (a < b)
1192		return (-1);
1193	return (0);
1194}
1195
1196static void
1197print_histo(barrier_t *b)
1198{
1199	int			n;
1200	int			i;
1201	int			j;
1202	int			last;
1203	long long		maxcount;
1204	double			sum;
1205	long long		min;
1206	long long		scale;
1207	double			x;
1208	long long		y;
1209	long long		count;
1210	int			i95;
1211	double			p95;
1212	double			r95;
1213	double			m95;
1214	histo_t			*histo;
1215
1216	(void) printf("#	%12s %12s %32s %12s\n", "counts", "usecs/call",
1217	    "", "means");
1218
1219	/* calculate how much data we've captured */
1220	n = b->ba_batches > b->ba_datasize ? b->ba_datasize : b->ba_batches;
1221
1222	/* find the 95th percentile - index, value and range */
1223	qsort((void *)b->ba_data, n, sizeof (double), doublecmp);
1224	min = b->ba_data[0] + 0.000001;
1225	i95 = n * 95 / 100;
1226	p95 = b->ba_data[i95];
1227	r95 = p95 - min + 1;
1228
1229	/* find a suitable min and scale */
1230	i = 0;
1231	x = r95 / (HISTOSIZE - 1);
1232	while (x >= 10.0) {
1233		x /= 10.0;
1234		i++;
1235	}
1236	y = x + 0.9999999999;
1237	while (i > 0) {
1238		y *= 10;
1239		i--;
1240	}
1241	min /= y;
1242	min *= y;
1243	scale = y * (HISTOSIZE - 1);
1244	if (scale < (HISTOSIZE - 1)) {
1245		scale = (HISTOSIZE - 1);
1246	}
1247
1248	/* create and initialise the histogram */
1249	histo = malloc(HISTOSIZE * sizeof (histo_t));
1250	for (i = 0; i < HISTOSIZE; i++) {
1251		histo[i].sum = 0.0;
1252		histo[i].count = 0;
1253	}
1254
1255	/* populate the histogram */
1256	last = 0;
1257	sum = 0.0;
1258	count = 0;
1259	for (i = 0; i < i95; i++) {
1260		j = (HISTOSIZE - 1) * (b->ba_data[i] - min) / scale;
1261
1262		if (j >= HISTOSIZE) {
1263			(void) printf("panic!\n");
1264			j = HISTOSIZE - 1;
1265		}
1266
1267		histo[j].sum += b->ba_data[i];
1268		histo[j].count++;
1269
1270		sum += b->ba_data[i];
1271		count++;
1272	}
1273	m95 = sum / count;
1274
1275	/* find the larges bucket */
1276	maxcount = 0;
1277	for (i = 0; i < HISTOSIZE; i++)
1278		if (histo[i].count > 0) {
1279			last = i;
1280			if (histo[i].count > maxcount)
1281				maxcount = histo[i].count;
1282		}
1283
1284	/* print the buckets */
1285	for (i = 0; i <= last; i++) {
1286		(void) printf("#       %12lld %12.5f |", histo[i].count,
1287		    (min + scale * (double)i / (HISTOSIZE - 1)));
1288
1289		print_bar(histo[i].count, maxcount);
1290
1291		if (histo[i].count > 0)
1292			(void) printf("%12.5f\n",
1293			    histo[i].sum / histo[i].count);
1294		else
1295			(void) printf("%12s\n", "-");
1296	}
1297
1298	/* find the mean of values beyond the 95th percentile */
1299	sum = 0.0;
1300	count = 0;
1301	for (i = i95; i < n; i++) {
1302		sum += b->ba_data[i];
1303		count++;
1304	}
1305
1306	/* print the >95% bucket summary */
1307	(void) printf("#\n");
1308	(void) printf("#       %12lld %12s |", count, "> 95%");
1309	print_bar(count, maxcount);
1310	if (count > 0)
1311		(void) printf("%12.5f\n", sum / count);
1312	else
1313		(void) printf("%12s\n", "-");
1314	(void) printf("#\n");
1315	(void) printf("#       %12s %12.5f\n", "mean of 95%", m95);
1316	(void) printf("#       %12s %12.5f\n", "95th %ile", p95);
1317
1318	/* quantify any buffer overflow */
1319	if (b->ba_batches > b->ba_datasize)
1320		(void) printf("#       %12s %12d\n", "data dropped",
1321		    b->ba_batches - b->ba_datasize);
1322}
1323
1324static void
1325compute_stats(barrier_t *b)
1326{
1327	int i;
1328
1329	if (b->ba_batches > b->ba_datasize)
1330		b->ba_batches = b->ba_datasize;
1331
1332	/*
1333	 * convert to usecs/call
1334	 */
1335
1336	for (i = 0; i < b->ba_batches; i++)
1337		b->ba_data[i] /= 1000.0;
1338
1339	/*
1340	 * do raw stats
1341	 */
1342
1343	(void) crunch_stats(b->ba_data, b->ba_batches, &b->ba_raw);
1344
1345	/*
1346	 * recursively apply 3 sigma rule to remove outliers
1347	 */
1348
1349	b->ba_corrected = b->ba_raw;
1350	b->ba_outliers = 0;
1351
1352	if (b->ba_batches > 40) { /* remove outliers */
1353		int removed;
1354
1355		do {
1356			removed = remove_outliers(b->ba_data, b->ba_batches,
1357			    &b->ba_corrected);
1358			b->ba_outliers += removed;
1359			b->ba_batches -= removed;
1360			(void) crunch_stats(b->ba_data, b->ba_batches,
1361			    &b->ba_corrected);
1362			} while (removed != 0 && b->ba_batches > 40);
1363	}
1364
1365}
1366
1367/*
1368 * routine to compute various statistics on array of doubles.
1369 */
1370
1371static int
1372crunch_stats(double *data, int count, stats_t *stats)
1373{
1374	double a;
1375	double std;
1376	double diff;
1377	double sk;
1378	double ku;
1379	double mean;
1380	int i;
1381	int bytes;
1382	double *dupdata;
1383
1384	/*
1385	 * first we need the mean
1386	 */
1387
1388	mean = 0.0;
1389
1390	for (i = 0; i < count; i++) {
1391		mean += data[i];
1392	}
1393
1394	mean /= count;
1395
1396	stats->st_mean = mean;
1397
1398	/*
1399	 * malloc and sort so we can do median
1400	 */
1401
1402	dupdata = malloc(bytes = sizeof (double) * count);
1403	(void) memcpy(dupdata, data, bytes);
1404	qsort((void *)dupdata, count, sizeof (double), doublecmp);
1405	stats->st_median   = dupdata[count/2];
1406
1407	/*
1408	 * reuse dupdata to compute time correlation of data to
1409	 * detect interesting time-based trends
1410	 */
1411
1412	for (i = 0; i < count; i++)
1413		dupdata[i] = (double)i;
1414
1415	(void) fit_line(dupdata, data, count, &a, &stats->st_timecorr);
1416	free(dupdata);
1417
1418	std = 0.0;
1419	sk  = 0.0;
1420	ku  = 0.0;
1421
1422	stats->st_max = -1;
1423	stats->st_min = 1.0e99; /* hard to find portable values */
1424
1425	for (i = 0; i < count; i++) {
1426		if (data[i] > stats->st_max)
1427			stats->st_max = data[i];
1428		if (data[i] < stats->st_min)
1429			stats->st_min = data[i];
1430
1431		diff = data[i] - mean;
1432		std += diff * diff;
1433		sk  += diff * diff * diff;
1434		ku  += diff * diff * diff * diff;
1435	}
1436
1437	stats->st_stddev   = std = sqrt(std/(double)(count - 1));
1438	stats->st_stderr   = std / sqrt(count);
1439	stats->st_99confidence = stats->st_stderr * 2.326;
1440	stats->st_skew	   = sk / (std * std * std) / (double)(count);
1441	stats->st_kurtosis = ku / (std * std * std * std) /
1442	    (double)(count) - 3;
1443
1444	return (0);
1445}
1446
1447/*
1448 * does a least squares fit to the set of points x, y and
1449 * fits a line y = a + bx.  Returns a, b
1450 */
1451
1452int
1453fit_line(double *x, double *y, int count, double *a, double *b)
1454{
1455	double sumx, sumy, sumxy, sumx2;
1456	double denom;
1457	int i;
1458
1459	sumx = sumy = sumxy = sumx2 = 0.0;
1460
1461	for (i = 0; i < count; i++) {
1462		sumx	+= x[i];
1463		sumx2	+= x[i] * x[i];
1464		sumy	+= y[i];
1465		sumxy	+= x[i] * y[i];
1466	}
1467
1468	denom = count * sumx2 - sumx * sumx;
1469
1470	if (denom == 0.0)
1471		return (-1);
1472
1473	*a = (sumy * sumx2 - sumx * sumxy) / denom;
1474
1475	*b = (count * sumxy - sumx * sumy) / denom;
1476
1477	return (0);
1478}
1479
1480/*
1481 * empty function for measurement purposes
1482 */
1483
1484int
1485nop()
1486{
1487	return (1);
1488}
1489
1490#define	NSECITER 1000
1491
1492static long long
1493get_nsecs_overhead()
1494{
1495	long long s;
1496
1497	double data[NSECITER];
1498	stats_t stats;
1499
1500	int i;
1501	int count;
1502	int outliers;
1503
1504	(void) getnsecs(); /* warmup */
1505	(void) getnsecs(); /* warmup */
1506	(void) getnsecs(); /* warmup */
1507
1508	i = 0;
1509
1510	count = NSECITER;
1511
1512	for (i = 0; i < count; i++) {
1513		s = getnsecs();
1514		data[i] = getnsecs() - s;
1515	}
1516
1517	(void) crunch_stats(data, count, &stats);
1518
1519	while ((outliers = remove_outliers(data, count, &stats)) != 0) {
1520		count -= outliers;
1521		(void) crunch_stats(data, count, &stats);
1522	}
1523
1524	return ((long long)stats.st_mean);
1525
1526}
1527
1528long long
1529get_nsecs_resolution()
1530{
1531	long long y[1000];
1532
1533	int i, j, nops, res;
1534	long long start, stop;
1535
1536	/*
1537	 * first, figure out how many nops to use
1538	 * to get any delta between time measurements.
1539	 * use a minimum of one.
1540	 */
1541
1542	/*
1543	 * warm cache
1544	 */
1545
1546	stop = start = getnsecs();
1547
1548	for (i = 1; i < 10000000; i++) {
1549		start = getnsecs();
1550		for (j = i; j; j--)
1551			;
1552		stop = getnsecs();
1553		if (stop > start)
1554			break;
1555	}
1556
1557	nops = i;
1558
1559	/*
1560	 * now collect data at linearly varying intervals
1561	 */
1562
1563	for (i = 0; i < 1000; i++) {
1564		start = getnsecs();
1565		for (j = nops * i; j; j--)
1566			;
1567		stop = getnsecs();
1568		y[i] = stop - start;
1569	}
1570
1571	/*
1572	 * find smallest positive difference between samples;
1573	 * this is the timer resolution
1574	 */
1575
1576	res = 1<<30;
1577
1578	for (i = 1; i < 1000; i++) {
1579		int diff = y[i] - y[i-1];
1580
1581		if (diff > 0 && res > diff)
1582			res = diff;
1583
1584	}
1585
1586	return (res);
1587}
1588
1589/*
1590 * remove any data points from the array more than 3 sigma out
1591 */
1592
1593static int
1594remove_outliers(double *data, int count, stats_t *stats)
1595{
1596	double outmin = stats->st_mean - 3 * stats->st_stddev;
1597	double outmax = stats->st_mean + 3 * stats->st_stddev;
1598
1599	int i, j, outliers;
1600
1601	for (outliers = i = j = 0; i < count; i++)
1602		if (data[i] > outmax || data[i] < outmin)
1603			outliers++;
1604		else
1605			data[j++] = data[i];
1606
1607	return (outliers);
1608}
1609