1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms
5 * of the Common Development and Distribution License
6 * (the "License").  You may not use this file except
7 * in compliance with the License.
8 *
9 * You can obtain a copy of the license at
10 * src/OPENSOLARIS.LICENSE
11 * or http://www.opensolaris.org/os/licensing.
12 * See the License for the specific language governing
13 * permissions and limitations under the License.
14 *
15 * When distributing Covered Code, include this CDDL
16 * HEADER in each file and include the License file at
17 * usr/src/OPENSOLARIS.LICENSE.  If applicable,
18 * add the following below this CDDL HEADER, with the
19 * fields enclosed by brackets "[]" replaced with your
20 * own identifying information: Portions Copyright [yyyy]
21 * [name of copyright owner]
22 *
23 * CDDL HEADER END
24 */
25
26/*
27 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
28 * Use is subject to license terms.
29 */
30
31/*
32 * benchmarking routines
33 */
34
35#include <sys/types.h>
36#include <sys/time.h>
37/* #include <sys/ipc.h> */
38/* #include <sys/sem.h> */
39#include <sys/mman.h>
40#include <sys/wait.h>
41#include <ctype.h>
42#include <string.h>
43#include <strings.h>
44#include <signal.h>
45#include <stdio.h>
46#include <unistd.h>
47#include <stdlib.h>
48#include <poll.h>
49#include <pthread.h>
50#include <dlfcn.h>
51#include <errno.h>
52#include <sys/resource.h>
53#include <math.h>
54#include <limits.h>
55
56#ifdef	__sun
57#include <sys/elf.h>
58#endif
59
60#include "libmicro.h"
61
62
63/*
64 * user visible globals
65 */
66
67int				lm_argc = 0;
68char **				lm_argv = NULL;
69
70int				lm_opt1;
71int				lm_optA;
72int				lm_optB;
73int				lm_optC = 100;
74int				lm_optD;
75int				lm_optE;
76int				lm_optH;
77int				lm_optI;
78int				lm_optL = 0;
79int				lm_optM = 0;
80char				*lm_optN;
81int				lm_optP;
82int				lm_optS;
83int				lm_optT;
84int				lm_optW;
85
86int				lm_def1 = 0;
87int				lm_defB = 0; /* use lm_nsecs_per_op */
88int				lm_defD = 10;
89int				lm_defH = 0;
90char				*lm_defN = NULL;
91int				lm_defP = 1;
92
93int				lm_defS = 0;
94int				lm_defT = 1;
95
96/*
97 * default on fast platform, should be overridden by individual
98 * benchmarks if significantly wrong in either direction.
99 */
100
101int				lm_nsecs_per_op = 5;
102
103char				*lm_procpath;
104char				lm_procname[STRSIZE];
105char				lm_usage[STRSIZE];
106char				lm_optstr[STRSIZE];
107char				lm_header[STRSIZE];
108size_t				lm_tsdsize = 0;
109
110
111/*
112 *  Globals we do not export to the user
113 */
114
115static barrier_t		*lm_barrier;
116static pid_t			*pids = NULL;
117static pthread_t		*tids = NULL;
118static int			pindex = -1;
119static void			*tsdseg = NULL;
120static size_t			tsdsize = 0;
121
122#ifdef USE_RDTSC
123static long long		lm_hz = 0;
124#endif
125
126
127/*
128 * Forward references
129 */
130
131static void 		worker_process();
132static void 		usage();
133static void 		print_stats(barrier_t *);
134static void 		print_histo(barrier_t *);
135static int 		remove_outliers(double *, int, stats_t *);
136static long long	nsecs_overhead;
137static long long	nsecs_resolution;
138static long long	get_nsecs_overhead();
139static int		crunch_stats(double *, int, stats_t *);
140static void 		compute_stats(barrier_t *);
141/*
142 * main routine; renamed in this file to allow linking with other
143 * files
144 */
145
146int
147actual_main(int argc, char *argv[])
148{
149	int			i;
150	int			opt;
151	extern char		*optarg;
152	char			*tmp;
153	char			optstr[256];
154	barrier_t		*b;
155	long long		startnsecs = getnsecs();
156
157#ifdef USE_RDTSC
158	if (getenv("LIBMICRO_HZ") == NULL) {
159		(void) printf("LIBMICRO_HZ needed but not set\n");
160		exit(1);
161	}
162	lm_hz = strtoll(getenv("LIBMICRO_HZ"), NULL, 10);
163#endif
164
165	lm_argc = argc;
166	lm_argv = argv;
167
168	/* before we do anything */
169	(void) benchmark_init();
170
171
172	nsecs_overhead = get_nsecs_overhead();
173	nsecs_resolution = get_nsecs_resolution();
174
175	/*
176	 * Set defaults
177	 */
178
179	lm_opt1	= lm_def1;
180	lm_optB	= lm_defB;
181	lm_optD	= lm_defD;
182	lm_optH	= lm_defH;
183	lm_optN	= lm_defN;
184	lm_optP	= lm_defP;
185
186	lm_optS	= lm_defS;
187	lm_optT	= lm_defT;
188
189	/*
190	 * squirrel away the path to the current
191	 * binary in a way that works on both
192	 * Linux and Solaris
193	 */
194
195	if (*argv[0] == '/') {
196		lm_procpath = strdup(argv[0]);
197		*strrchr(lm_procpath, '/') = 0;
198	} else {
199		char path[1024];
200		(void) getcwd(path, 1024);
201		(void) strcat(path, "/");
202		(void) strcat(path, argv[0]);
203		*strrchr(path, '/') = 0;
204		lm_procpath = strdup(path);
205	}
206
207	/*
208	 * name of binary
209	 */
210
211	if ((tmp = strrchr(argv[0], '/')) == NULL)
212		(void) strcpy(lm_procname, argv[0]);
213	else
214		(void) strcpy(lm_procname, tmp + 1);
215
216	if (lm_optN == NULL) {
217		lm_optN = lm_procname;
218	}
219
220	/*
221	 * Parse command line arguments
222	 */
223
224	(void) sprintf(optstr, "1AB:C:D:EHI:LMN:P:RST:VW?%s", lm_optstr);
225	while ((opt = getopt(argc, argv, optstr)) != -1) {
226		switch (opt) {
227		case '1':
228			lm_opt1 = 1;
229			break;
230		case 'A':
231			lm_optA = 1;
232			break;
233		case 'B':
234			lm_optB = sizetoint(optarg);
235			break;
236		case 'C':
237			lm_optC = sizetoint(optarg);
238			break;
239		case 'D':
240			lm_optD = sizetoint(optarg);
241			break;
242		case 'E':
243			lm_optE = 1;
244			break;
245		case 'H':
246			lm_optH = 1;
247			break;
248		case 'I':
249			lm_optI = sizetoint(optarg);
250			break;
251		case 'L':
252			lm_optL = 1;
253			break;
254		case 'M':
255			lm_optM = 1;
256			break;
257		case 'N':
258			lm_optN = optarg;
259			break;
260		case 'P':
261			lm_optP = sizetoint(optarg);
262			break;
263		case 'S':
264			lm_optS = 1;
265			break;
266		case 'T':
267			lm_optT = sizetoint(optarg);
268			break;
269		case 'V':
270			(void) printf("%s\n", LIBMICRO_VERSION);
271			exit(0);
272			break;
273		case 'W':
274			lm_optW = 1;
275			lm_optS = 1;
276			break;
277		case '?':
278			usage();
279			exit(0);
280			break;
281		default:
282			if (benchmark_optswitch(opt, optarg) == -1) {
283				usage();
284				exit(0);
285			}
286		}
287	}
288
289	/* deal with implicit and overriding options */
290	if (lm_opt1 && lm_optP > 1) {
291		lm_optP = 1;
292		(void) printf("warning: -1 overrides -P\n");
293	}
294
295	if (lm_optE) {
296		(void) fprintf(stderr, "Running:%20s", lm_optN);
297		(void) fflush(stderr);
298	}
299
300	if (lm_optB == 0) {
301		/*
302		 * neither benchmark or user has specified the number
303		 * of cnts/sample, so use computed value
304		 */
305		if (lm_optI)
306			lm_nsecs_per_op = lm_optI;
307
308		lm_optB = nsecs_resolution * 100 / lm_nsecs_per_op;
309		if (lm_optB == 0)
310			lm_optB = 1;
311	}
312
313	/*
314	 * now that the options are set
315	 */
316
317	if (benchmark_initrun() == -1) {
318		exit(1);
319	}
320
321	/* allocate dynamic data */
322	pids = (pid_t *)malloc(lm_optP * sizeof (pid_t));
323	if (pids == NULL) {
324		perror("malloc(pids)");
325		exit(1);
326	}
327	tids = (pthread_t *)malloc(lm_optT * sizeof (pthread_t));
328	if (tids == NULL) {
329		perror("malloc(tids)");
330		exit(1);
331	}
332
333	/* check that the case defines lm_tsdsize before proceeding */
334	if (lm_tsdsize == (size_t)-1) {
335		(void) fprintf(stderr, "error in benchmark_init: "
336		    "lm_tsdsize not set\n");
337		exit(1);
338	}
339
340	/* round up tsdsize to nearest 128 to eliminate false sharing */
341	tsdsize = ((lm_tsdsize + 127) / 128) * 128;
342
343	/* allocate sufficient TSD for each thread in each process */
344	tsdseg = (void *)mmap(NULL, lm_optT * lm_optP * tsdsize + 8192,
345	    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0L);
346	if (tsdseg == NULL) {
347		perror("mmap(tsd)");
348		exit(1);
349	}
350
351	/* initialise worker synchronisation */
352	b = barrier_create(lm_optT * lm_optP, DATASIZE);
353	if (b == NULL) {
354		perror("barrier_create()");
355		exit(1);
356	}
357	lm_barrier = b;
358	b->ba_flag = 1;
359
360	/* need this here so that parent and children can call exit() */
361	(void) fflush(stdout);
362	(void) fflush(stderr);
363
364	/* when we started and when to stop */
365
366	b->ba_starttime = getnsecs();
367	b->ba_deadline = (long long) (b->ba_starttime + (lm_optD * 1000000LL));
368
369	/* do the work */
370	if (lm_opt1) {
371		/* single process, non-fork mode */
372		pindex = 0;
373		worker_process();
374	} else {
375		/* create worker processes */
376		for (i = 0; i < lm_optP; i++) {
377			pids[i] = fork();
378
379			switch (pids[i]) {
380			case 0:
381				pindex = i;
382				worker_process();
383				exit(0);
384				break;
385			case -1:
386				perror("fork");
387				exit(1);
388				break;
389			default:
390				continue;
391			}
392		}
393
394		/* wait for worker processes */
395		for (i = 0; i < lm_optP; i++) {
396			if (pids[i] > 0) {
397				(void) waitpid(pids[i], NULL, 0);
398			}
399		}
400	}
401
402	b->ba_endtime = getnsecs();
403
404	/* compute results */
405
406	compute_stats(b);
407
408	/* print arguments benchmark was invoked with ? */
409	if (lm_optL) {
410		int l;
411		(void) printf("# %s ", argv[0]);
412		for (l = 1; l < argc; l++) {
413			(void) printf("%s ", argv[l]);
414		}
415		(void) printf("\n");
416	}
417
418	/* print result header (unless suppressed) */
419	if (!lm_optH) {
420		(void) printf("%12s %3s %3s %12s %12s %8s %8s %s\n",
421		    "", "prc", "thr",
422		    "usecs/call",
423		    "samples", "errors", "cnt/samp", lm_header);
424	}
425
426	/* print result */
427
428	(void) printf("%-12s %3d %3d %12.5f %12d %8lld %8d %s\n",
429	    lm_optN, lm_optP, lm_optT,
430	    (lm_optM?b->ba_corrected.st_mean:b->ba_corrected.st_median),
431	    b->ba_batches, b->ba_errors, lm_optB,
432	    benchmark_result());
433
434	if (lm_optS) {
435		print_stats(b);
436	}
437
438	/* just incase something goes awry */
439	(void) fflush(stdout);
440	(void) fflush(stderr);
441
442	/* cleanup by stages */
443	(void) benchmark_finirun();
444	(void) barrier_destroy(b);
445	(void) benchmark_fini();
446
447	if (lm_optE) {
448		(void) fprintf(stderr, " for %12.5f seconds\n",
449		    (double)(getnsecs() - startnsecs) /
450		    1.e9);
451		(void) fflush(stderr);
452	}
453	return (0);
454}
455
456void *
457worker_thread(void *arg)
458{
459	result_t		r;
460	long long 		last_sleep = 0;
461	long long		t;
462
463	r.re_errors = benchmark_initworker(arg);
464
465	while (lm_barrier->ba_flag) {
466		r.re_count = 0;
467		r.re_errors += benchmark_initbatch(arg);
468
469		/* sync to clock */
470
471		if (lm_optA && ((t = getnsecs()) - last_sleep) > 75000000LL) {
472			(void) poll(0, 0, 10);
473			last_sleep = t;
474		}
475		/* wait for it ... */
476		(void) barrier_queue(lm_barrier, NULL);
477
478		/* time the test */
479		r.re_t0 = getnsecs();
480		(void) benchmark(arg, &r);
481		r.re_t1 = getnsecs();
482
483		/* time to stop? */
484		if (r.re_t1 > lm_barrier->ba_deadline &&
485		    (!lm_optC || lm_optC < lm_barrier->ba_batches)) {
486			lm_barrier->ba_flag = 0;
487		}
488
489		/* record results and sync */
490		(void) barrier_queue(lm_barrier, &r);
491
492		(void) benchmark_finibatch(arg);
493
494		r.re_errors = 0;
495	}
496
497	(void) benchmark_finiworker(arg);
498
499	return (0);
500}
501
502void
503worker_process()
504{
505	int			i;
506	void			*tsd;
507
508	for (i = 1; i < lm_optT; i++) {
509		tsd = gettsd(pindex, i);
510		if (pthread_create(&tids[i], NULL, worker_thread, tsd) != 0) {
511			perror("pthread_create");
512			exit(1);
513		}
514	}
515
516	tsd = gettsd(pindex, 0);
517	(void) worker_thread(tsd);
518
519	for (i = 1; i < lm_optT; i++) {
520		(void) pthread_join(tids[i], NULL);
521	}
522}
523
524void
525usage()
526{
527	(void) printf(
528	    "usage: %s\n"
529	    "       [-1] (single process; overrides -P > 1)\n"
530	    "       [-A] (align with clock)\n"
531	    "       [-B batch-size (default %d)]\n"
532	    "       [-C minimum number of samples (default 0)]\n"
533	    "       [-D duration in msecs (default %ds)]\n"
534	    "       [-E (echo name to stderr)]\n"
535	    "       [-H] (suppress headers)\n"
536	    "       [-I] nsecs per op (used to compute batch size)"
537	    "       [-L] (print argument line)\n"
538	    "       [-M] (reports mean rather than median)\n"
539	    "       [-N test-name (default '%s')]\n"
540	    "       [-P processes (default %d)]\n"
541	    "       [-S] (print detailed stats)\n"
542	    "       [-T threads (default %d)]\n"
543	    "       [-V] (print the libMicro version and exit)\n"
544	    "       [-W] (flag possible benchmark problems)\n"
545	    "%s\n",
546	    lm_procname,
547	    lm_defB, lm_defD, lm_procname, lm_defP, lm_defT,
548	    lm_usage);
549}
550
551void
552print_warnings(barrier_t *b)
553{
554	int head = 0;
555	int increase;
556
557	if (b->ba_quant) {
558		if (!head++) {
559			(void) printf("#\n# WARNINGS\n");
560		}
561		increase = (int)(floor((nsecs_resolution * 100.0) /
562		    ((double)lm_optB * b->ba_corrected.st_median * 1000.0)) +
563		    1.0);
564		(void) printf("#     Quantization error likely;"
565		    "increase batch size (-B option) %dX to avoid.\n",
566		    increase);
567	}
568
569	/*
570	 * XXX should warn on median != mean by a lot
571	 */
572
573	if (b->ba_errors) {
574		if (!head++) {
575			(void) printf("#\n# WARNINGS\n");
576		}
577		(void) printf("#     Errors occured during benchmark.\n");
578	}
579}
580
581void
582print_stats(barrier_t *b)
583{
584	(void) printf("#\n");
585	(void) printf("# STATISTICS         %12s          %12s\n",
586	    "usecs/call (raw)",
587	    "usecs/call (outliers removed)");
588
589	if (b->ba_count == 0) {
590		(void) printf("zero samples\n");
591		return;
592	}
593
594	(void) printf("#                    min %12.5f            %12.5f\n",
595	    b->ba_raw.st_min,
596	    b->ba_corrected.st_min);
597
598	(void) printf("#                    max %12.5f            %12.5f\n",
599	    b->ba_raw.st_max,
600	    b->ba_corrected.st_max);
601	(void) printf("#                   mean %12.5f            %12.5f\n",
602	    b->ba_raw.st_mean,
603	    b->ba_corrected.st_mean);
604	(void) printf("#                 median %12.5f            %12.5f\n",
605	    b->ba_raw.st_median,
606	    b->ba_corrected.st_median);
607	(void) printf("#                 stddev %12.5f            %12.5f\n",
608	    b->ba_raw.st_stddev,
609	    b->ba_corrected.st_stddev);
610	(void) printf("#         standard error %12.5f            %12.5f\n",
611	    b->ba_raw.st_stderr,
612	    b->ba_corrected.st_stderr);
613	(void) printf("#   99%% confidence level %12.5f            %12.5f\n",
614	    b->ba_raw.st_99confidence,
615	    b->ba_corrected.st_99confidence);
616	(void) printf("#                   skew %12.5f            %12.5f\n",
617	    b->ba_raw.st_skew,
618	    b->ba_corrected.st_skew);
619	(void) printf("#               kurtosis %12.5f            %12.5f\n",
620	    b->ba_raw.st_kurtosis,
621	    b->ba_corrected.st_kurtosis);
622
623	(void) printf("#       time correlation %12.5f            %12.5f\n",
624	    b->ba_raw.st_timecorr,
625	    b->ba_corrected.st_timecorr);
626	(void) printf("#\n");
627
628	(void) printf("#           elasped time %12.5f\n", (b->ba_endtime -
629	    b->ba_starttime) / 1.0e9);
630	(void) printf("#      number of samples %12d\n",   b->ba_batches);
631	(void) printf("#     number of outliers %12d\n", b->ba_outliers);
632	(void) printf("#      getnsecs overhead %12d\n", (int)nsecs_overhead);
633
634	(void) printf("#\n");
635	(void) printf("# DISTRIBUTION\n");
636
637	print_histo(b);
638
639	if (lm_optW) {
640		print_warnings(b);
641	}
642}
643
644void
645update_stats(barrier_t *b, result_t *r)
646{
647	double			time;
648	double			nsecs_per_call;
649
650	if (b->ba_waiters == 0) {
651		/* first thread only */
652		b->ba_t0 = r->re_t0;
653		b->ba_t1 = r->re_t1;
654		b->ba_count0 = 0;
655		b->ba_errors0 = 0;
656	} else {
657		/* all but first thread */
658		if (r->re_t0 < b->ba_t0) {
659			b->ba_t0 = r->re_t0;
660		}
661		if (r->re_t1 > b->ba_t1) {
662			b->ba_t1 = r->re_t1;
663		}
664	}
665
666	b->ba_count0  += r->re_count;
667	b->ba_errors0 += r->re_errors;
668
669	if (b->ba_waiters == b->ba_hwm - 1) {
670		/* last thread only */
671
672
673		time = (double)b->ba_t1 - (double)b->ba_t0 -
674		    (double)nsecs_overhead;
675
676		if (time < 100 * nsecs_resolution)
677			b->ba_quant++;
678
679		/*
680		 * normalize by procs * threads if not -U
681		 */
682
683		nsecs_per_call = time / (double)b->ba_count0 *
684		    (double)(lm_optT * lm_optP);
685
686		b->ba_count  += b->ba_count0;
687		b->ba_errors += b->ba_errors0;
688
689		b->ba_data[b->ba_batches % b->ba_datasize] =
690		    nsecs_per_call;
691
692		b->ba_batches++;
693	}
694}
695
696#ifdef USE_SEMOP
697barrier_t *
698barrier_create(int hwm, int datasize)
699{
700	struct sembuf		s[1];
701	barrier_t		*b;
702
703	/*LINTED*/
704	b = (barrier_t *)mmap(NULL,
705	    sizeof (barrier_t) + (datasize - 1) * sizeof (double),
706	    PROT_READ | PROT_WRITE,
707	    MAP_SHARED | MAP_ANON, -1, 0L);
708	if (b == (barrier_t *)MAP_FAILED) {
709		return (NULL);
710	}
711	b->ba_datasize = datasize;
712
713	b->ba_flag  = 0;
714	b->ba_hwm   = hwm;
715	b->ba_semid = semget(IPC_PRIVATE, 3, 0600);
716	if (b->ba_semid == -1) {
717		(void) munmap((void *)b, sizeof (barrier_t));
718		return (NULL);
719	}
720
721	/* [hwm - 1, 0, 0] */
722	s[0].sem_num = 0;
723	s[0].sem_op  = hwm - 1;
724	s[0].sem_flg = 0;
725	if (semop(b->ba_semid, s, 1) == -1) {
726		perror("semop(1)");
727		(void) semctl(b->ba_semid, 0, IPC_RMID);
728		(void) munmap((void *)b, sizeof (barrier_t));
729		return (NULL);
730	}
731
732	b->ba_waiters = 0;
733	b->ba_phase = 0;
734
735	b->ba_count = 0;
736	b->ba_errors = 0;
737
738	return (b);
739}
740
741int
742barrier_destroy(barrier_t *b)
743{
744	(void) semctl(b->ba_semid, 0, IPC_RMID);
745	(void) munmap((void *)b, sizeof (barrier_t));
746
747	return (0);
748}
749
750int
751barrier_queue(barrier_t *b, result_t *r)
752{
753	struct sembuf		s[2];
754
755	/*
756	 * {s0(-(hwm-1))}
757	 * if ! nowait {s1(-(hwm-1))}
758	 *   (all other threads)
759	 *   update shared stats
760	 *   {s0(hwm-1), s1(1)}
761	 *   {s0(1), s2(-1)}
762	 * else
763	 *   (last thread)
764	 *   update shared stats
765	 *   {s2(hwm-1)}
766	 */
767
768	s[0].sem_num = 0;
769	s[0].sem_op  = -(b->ba_hwm - 1);
770	s[0].sem_flg = 0;
771	if (semop(b->ba_semid, s, 1) == -1) {
772		perror("semop(2)");
773		return (-1);
774	}
775
776	s[0].sem_num = 1;
777	s[0].sem_op  = -(b->ba_hwm - 1);
778	s[0].sem_flg = IPC_NOWAIT;
779	if (semop(b->ba_semid, s, 1) == -1) {
780		if (errno != EAGAIN) {
781			perror("semop(3)");
782			return (-1);
783		}
784
785		/* all but the last thread */
786
787		if (r != NULL) {
788			update_stats(b, r);
789		}
790
791		b->ba_waiters++;
792
793		s[0].sem_num = 0;
794		s[0].sem_op  = b->ba_hwm - 1;
795		s[0].sem_flg = 0;
796		s[1].sem_num = 1;
797		s[1].sem_op  = 1;
798		s[1].sem_flg = 0;
799		if (semop(b->ba_semid, s, 2) == -1) {
800			perror("semop(4)");
801			return (-1);
802		}
803
804		s[0].sem_num = 0;
805		s[0].sem_op  = 1;
806		s[0].sem_flg = 0;
807		s[1].sem_num = 2;
808		s[1].sem_op  = -1;
809		s[1].sem_flg = 0;
810		if (semop(b->ba_semid, s, 2) == -1) {
811			perror("semop(5)");
812			return (-1);
813		}
814
815	} else {
816		/* the last thread */
817
818		if (r != NULL) {
819			update_stats(b, r);
820		}
821
822		b->ba_waiters = 0;
823		b->ba_phase++;
824
825		s[0].sem_num = 2;
826		s[0].sem_op  = b->ba_hwm - 1;
827		s[0].sem_flg = 0;
828		if (semop(b->ba_semid, s, 1) == -1) {
829			perror("semop(6)");
830			return (-1);
831		}
832	}
833
834	return (0);
835}
836
837#else /* USE_SEMOP */
838
839barrier_t *
840barrier_create(int hwm, int datasize)
841{
842	pthread_mutexattr_t	attr;
843	pthread_condattr_t	cattr;
844	barrier_t		*b;
845
846	/*LINTED*/
847	b = (barrier_t *)mmap(NULL,
848	    sizeof (barrier_t) + (datasize - 1) * sizeof (double),
849	    PROT_READ | PROT_WRITE,
850	    MAP_SHARED | MAP_ANON, -1, 0L);
851	if (b == (barrier_t *)MAP_FAILED) {
852		return (NULL);
853	}
854	b->ba_datasize = datasize;
855
856	b->ba_hwm = hwm;
857	b->ba_flag  = 0;
858
859	(void) pthread_mutexattr_init(&attr);
860	(void) pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
861
862	(void) pthread_condattr_init(&cattr);
863	(void) pthread_condattr_setpshared(&cattr, PTHREAD_PROCESS_SHARED);
864
865	(void) pthread_mutex_init(&b->ba_lock, &attr);
866	(void) pthread_cond_init(&b->ba_cv, &cattr);
867
868	b->ba_waiters = 0;
869	b->ba_phase = 0;
870
871	b->ba_count = 0;
872	b->ba_errors = 0;
873
874	return (b);
875}
876
877int
878barrier_destroy(barrier_t *b)
879{
880	(void) munmap((void *)b, sizeof (barrier_t));
881
882	return (0);
883}
884
885int
886barrier_queue(barrier_t *b, result_t *r)
887{
888	int			phase;
889
890	(void) pthread_mutex_lock(&b->ba_lock);
891
892	if (r != NULL) {
893		update_stats(b, r);
894	}
895
896	phase = b->ba_phase;
897
898	b->ba_waiters++;
899	if (b->ba_hwm == b->ba_waiters) {
900		b->ba_waiters = 0;
901		b->ba_phase++;
902		(void) pthread_cond_broadcast(&b->ba_cv);
903	}
904
905	while (b->ba_phase == phase) {
906		(void) pthread_cond_wait(&b->ba_cv, &b->ba_lock);
907	}
908
909	(void) pthread_mutex_unlock(&b->ba_lock);
910	return (0);
911}
912#endif /* USE_SEMOP */
913
914int
915gettindex()
916{
917	int			i;
918
919	if (tids == NULL) {
920		return (-1);
921	}
922
923	for (i = 1; i < lm_optT; i++) {
924		if (pthread_self() == tids[i]) {
925			return (i);
926		}
927	}
928
929	return (0);
930}
931
932int
933getpindex()
934{
935	return (pindex);
936}
937
938void *
939gettsd(int p, int t)
940{
941	if ((p < 0) || (p >= lm_optP) || (t < 0) || (t >= lm_optT))
942		return (NULL);
943
944	return ((void *)((unsigned long)tsdseg +
945	    (((p * lm_optT) + t) * tsdsize)));
946}
947
948#ifdef USE_GETHRTIME
949long long
950getnsecs()
951{
952	return (gethrtime());
953}
954
955long long
956getusecs()
957{
958	return (gethrtime() / 1000);
959}
960
961#elif USE_RDTSC /* USE_GETHRTIME */
962
963__inline__ long long
964rdtsc(void)
965{
966	unsigned long long x;
967	__asm__ volatile(".byte 0x0f, 0x31" : "=A" (x));
968	return (x);
969}
970
971long long
972getusecs()
973{
974	return (rdtsc() * 1000000 / lm_hz);
975}
976
977long long
978getnsecs()
979{
980	return (rdtsc() * 1000000000 / lm_hz);
981}
982
983#else /* USE_GETHRTIME */
984
/* Wall-clock time from gettimeofday(), in microseconds. */
long long
getusecs()
{
	struct timeval		now;
	long long		usecs;

	(void) gettimeofday(&now, NULL);

	usecs = (long long)now.tv_sec * 1000000LL;
	usecs += (long long)now.tv_usec;

	return (usecs);
}

/*
 * Wall-clock time from gettimeofday(), in nanoseconds.  The value
 * moves in steps of 1000 because the source is in microseconds.
 */
long long
getnsecs()
{
	struct timeval		now;
	long long		nsecs;

	(void) gettimeofday(&now, NULL);

	nsecs = (long long)now.tv_sec * 1000000000LL;
	nsecs += (long long)now.tv_usec * 1000LL;

	return (nsecs);
}
1005
1006#endif /* USE_GETHRTIME */
1007
/*
 * Makes sure the open-file soft limit is at least limit, raising the
 * hard limit too when it is lower.  Returns 0 when the limit is
 * already high enough or was raised, and -1 for a negative limit.
 * Exits the process if getrlimit() or setrlimit() fails.
 */
int
setfdlimit(int limit)
{
	struct rlimit rlimit;
	rlim_t wanted;

	/* a negative int would turn into a huge unsigned rlim_t */
	if (limit < 0)
		return (-1);
	wanted = (rlim_t)limit;

	if (getrlimit(RLIMIT_NOFILE, &rlimit) < 0) {
		perror("getrlimit");
		exit(1);
	}

	if (rlimit.rlim_cur > wanted)
		return (0); /* no worries */

	rlimit.rlim_cur = wanted;

	if (rlimit.rlim_max < wanted)
		rlimit.rlim_max = wanted;

	if (setrlimit(RLIMIT_NOFILE, &rlimit) < 0) {
		perror("setrlimit");
		exit(3);
	}

	return (0);
}
1033
1034
#define	KILOBYTE		1024
#define	MEGABYTE		(KILOBYTE * KILOBYTE)
#define	GIGABYTE		(KILOBYTE * MEGABYTE)

/*
 * Works out the multiplier selected by an optional trailing k/m/g
 * suffix (either case).  Returns 1 when arg does not end in a letter,
 * and -1 when the letter is not a known suffix or when a suffixed
 * argument has a non-digit before the suffix.
 */
static long long
size_multiplier(const char *arg)
{
	size_t			len = strlen(arg);
	size_t			i;
	long long		mult;

	/* ctype functions need unsigned char values */
	if (len == 0 || !isalpha((unsigned char)arg[len - 1]))
		return (1);

	switch (arg[len - 1]) {
	case 'k':
	case 'K':
		mult = KILOBYTE;
		break;
	case 'm':
	case 'M':
		mult = MEGABYTE;
		break;
	case 'g':
	case 'G':
		mult = GIGABYTE;
		break;
	default:
		return (-1);
	}

	for (i = 0; i + 1 < len; i++)
		if (!isdigit((unsigned char)arg[i]))
			return (-1);

	return (mult);
}

/*
 * Converts a decimal size with an optional k/m/g suffix to a long
 * long.  Returns -1 for a malformed suffixed argument or when the
 * scaled value does not fit in a long long.
 */
long long
sizetoll(const char *arg)
{
	long long		mult = size_multiplier(arg);
	long long		value;

	if (mult < 0)
		return (-1);

	value = strtoll(arg, NULL, 10);

	/* mult is positive, so these bounds catch overflow both ways */
	if (value > LLONG_MAX / mult || value < LLONG_MIN / mult)
		return (-1);

	return (mult * value);
}

/*
 * Same as sizetoll() for callers that store the result in an int.
 * Returns -1 when the value does not fit in an int, rather than a
 * silently truncated number.
 */
int
sizetoint(const char *arg)
{
	long long		value = sizetoll(arg);

	if (value > INT_MAX || value < INT_MIN)
		return (-1);

	return ((int)value);
}
1106
/*
 * Prints one histogram bar: a leading '*' when count is non-zero
 * (a space otherwise), then '*' up to 32 * count / total columns,
 * padded with spaces to 32 columns.  A total of zero or less yields
 * no extra stars instead of a division by zero.  Written with the
 * ISO C putchar().
 */
static void
print_bar(long count, long total)
{
	long			stars = 0;
	long			i;

	if (total > 0)
		stars = (32 * count) / total;

	(void) putchar(count ? '*' : ' ');
	for (i = 1; i < stars; i++)
		(void) putchar('*');
	for (; i < 32; i++)
		(void) putchar(' ');
}
1118
/*
 * qsort() comparison callback for doubles, ascending.  Returns 1, -1
 * or 0; the operands are read through const pointers so the const on
 * the callback's arguments is not cast away.
 */
static int
doublecmp(const void *p1, const void *p2)
{
	const double a = *(const double *)p1;
	const double b = *(const double *)p2;

	if (a > b)
		return (1);
	if (a < b)
		return (-1);
	return (0);
}
1131
1132static void
1133print_histo(barrier_t *b)
1134{
1135	int			n;
1136	int			i;
1137	int			j;
1138	int			last;
1139	long long		maxcount;
1140	double			sum;
1141	long long		min;
1142	long long		scale;
1143	double			x;
1144	long long		y;
1145	long long		count;
1146	int			i95;
1147	double			p95;
1148	double			r95;
1149	double			m95;
1150	histo_t			*histo;
1151
1152	(void) printf("#	%12s %12s %32s %12s\n", "counts", "usecs/call",
1153	    "", "means");
1154
1155	/* calculate how much data we've captured */
1156	n = b->ba_batches > b->ba_datasize ? b->ba_datasize : b->ba_batches;
1157
1158	/* find the 95th percentile - index, value and range */
1159	qsort((void *)b->ba_data, n, sizeof (double), doublecmp);
1160	min = b->ba_data[0] + 0.000001;
1161	i95 = n * 95 / 100;
1162	p95 = b->ba_data[i95];
1163	r95 = p95 - min + 1;
1164
1165	/* find a suitable min and scale */
1166	i = 0;
1167	x = r95 / (HISTOSIZE - 1);
1168	while (x >= 10.0) {
1169		x /= 10.0;
1170		i++;
1171	}
1172	y = x + 0.9999999999;
1173	while (i > 0) {
1174		y *= 10;
1175		i--;
1176	}
1177	min /= y;
1178	min *= y;
1179	scale = y * (HISTOSIZE - 1);
1180	if (scale < (HISTOSIZE - 1)) {
1181		scale = (HISTOSIZE - 1);
1182	}
1183
1184	/* create and initialise the histogram */
1185	histo = malloc(HISTOSIZE * sizeof (histo_t));
1186	for (i = 0; i < HISTOSIZE; i++) {
1187		histo[i].sum = 0.0;
1188		histo[i].count = 0;
1189	}
1190
1191	/* populate the histogram */
1192	last = 0;
1193	sum = 0.0;
1194	count = 0;
1195	for (i = 0; i < i95; i++) {
1196		j = (HISTOSIZE - 1) * (b->ba_data[i] - min) / scale;
1197
1198		if (j >= HISTOSIZE) {
1199			(void) printf("panic!\n");
1200			j = HISTOSIZE - 1;
1201		}
1202
1203		histo[j].sum += b->ba_data[i];
1204		histo[j].count++;
1205
1206		sum += b->ba_data[i];
1207		count++;
1208	}
1209	m95 = sum / count;
1210
1211	/* find the larges bucket */
1212	maxcount = 0;
1213	for (i = 0; i < HISTOSIZE; i++)
1214		if (histo[i].count > 0) {
1215			last = i;
1216			if (histo[i].count > maxcount)
1217				maxcount = histo[i].count;
1218		}
1219
1220	/* print the buckets */
1221	for (i = 0; i <= last; i++) {
1222		(void) printf("#       %12lld %12.5f |", histo[i].count,
1223		    (min + scale * (double)i / (HISTOSIZE - 1)));
1224
1225		print_bar(histo[i].count, maxcount);
1226
1227		if (histo[i].count > 0)
1228			(void) printf("%12.5f\n",
1229			    histo[i].sum / histo[i].count);
1230		else
1231			(void) printf("%12s\n", "-");
1232	}
1233
1234	/* find the mean of values beyond the 95th percentile */
1235	sum = 0.0;
1236	count = 0;
1237	for (i = i95; i < n; i++) {
1238		sum += b->ba_data[i];
1239		count++;
1240	}
1241
1242	/* print the >95% bucket summary */
1243	(void) printf("#\n");
1244	(void) printf("#       %12lld %12s |", count, "> 95%");
1245	print_bar(count, maxcount);
1246	if (count > 0)
1247		(void) printf("%12.5f\n", sum / count);
1248	else
1249		(void) printf("%12s\n", "-");
1250	(void) printf("#\n");
1251	(void) printf("#       %12s %12.5f\n", "mean of 95%", m95);
1252	(void) printf("#       %12s %12.5f\n", "95th %ile", p95);
1253
1254	/* quantify any buffer overflow */
1255	if (b->ba_batches > b->ba_datasize)
1256		(void) printf("#       %12s %12d\n", "data dropped",
1257		    b->ba_batches - b->ba_datasize);
1258}
1259
1260static void
1261compute_stats(barrier_t *b)
1262{
1263	int i;
1264
1265	if (b->ba_batches > b->ba_datasize)
1266		b->ba_batches = b->ba_datasize;
1267
1268	/*
1269	 * convert to usecs/call
1270	 */
1271
1272	for (i = 0; i < b->ba_batches; i++)
1273		b->ba_data[i] /= 1000.0;
1274
1275	/*
1276	 * do raw stats
1277	 */
1278
1279	(void) crunch_stats(b->ba_data, b->ba_batches, &b->ba_raw);
1280
1281	/*
1282	 * recursively apply 3 sigma rule to remove outliers
1283	 */
1284
1285	b->ba_corrected = b->ba_raw;
1286	b->ba_outliers = 0;
1287
1288	if (b->ba_batches > 40) { /* remove outliers */
1289		int removed;
1290
1291		do {
1292			removed = remove_outliers(b->ba_data, b->ba_batches,
1293			    &b->ba_corrected);
1294			b->ba_outliers += removed;
1295			b->ba_batches -= removed;
1296			(void) crunch_stats(b->ba_data, b->ba_batches,
1297			    &b->ba_corrected);
1298			} while (removed != 0 && b->ba_batches > 40);
1299	}
1300
1301}
1302
1303/*
1304 * routine to compute various statistics on array of doubles.
1305 */
1306
1307static int
1308crunch_stats(double *data, int count, stats_t *stats)
1309{
1310	double a;
1311	double std;
1312	double diff;
1313	double sk;
1314	double ku;
1315	double mean;
1316	int i;
1317	int bytes;
1318	double *dupdata;
1319
1320	/*
1321	 * first we need the mean
1322	 */
1323
1324	mean = 0.0;
1325
1326	for (i = 0; i < count; i++) {
1327		mean += data[i];
1328	}
1329
1330	mean /= count;
1331
1332	stats->st_mean = mean;
1333
1334	/*
1335	 * malloc and sort so we can do median
1336	 */
1337
1338	dupdata = malloc(bytes = sizeof (double) * count);
1339	(void) memcpy(dupdata, data, bytes);
1340	qsort((void *)dupdata, count, sizeof (double), doublecmp);
1341	stats->st_median   = dupdata[count/2];
1342
1343	/*
1344	 * reuse dupdata to compute time correlation of data to
1345	 * detect interesting time-based trends
1346	 */
1347
1348	for (i = 0; i < count; i++)
1349		dupdata[i] = (double)i;
1350
1351	(void) fit_line(dupdata, data, count, &a, &stats->st_timecorr);
1352	free(dupdata);
1353
1354	std = 0.0;
1355	sk  = 0.0;
1356	ku  = 0.0;
1357
1358	stats->st_max = -1;
1359	stats->st_min = 1.0e99; /* hard to find portable values */
1360
1361	for (i = 0; i < count; i++) {
1362		if (data[i] > stats->st_max)
1363			stats->st_max = data[i];
1364		if (data[i] < stats->st_min)
1365			stats->st_min = data[i];
1366
1367		diff = data[i] - mean;
1368		std += diff * diff;
1369		sk  += diff * diff * diff;
1370		ku  += diff * diff * diff * diff;
1371	}
1372
1373	stats->st_stddev   = std = sqrt(std/(double)(count - 1));
1374	stats->st_stderr   = std / sqrt(count);
1375	stats->st_99confidence = stats->st_stderr * 2.326;
1376	stats->st_skew	   = sk / (std * std * std) / (double)(count);
1377	stats->st_kurtosis = ku / (std * std * std * std) /
1378	    (double)(count) - 3;
1379
1380	return (0);
1381}
1382
/*
 * does a least squares fit to the set of points x, y and
 * fits a line y = a + bx.  Returns a, b
 *
 * Returns 0 on success.  When the points do not determine a slope
 * (fewer than two points, or all x equal) it returns -1 and still
 * sets the outputs: *b to 0 and *a to the mean of y (0 when count is
 * not positive), so callers that ignore the return value never read
 * uninitialised memory.
 */

int
fit_line(double *x, double *y, int count, double *a, double *b)
{
	double sumx, sumy, sumxy, sumx2;
	double denom;
	int i;

	sumx = sumy = sumxy = sumx2 = 0.0;

	for (i = 0; i < count; i++) {
		sumx	+= x[i];
		sumx2	+= x[i] * x[i];
		sumy	+= y[i];
		sumxy	+= x[i] * y[i];
	}

	denom = count * sumx2 - sumx * sumx;

	if (denom == 0.0) {
		*a = (count > 0) ? sumy / count : 0.0;
		*b = 0.0;
		return (-1);
	}

	*a = (sumy * sumx2 - sumx * sumxy) / denom;

	*b = (count * sumxy - sumx * sumy) / denom;

	return (0);
}
1415
/*
 * Does nothing and returns 1; exists so the cost of an empty call
 * can be measured.
 */

int
nop()
{
	enum { NOP_RESULT = 1 };

	return (NOP_RESULT);
}
1425
1426#define	NSECITER 1000
1427
1428static long long
1429get_nsecs_overhead()
1430{
1431	long long s;
1432
1433	double data[NSECITER];
1434	stats_t stats;
1435
1436	int i;
1437	int count;
1438	int outliers;
1439
1440	(void) getnsecs(); /* warmup */
1441	(void) getnsecs(); /* warmup */
1442	(void) getnsecs(); /* warmup */
1443
1444	i = 0;
1445
1446	count = NSECITER;
1447
1448	for (i = 0; i < count; i++) {
1449		s = getnsecs();
1450		data[i] = getnsecs() - s;
1451	}
1452
1453	(void) crunch_stats(data, count, &stats);
1454
1455	while ((outliers = remove_outliers(data, count, &stats)) != 0) {
1456		count -= outliers;
1457		(void) crunch_stats(data, count, &stats);
1458	}
1459
1460	return ((long long)stats.st_mean);
1461
1462}
1463
/*
 * Estimates the timer resolution in nanoseconds.  First finds how
 * many iterations of an empty loop are needed before two getnsecs()
 * readings differ, then times 1000 delays of linearly growing length
 * and returns the smallest positive step between consecutive timings
 * (1 << 30 if no positive step is seen).
 *
 * The delay counter is volatile so the compiler cannot delete the
 * loops, and it is a long long so nops * i cannot overflow.
 */
long long
get_nsecs_resolution()
{
	long long y[1000];

	int i;
	long long nops, res;
	long long start, stop;
	volatile long long spin;

	/*
	 * first, figure out how many nops to use
	 * to get any delta between time measurements.
	 * use a minimum of one.
	 */

	/*
	 * warm cache
	 */

	stop = start = getnsecs();

	for (nops = 1; nops < 10000000; nops++) {
		start = getnsecs();
		for (spin = nops; spin; spin--)
			;
		stop = getnsecs();
		if (stop > start)
			break;
	}

	/*
	 * now collect data at linearly varying intervals
	 */

	for (i = 0; i < 1000; i++) {
		start = getnsecs();
		for (spin = nops * i; spin; spin--)
			;
		stop = getnsecs();
		y[i] = stop - start;
	}

	/*
	 * find smallest positive difference between samples;
	 * this is the timer resolution
	 */

	res = 1<<30;

	for (i = 1; i < 1000; i++) {
		/* kept in long long: a large step must not be truncated */
		long long diff = y[i] - y[i-1];

		if (diff > 0 && res > diff)
			res = diff;

	}

	return (res);
}
1524
1525/*
1526 * remove any data points from the array more than 3 sigma out
1527 */
1528
1529static int
1530remove_outliers(double *data, int count, stats_t *stats)
1531{
1532	double outmin = stats->st_mean - 3 * stats->st_stddev;
1533	double outmax = stats->st_mean + 3 * stats->st_stddev;
1534
1535	int i, j, outliers;
1536
1537	for (outliers = i = j = 0; i < count; i++)
1538		if (data[i] > outmax || data[i] < outmin)
1539			outliers++;
1540		else
1541			data[j++] = data[i];
1542
1543	return (outliers);
1544}
1545