1/*-
2 * Copyright (c) 2014 Hudson River Trading LLC
3 * Written by: John H. Baldwin <jhb@FreeBSD.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
32#include <sys/param.h>
33#include <sys/cpuset.h>
34#include <machine/atomic.h>
35#include <machine/cpu.h>
36#include <machine/cpufunc.h>
37#include <assert.h>
38#include <err.h>
39#include <errno.h>
40#include <math.h>
41#include <pthread.h>
42#include <stdint.h>
43#include <stdlib.h>
44#include <stdio.h>
45
/*
 * Compiler-level barrier: an empty asm statement with a "memory" clobber.
 * It emits no instructions of its own; it is placed around the rdtsc()
 * calls so the compiler does not move memory accesses across them.
 */
#define	barrier()	__asm __volatile("" ::: "memory")

/* Number of skew samples collected for each CPU under test. */
#define	TESTS		1024

/*
 * Handshake state shared between main() and the measuring thread.
 * thread_main() stores 1 and 3 into 'gate'; main() stores 2 and 4.
 * Each side spins on 'gate' until the other side advances it.
 */
static volatile int gate;
/* TSC value most recently sampled by thread_main(); read by main(). */
static volatile uint64_t thread_tsc;
52
/*
 * Restrict the calling thread to run only on 'cpu'.
 *
 * Builds a CPU set containing just 'cpu' and applies it with
 * cpuset_setaffinity() using CPU_WHICH_TID and an id of -1.  If the
 * call fails the program exits with status 1 via err().
 */
static void
bind_cpu(int cpu)
{
	cpuset_t mask;
	int rc;

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);

	rc = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1,
	    sizeof(mask), &mask);
	if (rc < 0)
		err(1, "cpuset_setaffinity(%d)", cpu);
}
65
/*
 * Body of the measuring thread.
 *
 * 'arg' carries the CPU id (cast through intptr_t) that this thread
 * binds itself to.  The thread then runs TESTS rounds of a handshake
 * with main() over the shared 'gate' variable:
 *
 *   1. store 1 into gate and spin until it is no longer 1,
 *   2. sample the TSC into thread_tsc,
 *   3. store 3 into gate and spin until it is no longer 3.
 *
 * Always returns NULL.
 */
static void *
thread_main(void *arg)
{
	int round;

	bind_cpu((int)(intptr_t)arg);

	round = 0;
	while (round < TESTS) {
		/* Signal readiness, then wait for the gate to move on. */
		gate = 1;
		for (;;) {
			if (gate != 1)
				break;
			cpu_spinwait();
		}
		barrier();

		/*
		 * NOTE(review): the lfence presumably keeps rdtsc from
		 * being executed ahead of the gate load above -- confirm
		 * against the CPU vendor's documentation.
		 */
		__asm __volatile("lfence");
		thread_tsc = rdtsc();

		barrier();
		/* Publish the sample, then wait for it to be consumed. */
		gate = 3;
		for (;;) {
			if (gate != 3)
				break;
			cpu_spinwait();
		}
		round++;
	}
	return (NULL);
}
89
/*
 * calloc() wrapper that terminates the program with err() when the
 * allocation fails, so callers never see a NULL result.
 */
static void *
xcalloc(size_t number, size_t size)
{
	void *p;

	p = calloc(number, size);
	if (p == NULL)
		err(1, "calloc");
	return (p);
}

/*
 * Measure the TSC skew of every eligible CPU relative to the first one.
 *
 * The set of CPUs is taken from the process affinity mask.  The main
 * thread binds itself to the first CPU; for each remaining CPU it
 * starts a thread running thread_main() bound to that CPU and collects
 * TESTS samples of (thread TSC - main TSC).  A min/avg/max/stddev
 * summary per CPU is printed to stdout.
 *
 * Command line arguments are ignored.  Returns 0 on success; exits with
 * status 1 through err()/errx()/errc() on any failure.
 */
int
main(int ac __unused, char **av __unused)
{
	cpuset_t all_cpus;
	int64_t **skew, *aveskew, *minskew, *maxskew;
	float *stddev;
	double delta, mean, sumsq;
	pthread_t child;
	uint64_t tsc;
	int *cpus;
	int error, i, j, ncpu;

	/*
	 * Find all the CPUs this program is eligible to run on and use
	 * this as our global set.  This means you can use cpuset to
	 * restrict this program to only run on a subset of CPUs.
	 */
	if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1,
	    sizeof(all_cpus), &all_cpus) < 0)
		err(1, "cpuset_getaffinity");
	for (ncpu = 0, i = 0; i < CPU_SETSIZE; i++) {
		if (CPU_ISSET(i, &all_cpus))
			ncpu++;
	}
	if (ncpu < 2)
		errx(1, "Only one available CPU");

	/*
	 * cpus[] maps a dense index to the real CPU id.  skew[i] holds the
	 * TESTS samples for cpus[i]; index 0 is the reference CPU and is
	 * never sampled, so its row is left NULL.
	 */
	cpus = xcalloc(ncpu, sizeof(*cpus));
	skew = xcalloc(ncpu, sizeof(*skew));
	for (i = 1; i < ncpu; i++)
		skew[i] = xcalloc(TESTS, sizeof(*skew[i]));
	for (i = 0, j = 0; i < CPU_SETSIZE; i++)
		if (CPU_ISSET(i, &all_cpus)) {
			assert(j < ncpu);
			cpus[j] = i;
			j++;
		}

	/*
	 * We bind this thread to the first CPU and then bind all the
	 * other threads to other CPUs in turn saving TESTS counts of
	 * skew calculations.
	 */
	bind_cpu(cpus[0]);
	for (i = 1; i < ncpu; i++) {
		error = pthread_create(&child, NULL, thread_main,
		    (void *)(intptr_t)cpus[i]);
		if (error)
			errc(1, error, "pthread_create");

		for (j = 0; j < TESTS; j++) {
			/* Wait for the child to announce it is ready. */
			while (gate != 1)
				cpu_spinwait();
			/* Release the child and sample our own TSC. */
			gate = 2;
			barrier();

			tsc = rdtsc();

			barrier();
			/* Wait for the child to publish its sample. */
			while (gate != 3)
				cpu_spinwait();
			gate = 4;

			/*
			 * The child cannot overwrite thread_tsc again until
			 * we store 2 into gate on the next iteration.
			 */
			skew[i][j] = thread_tsc - tsc;
		}

		error = pthread_join(child, NULL);
		if (error)
			errc(1, error, "pthread_join");
	}

	/*
	 * Compute average skew for each CPU and output a summary of
	 * the results.  Entry 0 (the reference CPU) stays all zeroes.
	 */
	aveskew = xcalloc(ncpu, sizeof(*aveskew));
	minskew = xcalloc(ncpu, sizeof(*minskew));
	maxskew = xcalloc(ncpu, sizeof(*maxskew));
	stddev = xcalloc(ncpu, sizeof(*stddev));
	for (i = 1; i < ncpu; i++) {
		/* First pass: sum, minimum and maximum. */
		minskew[i] = maxskew[i] = skew[i][0];
		for (j = 0; j < TESTS; j++) {
			aveskew[i] += skew[i][j];
			if (skew[i][j] < minskew[i])
				minskew[i] = skew[i][j];
			if (skew[i][j] > maxskew[i])
				maxskew[i] = skew[i][j];
		}
		/*
		 * Keep the untruncated mean for the variance; the printed
		 * average is the integer quotient.
		 */
		mean = (double)aveskew[i] / TESTS;
		aveskew[i] /= TESTS;

		/*
		 * Second pass: population standard deviation from squared
		 * deviations.  Working on deviations in floating point
		 * avoids overflowing int64_t when squaring large skews and
		 * the error introduced by squaring a truncated mean.
		 */
		sumsq = 0;
		for (j = 0; j < TESTS; j++) {
			delta = (double)skew[i][j] - mean;
			sumsq += delta * delta;
		}
		stddev[i] = sqrt(sumsq / TESTS);
	}

	printf("CPU | TSC skew (min/avg/max/stddev)\n");
	printf("----+------------------------------\n");
	for (i = 0; i < ncpu; i++)
		printf("%3d | %5jd %5jd %5jd   %6.3f\n", cpus[i],
		    (intmax_t)minskew[i], (intmax_t)aveskew[i],
		    (intmax_t)maxskew[i], stddev[i]);

	free(stddev);
	free(maxskew);
	free(minskew);
	free(aveskew);
	for (i = 1; i < ncpu; i++)
		free(skew[i]);
	free(skew);
	free(cpus);
	return (0);
}
194