1/*- 2 * Copyright (c) 2014 Hudson River Trading LLC 3 * Written by: John H. Baldwin <jhb@FreeBSD.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD$"); 31 32#include <sys/param.h> 33#include <sys/cpuset.h> 34#include <machine/atomic.h> 35#include <machine/cpu.h> 36#include <machine/cpufunc.h> 37#include <assert.h> 38#include <err.h> 39#include <errno.h> 40#include <math.h> 41#include <pthread.h> 42#include <stdint.h> 43#include <stdlib.h> 44#include <stdio.h> 45 46#define barrier() __asm __volatile("" ::: "memory") 47 48#define TESTS 1024 49 50static volatile int gate; 51static volatile uint64_t thread_tsc; 52 53/* Bind the current thread to the specified CPU. */ 54static void 55bind_cpu(int cpu) 56{ 57 cpuset_t set; 58 59 CPU_ZERO(&set); 60 CPU_SET(cpu, &set); 61 if (cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(set), 62 &set) < 0) 63 err(1, "cpuset_setaffinity(%d)", cpu); 64} 65 66static void * 67thread_main(void *arg) 68{ 69 int cpu, i; 70 71 cpu = (intptr_t)arg; 72 bind_cpu(cpu); 73 for (i = 0; i < TESTS; i++) { 74 gate = 1; 75 while (gate == 1) 76 cpu_spinwait(); 77 barrier(); 78 79 __asm __volatile("lfence"); 80 thread_tsc = rdtsc(); 81 82 barrier(); 83 gate = 3; 84 while (gate == 3) 85 cpu_spinwait(); 86 } 87 return (NULL); 88} 89 90int 91main(int ac __unused, char **av __unused) 92{ 93 cpuset_t all_cpus; 94 int64_t **skew, *aveskew, *minskew, *maxskew; 95 float *stddev; 96 double sumsq; 97 pthread_t child; 98 uint64_t tsc; 99 int *cpus; 100 int error, i, j, ncpu; 101 102 /* 103 * Find all the CPUs this program is eligible to run on and use 104 * this as our global set. This means you can use cpuset to 105 * restrict this program to only run on a subset of CPUs. 106 */ 107 if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, 108 sizeof(all_cpus), &all_cpus) < 0) 109 err(1, "cpuset_getaffinity"); 110 for (ncpu = 0, i = 0; i < CPU_SETSIZE; i++) { 111 if (CPU_ISSET(i, &all_cpus)) 112 ncpu++; 113 } 114 if (ncpu < 2) 115 errx(1, "Only one available CPU"); 116 cpus = calloc(ncpu, sizeof(*cpus)); 117 skew = calloc(ncpu, sizeof(*skew)); 118 for (i = 0; i < ncpu; i++) 119 skew[i] = calloc(TESTS, sizeof(*skew[i])); 120 for (i = 0, j = 0; i < CPU_SETSIZE; i++) 121 if (CPU_ISSET(i, &all_cpus)) { 122 assert(j < ncpu); 123 cpus[j] = i; 124 j++; 125 } 126 127 /* 128 * We bind this thread to the first CPU and then bind all the 129 * other threads to other CPUs in turn saving TESTS counts of 130 * skew calculations. 131 */ 132 bind_cpu(cpus[0]); 133 for (i = 1; i < ncpu; i++) { 134 error = pthread_create(&child, NULL, thread_main, 135 (void *)(intptr_t)cpus[i]); 136 if (error) 137 errc(1, error, "pthread_create"); 138 139 for (j = 0; j < TESTS; j++) { 140 while (gate != 1) 141 cpu_spinwait(); 142 gate = 2; 143 barrier(); 144 145 tsc = rdtsc(); 146 147 barrier(); 148 while (gate != 3) 149 cpu_spinwait(); 150 gate = 4; 151 152 skew[i][j] = thread_tsc - tsc; 153 } 154 155 error = pthread_join(child, NULL); 156 if (error) 157 errc(1, error, "pthread_join"); 158 } 159 160 /* 161 * Compute average skew for each CPU and output a summary of 162 * the results. 163 */ 164 aveskew = calloc(ncpu, sizeof(*aveskew)); 165 minskew = calloc(ncpu, sizeof(*minskew)); 166 maxskew = calloc(ncpu, sizeof(*maxskew)); 167 stddev = calloc(ncpu, sizeof(*stddev)); 168 stddev[0] = 0.0; 169 for (i = 1; i < ncpu; i++) { 170 sumsq = 0; 171 minskew[i] = maxskew[i] = skew[i][0]; 172 for (j = 0; j < TESTS; j++) { 173 aveskew[i] += skew[i][j]; 174 if (skew[i][j] < minskew[i]) 175 minskew[i] = skew[i][j]; 176 if (skew[i][j] > maxskew[i]) 177 maxskew[i] = skew[i][j]; 178 sumsq += (skew[i][j] * skew[i][j]); 179 } 180 aveskew[i] /= TESTS; 181 sumsq /= TESTS; 182 sumsq -= aveskew[i] * aveskew[i]; 183 stddev[i] = sqrt(sumsq); 184 } 185 186 printf("CPU | TSC skew (min/avg/max/stddev)\n"); 187 printf("----+------------------------------\n"); 188 for (i = 0; i < ncpu; i++) 189 printf("%3d | %5jd %5jd %5jd %6.3f\n", cpus[i], 190 (intmax_t)minskew[i], (intmax_t)aveskew[i], 191 (intmax_t)maxskew[i], stddev[i]); 192 return (0); 193} 194