1/*- 2 * Copyright (c) 2014 Hudson River Trading LLC 3 * Written by: John H. Baldwin <jhb@FreeBSD.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#include <sys/param.h> 29#include <sys/cpuset.h> 30#include <machine/atomic.h> 31#include <machine/cpu.h> 32#include <machine/cpufunc.h> 33#include <assert.h> 34#include <err.h> 35#include <errno.h> 36#include <math.h> 37#include <pthread.h> 38#include <stdint.h> 39#include <stdlib.h> 40#include <stdio.h> 41 42#define barrier() __asm __volatile("" ::: "memory") 43 44#define TESTS 1024 45 46static volatile int gate; 47static volatile uint64_t thread_tsc; 48 49/* Bind the current thread to the specified CPU. */ 50static void 51bind_cpu(int cpu) 52{ 53 cpuset_t set; 54 55 CPU_ZERO(&set); 56 CPU_SET(cpu, &set); 57 if (cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(set), 58 &set) < 0) 59 err(1, "cpuset_setaffinity(%d)", cpu); 60} 61 62static void * 63thread_main(void *arg) 64{ 65 int cpu, i; 66 67 cpu = (intptr_t)arg; 68 bind_cpu(cpu); 69 for (i = 0; i < TESTS; i++) { 70 gate = 1; 71 while (gate == 1) 72 cpu_spinwait(); 73 barrier(); 74 75 __asm __volatile("lfence"); 76 thread_tsc = rdtsc(); 77 78 barrier(); 79 gate = 3; 80 while (gate == 3) 81 cpu_spinwait(); 82 } 83 return (NULL); 84} 85 86int 87main(int ac __unused, char **av __unused) 88{ 89 cpuset_t all_cpus; 90 int64_t **skew, *aveskew, *minskew, *maxskew; 91 float *stddev; 92 double sumsq; 93 pthread_t child; 94 uint64_t tsc; 95 int *cpus; 96 int error, i, j, ncpu; 97 98 /* 99 * Find all the CPUs this program is eligible to run on and use 100 * this as our global set. This means you can use cpuset to 101 * restrict this program to only run on a subset of CPUs. 102 */ 103 if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, 104 sizeof(all_cpus), &all_cpus) < 0) 105 err(1, "cpuset_getaffinity"); 106 for (ncpu = 0, i = 0; i < CPU_SETSIZE; i++) { 107 if (CPU_ISSET(i, &all_cpus)) 108 ncpu++; 109 } 110 if (ncpu < 2) 111 errx(1, "Only one available CPU"); 112 cpus = calloc(ncpu, sizeof(*cpus)); 113 skew = calloc(ncpu, sizeof(*skew)); 114 for (i = 0; i < ncpu; i++) 115 skew[i] = calloc(TESTS, sizeof(*skew[i])); 116 for (i = 0, j = 0; i < CPU_SETSIZE; i++) 117 if (CPU_ISSET(i, &all_cpus)) { 118 assert(j < ncpu); 119 cpus[j] = i; 120 j++; 121 } 122 123 /* 124 * We bind this thread to the first CPU and then bind all the 125 * other threads to other CPUs in turn saving TESTS counts of 126 * skew calculations. 127 */ 128 bind_cpu(cpus[0]); 129 for (i = 1; i < ncpu; i++) { 130 error = pthread_create(&child, NULL, thread_main, 131 (void *)(intptr_t)cpus[i]); 132 if (error) 133 errc(1, error, "pthread_create"); 134 135 for (j = 0; j < TESTS; j++) { 136 while (gate != 1) 137 cpu_spinwait(); 138 gate = 2; 139 barrier(); 140 141 tsc = rdtsc(); 142 143 barrier(); 144 while (gate != 3) 145 cpu_spinwait(); 146 gate = 4; 147 148 skew[i][j] = thread_tsc - tsc; 149 } 150 151 error = pthread_join(child, NULL); 152 if (error) 153 errc(1, error, "pthread_join"); 154 } 155 156 /* 157 * Compute average skew for each CPU and output a summary of 158 * the results. 159 */ 160 aveskew = calloc(ncpu, sizeof(*aveskew)); 161 minskew = calloc(ncpu, sizeof(*minskew)); 162 maxskew = calloc(ncpu, sizeof(*maxskew)); 163 stddev = calloc(ncpu, sizeof(*stddev)); 164 stddev[0] = 0.0; 165 for (i = 1; i < ncpu; i++) { 166 sumsq = 0; 167 minskew[i] = maxskew[i] = skew[i][0]; 168 for (j = 0; j < TESTS; j++) { 169 aveskew[i] += skew[i][j]; 170 if (skew[i][j] < minskew[i]) 171 minskew[i] = skew[i][j]; 172 if (skew[i][j] > maxskew[i]) 173 maxskew[i] = skew[i][j]; 174 sumsq += (skew[i][j] * skew[i][j]); 175 } 176 aveskew[i] /= TESTS; 177 sumsq /= TESTS; 178 sumsq -= aveskew[i] * aveskew[i]; 179 stddev[i] = sqrt(sumsq); 180 } 181 182 printf("CPU | TSC skew (min/avg/max/stddev)\n"); 183 printf("----+------------------------------\n"); 184 for (i = 0; i < ncpu; i++) 185 printf("%3d | %5jd %5jd %5jd %6.3f\n", cpus[i], 186 (intmax_t)minskew[i], (intmax_t)aveskew[i], 187 (intmax_t)maxskew[i], stddev[i]); 188 return (0); 189} 190