1271044Sjhb/*- 2281887Sjhb * Copyright (c) 2014 Hudson River Trading LLC 3271044Sjhb * Written by: John H. Baldwin <jhb@FreeBSD.org> 4271044Sjhb * All rights reserved. 5271044Sjhb * 6271044Sjhb * Redistribution and use in source and binary forms, with or without 7271044Sjhb * modification, are permitted provided that the following conditions 8271044Sjhb * are met: 9271044Sjhb * 1. Redistributions of source code must retain the above copyright 10271044Sjhb * notice, this list of conditions and the following disclaimer. 11271044Sjhb * 2. Redistributions in binary form must reproduce the above copyright 12271044Sjhb * notice, this list of conditions and the following disclaimer in the 13271044Sjhb * documentation and/or other materials provided with the distribution. 14271044Sjhb * 15271044Sjhb * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16271044Sjhb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17271044Sjhb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18271044Sjhb * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19271044Sjhb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20271044Sjhb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21271044Sjhb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22271044Sjhb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23271044Sjhb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24271044Sjhb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25271044Sjhb * SUCH DAMAGE. 26271044Sjhb */ 27271044Sjhb 28271044Sjhb 29271044Sjhb#include <sys/cdefs.h> 30271044Sjhb__FBSDID("$FreeBSD$"); 31271044Sjhb 32271044Sjhb#include <sys/param.h> 33271044Sjhb#include <sys/cpuset.h> 34271044Sjhb#include <machine/atomic.h> 35271044Sjhb#include <machine/cpu.h> 36271044Sjhb#include <machine/cpufunc.h> 37271044Sjhb#include <assert.h> 38271044Sjhb#include <err.h> 39271044Sjhb#include <errno.h> 40271044Sjhb#include <math.h> 41271044Sjhb#include <pthread.h> 42271044Sjhb#include <stdint.h> 43271044Sjhb#include <stdlib.h> 44271044Sjhb#include <stdio.h> 45271044Sjhb 46271044Sjhb#define barrier() __asm __volatile("" ::: "memory") 47271044Sjhb 48271044Sjhb#define TESTS 1024 49271044Sjhb 50271044Sjhbstatic volatile int gate; 51271044Sjhbstatic volatile uint64_t thread_tsc; 52271044Sjhb 53271044Sjhb/* Bind the current thread to the specified CPU. */ 54271044Sjhbstatic void 55271044Sjhbbind_cpu(int cpu) 56271044Sjhb{ 57271044Sjhb cpuset_t set; 58271044Sjhb 59271044Sjhb CPU_ZERO(&set); 60271044Sjhb CPU_SET(cpu, &set); 61271044Sjhb if (cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(set), 62271044Sjhb &set) < 0) 63271044Sjhb err(1, "cpuset_setaffinity(%d)", cpu); 64271044Sjhb} 65271044Sjhb 66271044Sjhbstatic void * 67271044Sjhbthread_main(void *arg) 68271044Sjhb{ 69271044Sjhb int cpu, i; 70271044Sjhb 71271044Sjhb cpu = (intptr_t)arg; 72271044Sjhb bind_cpu(cpu); 73271044Sjhb for (i = 0; i < TESTS; i++) { 74271044Sjhb gate = 1; 75271044Sjhb while (gate == 1) 76271044Sjhb cpu_spinwait(); 77271044Sjhb barrier(); 78271044Sjhb 79271044Sjhb __asm __volatile("lfence"); 80271044Sjhb thread_tsc = rdtsc(); 81271044Sjhb 82271044Sjhb barrier(); 83271044Sjhb gate = 3; 84271044Sjhb while (gate == 3) 85271044Sjhb cpu_spinwait(); 86271044Sjhb } 87271044Sjhb return (NULL); 88271044Sjhb} 89271044Sjhb 90271044Sjhbint 91271044Sjhbmain(int ac __unused, char **av __unused) 92271044Sjhb{ 93271044Sjhb cpuset_t all_cpus; 94271044Sjhb int64_t **skew, *aveskew, *minskew, *maxskew; 95271044Sjhb float *stddev; 96271044Sjhb double sumsq; 97271044Sjhb pthread_t child; 98271044Sjhb uint64_t tsc; 99271044Sjhb int *cpus; 100271044Sjhb int error, i, j, ncpu; 101271044Sjhb 102271044Sjhb /* 103271044Sjhb * Find all the CPUs this program is eligible to run on and use 104271044Sjhb * this as our global set. This means you can use cpuset to 105271044Sjhb * restrict this program to only run on a subset of CPUs. 106271044Sjhb */ 107271044Sjhb if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, 108271044Sjhb sizeof(all_cpus), &all_cpus) < 0) 109271044Sjhb err(1, "cpuset_getaffinity"); 110271044Sjhb for (ncpu = 0, i = 0; i < CPU_SETSIZE; i++) { 111271044Sjhb if (CPU_ISSET(i, &all_cpus)) 112271044Sjhb ncpu++; 113271044Sjhb } 114271044Sjhb if (ncpu < 2) 115271044Sjhb errx(1, "Only one available CPU"); 116271044Sjhb cpus = calloc(ncpu, sizeof(*cpus)); 117271044Sjhb skew = calloc(ncpu, sizeof(*skew)); 118271044Sjhb for (i = 0; i < ncpu; i++) 119271044Sjhb skew[i] = calloc(TESTS, sizeof(*skew[i])); 120271044Sjhb for (i = 0, j = 0; i < CPU_SETSIZE; i++) 121271044Sjhb if (CPU_ISSET(i, &all_cpus)) { 122271044Sjhb assert(j < ncpu); 123271044Sjhb cpus[j] = i; 124271044Sjhb j++; 125271044Sjhb } 126271044Sjhb 127271044Sjhb /* 128271044Sjhb * We bind this thread to the first CPU and then bind all the 129271044Sjhb * other threads to other CPUs in turn saving TESTS counts of 130271044Sjhb * skew calculations. 131271044Sjhb */ 132271044Sjhb bind_cpu(cpus[0]); 133271044Sjhb for (i = 1; i < ncpu; i++) { 134271044Sjhb error = pthread_create(&child, NULL, thread_main, 135271044Sjhb (void *)(intptr_t)cpus[i]); 136271044Sjhb if (error) 137271044Sjhb errc(1, error, "pthread_create"); 138271044Sjhb 139271044Sjhb for (j = 0; j < TESTS; j++) { 140271044Sjhb while (gate != 1) 141271044Sjhb cpu_spinwait(); 142271044Sjhb gate = 2; 143271044Sjhb barrier(); 144271044Sjhb 145271044Sjhb tsc = rdtsc(); 146271044Sjhb 147271044Sjhb barrier(); 148271044Sjhb while (gate != 3) 149271044Sjhb cpu_spinwait(); 150271044Sjhb gate = 4; 151271044Sjhb 152271044Sjhb skew[i][j] = thread_tsc - tsc; 153271044Sjhb } 154271044Sjhb 155271044Sjhb error = pthread_join(child, NULL); 156271044Sjhb if (error) 157271044Sjhb errc(1, error, "pthread_join"); 158271044Sjhb } 159271044Sjhb 160271044Sjhb /* 161271044Sjhb * Compute average skew for each CPU and output a summary of 162271044Sjhb * the results. 163271044Sjhb */ 164271044Sjhb aveskew = calloc(ncpu, sizeof(*aveskew)); 165271044Sjhb minskew = calloc(ncpu, sizeof(*minskew)); 166271044Sjhb maxskew = calloc(ncpu, sizeof(*maxskew)); 167271044Sjhb stddev = calloc(ncpu, sizeof(*stddev)); 168271044Sjhb stddev[0] = 0.0; 169271044Sjhb for (i = 1; i < ncpu; i++) { 170271044Sjhb sumsq = 0; 171271044Sjhb minskew[i] = maxskew[i] = skew[i][0]; 172271044Sjhb for (j = 0; j < TESTS; j++) { 173271044Sjhb aveskew[i] += skew[i][j]; 174271044Sjhb if (skew[i][j] < minskew[i]) 175271044Sjhb minskew[i] = skew[i][j]; 176271044Sjhb if (skew[i][j] > maxskew[i]) 177271044Sjhb maxskew[i] = skew[i][j]; 178271044Sjhb sumsq += (skew[i][j] * skew[i][j]); 179271044Sjhb } 180271044Sjhb aveskew[i] /= TESTS; 181271044Sjhb sumsq /= TESTS; 182271044Sjhb sumsq -= aveskew[i] * aveskew[i]; 183271044Sjhb stddev[i] = sqrt(sumsq); 184271044Sjhb } 185271044Sjhb 186271044Sjhb printf("CPU | TSC skew (min/avg/max/stddev)\n"); 187271044Sjhb printf("----+------------------------------\n"); 188271044Sjhb for (i = 0; i < ncpu; i++) 189271044Sjhb printf("%3d | %5jd %5jd %5jd %6.3f\n", cpus[i], 190271044Sjhb (intmax_t)minskew[i], (intmax_t)aveskew[i], 191271044Sjhb (intmax_t)maxskew[i], stddev[i]); 192271044Sjhb return (0); 193271044Sjhb} 194