1/** 2 * \file 3 * \brief User-space Microbenchmarks. 4 */ 5 6/* 7 * Copyright (c) 2007, 2008, 2009, 2010, 2012, ETH Zurich. 8 * All rights reserved. 9 * 10 * This file is distributed under the terms in the attached LICENSE file. 11 * If you do not find this file, copies can be found by writing to: 12 * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group. 13 */ 14 15#include <stdio.h> 16#include <stdint.h> 17#include <inttypes.h> 18#include <barrelfish/barrelfish.h> 19#include <barrelfish/dispatch.h> 20 21/* #define MINIMAL_BARRELFISH */ 22 23#ifndef BENCH_POSIX 24# include <barrelfish/barrelfish.h> 25# include <barrelfish_kpi/syscalls.h> 26# include <barrelfish_kpi/types.h> 27# include <barrelfish/syscalls.h> 28# include <barrelfish/sys_debug.h> 29// # include <barrelfish/backwards/idc.h> 30# include <stdarg.h> 31# include <string.h> 32#else 33# include <sys/types.h> 34# include <unistd.h> 35#endif 36 37#define MICROBENCH_ITERATIONS 1000000 38 39typedef void (*Benchmark)(void); 40 41#ifdef BENCH_POSIX 42 43static inline uint64_t rdtsc(void) 44{ 45 uint64_t eax, edx; 46 __asm volatile ("rdtsc" : "=a" (eax), "=d" (edx)); 47 return (edx << 32) | eax; 48} 49 50#else 51 52// Dummy mcount() that does nothing. For profiling. 53static __attribute__ ((unused,no_instrument_function)) void mcount(void) 54{ 55} 56 57#ifdef MINIMAL_BARRELFISH 58 59static int myprintf(const char *fmt, ...) 60{ 61 va_list argptr; 62 char str[256]; 63 int ret; 64 65 va_start(argptr, fmt); 66 ret = vsnprintf(str, sizeof(str), fmt, argptr); 67 sys_print(str, strlen(str)); 68 va_end(argptr); 69 70 return ret; 71} 72 73# define printf myprintf 74 75#endif 76 77#endif 78 79static uint64_t 80divide_round(uint64_t quotient, uint64_t divisor) 81{ 82 if ((quotient % divisor) * 2 >= divisor) { 83 // round up 84 return (quotient / divisor) + 1; 85 } else { 86 return (quotient / divisor); 87 } 88} 89 90static inline void print_result(uint64_t result) 91{ 92 printf("%" PRIu64 " ticks. Done %u iterations in %" PRIu64 " ticks.\n", 93 divide_round(result, MICROBENCH_ITERATIONS), MICROBENCH_ITERATIONS, 94 result); 95} 96 97static void syscall_benchmark(void) 98{ 99 for(int i = 0; i < MICROBENCH_ITERATIONS; i++) { 100#ifndef BENCH_POSIX 101 sys_nop(); 102#else 103 getpid(); 104#endif 105 } 106} 107 108static void rdtsc_benchmark(void) 109{ 110 for(int i = 0; i < MICROBENCH_ITERATIONS; i++) { 111 rdtsc(); 112 } 113} 114 115static void xchg_benchmark(void) 116{ 117 uint32_t mem = 1, reg = 7; 118 119 for(int i = 0; i < MICROBENCH_ITERATIONS; i++) { 120 __asm__ __volatile__( 121 "xchg %0, %1" 122 : "=a" (reg), "=m" (mem) 123 : "a" (reg)); 124 } 125} 126 127static void cmpxchg_benchmark(void) 128{ 129 uint32_t src = 5, dest = 5, reg = 5; 130 131 for(int i = 0; i < MICROBENCH_ITERATIONS; i++) { 132 __asm__ __volatile__( 133 "lock cmpxchg %1, %2" 134 : "=a" (reg) 135 : "q"(src), "m"(dest), "0"(reg) 136 : "memory"); 137 } 138} 139 140static void dec_benchmark(void) 141{ 142 uint32_t val = 1000000; 143 144 for(int i = 0; i < MICROBENCH_ITERATIONS; i++) { 145 __asm__ __volatile__( 146 "lock dec{l} %0" 147 : "+m" (val) 148 : 149 : "memory"); 150 } 151} 152 153#if 0 154 155/* 156 * XXX: These two benchmarks are not implemented safely (xor'ing rdx, 157 * going back to C and then assuming rdx is still zero doesn't work) 158 * and do not currently compile this way. I'm disabling them until 159 * someone fixes them. 160 */ 161 162static void bts_benchmark(void) 163{ 164 uint64_t mem = 3; 165 166 __asm__ __volatile__ ("xor %%rdx, %%rdx" ::); 167 168 for(int i = 0; i < MICROBENCH_ITERATIONS; i++) { 169 __asm__ __volatile__ ( 170 "lock bts %%rdx,%0" 171 : /* no output */ 172 : "m" (mem) 173 : "%rdx"); 174 } 175} 176 177static void bts_clr_benchmark(void) 178{ 179 uint64_t mem = 3; 180 181 __asm__ __volatile__("xor %%rdx,%%rdx"); 182 183 for(int i = 0; i < MICROBENCH_ITERATIONS; i++) { 184 __asm__ __volatile__ ( 185 "lock bts %%rdx,%0\n\t" 186 "movq $0,%0\n\t" 187 : /* no output */ 188 : "m" (mem) 189 : "%rdx"); 190 } 191} 192 193#endif 194 195static void clr_benchmark(void) 196{ 197 uint64_t mem = 0; 198 199 for(int i = 0; i < MICROBENCH_ITERATIONS; i++) { 200 __asm__ __volatile__ ( 201 "movl $0,%0\n\t" 202 : /* no output */ 203 : "m" (mem) 204 : "%rax"); 205 } 206} 207 208#ifndef BENCH_POSIX 209 210#if 0 211static void rdtscp_benchmark(void) 212{ 213 for(int i = 0; i < MICROBENCH_ITERATIONS; i++) { 214 rdtscp(); 215 } 216} 217#endif 218 219#if 0 // XXX: update to new IDC system to make it work 220static struct capref capaddr; 221 222static void capinvoke_benchmark(void) 223{ 224 struct idc_send_msg msg; 225 226 idc_msg_init(&msg); 227 228 for(int i = 0; i < MICROBENCH_ITERATIONS; i++) { 229 cap_invoke(capaddr, &msg); 230 } 231} 232 233static void idc_benchmark(void) 234{ 235 struct idc_send_msg msg; 236 struct idc_recv_msg rmsg; 237 struct idc_endpoint *ep; 238 struct capref epcap; 239 240 int r = endpoint_create(NULL, NULL, DEFAULT_IDC_BUF_WORDS, &epcap, &ep); 241 assert(r == 0); 242 243 idc_msg_init(&msg); 244 245 uint64_t minres = 0, maxres = 0, sum = 0; 246 for(int i = 0; i < 1000; i++) { 247 uint64_t start = rdtsc(); 248 cap_invoke(epcap, &msg); 249 messages_idc_wait(ep); 250 idc_endpoint_poll(ep, &rmsg, NULL); 251 uint64_t duration = rdtsc() - start; 252 253 sum += duration; 254 if(minres == 0 && maxres == 0) { 255 minres = maxres = duration; 256 } else { 257 if(duration < minres) { 258 minres = duration; 259 } else { 260 if(duration > maxres) { 261 maxres = duration; 262 } 263 } 264 } 265 } 266 267 uint64_t variance = maxres - minres; 268 printf("min: %" PRIu64 " ticks, max: %" PRIu64 " ticks, " 269 "variance: %" PRIu64 " ticks, avg: %" PRIu64 " ticks\n", 270 minres, maxres, variance, divide_round(sum, 1000)); 271} 272#endif 273 274#endif 275 276static uint64_t benchmark(Benchmark bench) 277{ 278 uint64_t start, end; 279 280 start = rdtsc(); 281 bench(); 282 end = rdtsc(); 283 284 return end - start; 285} 286 287int main(int argc, char *argv[]) 288{ 289 printf("bench running on core %d.\n", disp_get_core_id()); 290 291 printf("NOP system call: "); 292 print_result(benchmark(syscall_benchmark)); 293 294 printf("RDTSC instruction: "); 295 print_result(benchmark(rdtsc_benchmark)); 296 297#ifndef BENCH_POSIX 298// printf("RDTSCP instruction: "); 299// print_result(benchmark(rdtscp_benchmark)); 300 301 printf("XCHG instruction: "); 302 print_result(benchmark(xchg_benchmark)); 303 304 printf("LOCK CMPXCHG instruction: "); 305 print_result(benchmark(cmpxchg_benchmark)); 306 307 printf("LOCK DEC instruction: "); 308 print_result(benchmark(dec_benchmark)); 309 310#if 0 311 printf("atomic test & set: "); 312 print_result(benchmark(bts_benchmark)); 313 314 printf("atomic test & set when cleared: "); 315 print_result(benchmark(bts_clr_benchmark)); 316#endif 317 318 printf("clear memory: "); 319 print_result(benchmark(clr_benchmark)); 320 321#if 0 /* FIXME: change this code to avoid hardcoded caddr manipulation! */ 322 printf("NULL cap invocation (1 level deep): "); 323 // 4th entry in rootcn 324 capaddr = 4U << 26; 325 print_result(benchmark(capinvoke_benchmark)); 326 327 printf("NULL cap invocation (2 levels deep): "); 328 // last (64th) entry in taskdircn 329 capaddr = 63U << 6; 330 print_result(benchmark(capinvoke_benchmark)); 331#endif 332 333#if 0 // XXX: update to new IDC system to make it work 334 printf("NULL cap invocation (3 levels deep): "); 335 // 1st entry in taskcn 336 //capaddr = CPTR_NULL; 337 print_result(benchmark(capinvoke_benchmark)); 338 339 printf("Local IDC: "); 340 idc_benchmark(); 341#endif 342 343// printf("Async IDC: %lu ticks\n", 344// divide_round(benchmark(asyncidc_benchmark), 2)); 345 346/* printf("Local IDC w/ cap transfer: "); */ 347/* localcaptransfer_benchmark(); */ 348#endif 349 350 printf("End of benchmarks.\n"); 351 return 0; 352} 353