1/* 2 * Copyright (c) 2009 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28#include <unistd.h> 29#include <stdio.h> 30#include <math.h> 31#include <sys/wait.h> 32#include <sys/syscall.h> 33#include <sys/types.h> 34#include <sys/ptrace.h> 35#include <semaphore.h> 36#include <stdlib.h> 37#include <pthread.h> 38#include <fcntl.h> 39#include <errno.h> 40#include <string.h> 41 42#include <libkern/OSAtomic.h> 43 44#include <mach/mach_time.h> 45#include <mach/mach.h> 46#include <mach/task.h> 47#include <mach/semaphore.h> 48 49typedef enum wake_type { WAKE_BROADCAST_ONESEM, WAKE_BROADCAST_PERTHREAD, WAKE_CHAIN } wake_type_t; 50typedef enum my_policy_type { MY_POLICY_REALTIME, MY_POLICY_TIMESHARE, MY_POLICY_FIXEDPRI } my_policy_type_t; 51 52#define assert(truth, label) do { if(!(truth)) { printf("Thread %p: failure on line %d\n", pthread_self(), __LINE__); goto label; } } while (0) 53 54#define CONSTRAINT_NANOS (20000000ll) /* 20 ms */ 55#define COMPUTATION_NANOS (10000000ll) /* 10 ms */ 56#define TRACEWORTHY_NANOS (10000000ll) /* 10 ms */ 57 58#if DEBUG 59#define debug_log(args...) printf(args) 60#else 61#define debug_log(args...) do { } while(0) 62#endif 63 64/* Declarations */ 65void* child_thread_func(void *arg); 66void print_usage(); 67int thread_setup(); 68my_policy_type_t parse_thread_policy(const char *str); 69int thread_finish_iteration(); 70 71/* Global variables (general) */ 72int g_numthreads; 73wake_type_t g_waketype; 74policy_t g_policy; 75int g_iterations; 76struct mach_timebase_info g_mti; 77semaphore_t g_main_sem; 78uint64_t *g_thread_endtimes_abs; 79volatile int32_t g_done_threads; 80boolean_t g_do_spin = FALSE; 81boolean_t g_verbose = FALSE; 82uint64_t g_starttime_abs; 83#if MIMIC_DIGI_LEAD_TIME 84int g_long_spinid; 85uint64_t g_spinlength_abs; 86#endif /* MIMIC_DIGI_LEAD_TIME */ 87 88/* Global variables (broadcast) */ 89semaphore_t g_machsem; 90semaphore_t g_leadersem; 91 92/* Global variables (chain) */ 93semaphore_t *g_semarr; 94 95uint64_t 96abs_to_nanos(uint64_t abstime) 97{ 98 return (uint64_t)(abstime * (((double)g_mti.numer) / ((double)g_mti.denom))); 99} 100 101uint64_t 102nanos_to_abs(uint64_t ns) 103{ 104 return (uint64_t)(ns * (((double)g_mti.denom) / ((double)g_mti.numer))); 105} 106 107/* 108 * Figure out what thread policy to use 109 */ 110my_policy_type_t 111parse_thread_policy(const char *str) 112{ 113 if (strcmp(str, "timeshare") == 0) { 114 return MY_POLICY_TIMESHARE; 115 } else if (strcmp(str, "realtime") == 0) { 116 return MY_POLICY_REALTIME; 117 } else if (strcmp(str, "fixed") == 0) { 118 return MY_POLICY_FIXEDPRI; 119 } else { 120 printf("Invalid thread policy %s\n", str); 121 exit(1); 122 } 123} 124 125/* 126 * Figure out what wakeup pattern to use 127 */ 128wake_type_t 129parse_wakeup_pattern(const char *str) 130{ 131 if (strcmp(str, "chain") == 0) { 132 return WAKE_CHAIN; 133 } else if (strcmp(str, "broadcast-single-sem") == 0) { 134 return WAKE_BROADCAST_ONESEM; 135 } else if (strcmp(str, "broadcast-per-thread") == 0) { 136 return WAKE_BROADCAST_PERTHREAD; 137 } else { 138 print_usage(); 139 exit(1); 140 } 141} 142 143/* 144 * Set policy 145 */ 146int 147thread_setup() 148{ 149 int res; 150 151 switch (g_policy) { 152 case MY_POLICY_TIMESHARE: 153 { 154 return 0; 155 } 156 case MY_POLICY_REALTIME: 157 { 158 thread_time_constraint_policy_data_t pol; 159 160 /* Hard-coded realtime parameters (similar to what Digi uses) */ 161 pol.period = 100000; 162 pol.constraint = nanos_to_abs(CONSTRAINT_NANOS); 163 pol.computation = nanos_to_abs(COMPUTATION_NANOS); 164 pol.preemptible = 0; /* Ignored by OS */ 165 166 res = thread_policy_set(mach_thread_self(), THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t) &pol, THREAD_TIME_CONSTRAINT_POLICY_COUNT); 167 assert(res == 0, fail); 168 break; 169 } 170 case MY_POLICY_FIXEDPRI: 171 { 172 thread_extended_policy_data_t pol; 173 pol.timeshare = 0; 174 175 res = thread_policy_set(mach_thread_self(), THREAD_EXTENDED_POLICY, (thread_policy_t) &pol, THREAD_EXTENDED_POLICY_COUNT); 176 assert(res == 0, fail); 177 break; 178 } 179 default: 180 { 181 printf("invalid policy type\n"); 182 return 1; 183 } 184 } 185 186 return 0; 187fail: 188 return 1; 189} 190 191/* 192 * Wake up main thread if everyone's done 193 */ 194int 195thread_finish_iteration(int id) 196{ 197 int32_t new; 198 int res = 0; 199 volatile float x = 0.0; 200 volatile float y = 0.0; 201 202 debug_log("Thread %p finished iteration.\n", pthread_self()); 203 204#if MIMIC_DIGI_LEAD_TIME 205 /* 206 * One randomly chosen thread determines when everybody gets to stop. 207 */ 208 if (g_do_spin) { 209 if (g_long_spinid == id) { 210 uint64_t endspin; 211 212 /* This thread took up fully half of his computation */ 213 endspin = g_starttime_abs + g_spinlength_abs; 214 while (mach_absolute_time() < endspin) { 215 y = y + 1.5 + x; 216 x = sqrt(y); 217 } 218 } 219 } 220#endif /* MIMIC_DIGI_LEAD_TIME */ 221 222 new = OSAtomicIncrement32(&g_done_threads); 223 224 debug_log("New value is %d\n", new); 225 226 /* 227 * When the last thread finishes, everyone gets to go back to sleep. 228 */ 229 if (new == g_numthreads) { 230 debug_log("Thread %p signalling main thread.\n", pthread_self()); 231 res = semaphore_signal(g_main_sem); 232 } else { 233 if (g_do_spin) { 234 while (g_done_threads < g_numthreads) { 235 y = y + 1.5 + x; 236 x = sqrt(y); 237 } 238 } 239 } 240 241 return res; 242} 243 244/* 245 * Wait for a wakeup, potentially wake up another of the "0-N" threads, 246 * and notify the main thread when done. 247 */ 248void* 249child_thread_func(void *arg) 250{ 251 int my_id = (int)(uintptr_t)arg; 252 int res; 253 int i, j; 254 int32_t new; 255 256 /* Set policy and so forth */ 257 thread_setup(); 258 259 /* Tell main thread when everyone has set up */ 260 new = OSAtomicIncrement32(&g_done_threads); 261 if (new == g_numthreads) { 262 semaphore_signal(g_main_sem); 263 } 264 265 /* For each iteration */ 266 for (i = 0; i < g_iterations; i++) { 267 /* 268 * Leader thread either wakes everyone up or starts the chain going. 269 */ 270 if (my_id == 0) { 271 res = semaphore_wait(g_leadersem); 272 assert(res == 0, fail); 273 274 g_thread_endtimes_abs[my_id] = mach_absolute_time(); 275 276#if MIMIC_DIGI_LEAD_TIME 277 g_long_spinid = rand() % g_numthreads; 278#endif /* MIMIC_DIGI_LEAD_TIME */ 279 280 switch (g_waketype) { 281 case WAKE_CHAIN: 282 semaphore_signal(g_semarr[my_id + 1]); 283 break; 284 case WAKE_BROADCAST_ONESEM: 285 semaphore_signal_all(g_machsem); 286 break; 287 case WAKE_BROADCAST_PERTHREAD: 288 for (j = 1; j < g_numthreads; j++) { 289 semaphore_signal(g_semarr[j]); 290 } 291 break; 292 default: 293 printf("Invalid wakeup type?!\n"); 294 exit(1); 295 } 296 } else { 297 /* 298 * Everyone else waits to be woken up, 299 * records when they wake up, and possibly 300 * wakes up a friend. 301 */ 302 switch(g_waketype) { 303 case WAKE_BROADCAST_ONESEM: 304 res = semaphore_wait(g_machsem); 305 assert(res == KERN_SUCCESS, fail); 306 307 g_thread_endtimes_abs[my_id] = mach_absolute_time(); 308 309 break; 310 /* 311 * For the chain wakeup case: 312 * wait, record time, signal next thread if appropriate 313 */ 314 case WAKE_BROADCAST_PERTHREAD: 315 res = semaphore_wait(g_semarr[my_id]); 316 assert(res == 0, fail); 317 318 g_thread_endtimes_abs[my_id] = mach_absolute_time(); 319 break; 320 321 case WAKE_CHAIN: 322 res = semaphore_wait(g_semarr[my_id]); 323 assert(res == 0, fail); 324 325 g_thread_endtimes_abs[my_id] = mach_absolute_time(); 326 327 if (my_id < (g_numthreads - 1)) { 328 res = semaphore_signal(g_semarr[my_id + 1]); 329 assert(res == 0, fail); 330 } 331 332 break; 333 default: 334 printf("Invalid wake type.\n"); 335 goto fail; 336 } 337 } 338 339 res = thread_finish_iteration(my_id); 340 assert(res == 0, fail); 341 } 342 343 return 0; 344fail: 345 exit(1); 346} 347 348/* 349 * Admittedly not very attractive. 350 */ 351void 352print_usage() 353{ 354 printf("Usage: zn <num threads> <chain | broadcast-single-sem | broadcast-per-thread> <realtime | timeshare | fixed> <num iterations> [-trace <traceworthy latency in ns>] [-spin] [-verbose]\n"); 355} 356 357/* 358 * Given an array of uint64_t values, compute average, max, min, and standard deviation 359 */ 360void 361compute_stats(uint64_t *values, uint64_t count, float *averagep, uint64_t *maxp, uint64_t *minp, float *stddevp) 362{ 363 int i; 364 uint64_t _sum = 0; 365 uint64_t _max = 0; 366 uint64_t _min = UINT64_MAX; 367 float _avg = 0; 368 float _dev = 0; 369 370 for (i = 0; i < count; i++) { 371 _sum += values[i]; 372 _max = values[i] > _max ? values[i] : _max; 373 _min = values[i] < _min ? values[i] : _min; 374 } 375 376 _avg = ((float)_sum) / ((float)count); 377 378 _dev = 0; 379 for (i = 0; i < count; i++) { 380 _dev += powf((((float)values[i]) - _avg), 2); 381 } 382 383 _dev /= count; 384 _dev = sqrtf(_dev); 385 386 *averagep = _avg; 387 *maxp = _max; 388 *minp = _min; 389 *stddevp = _dev; 390} 391 392int 393main(int argc, char **argv) 394{ 395 int i; 396 int res; 397 pthread_t *threads; 398 uint64_t *worst_latencies_ns; 399 uint64_t *worst_latencies_from_first_ns; 400 uint64_t last_end; 401 uint64_t max, min; 402 uint64_t traceworthy_latency_ns = TRACEWORTHY_NANOS; 403 float avg, stddev; 404 405 srand(time(NULL)); 406 407 if (argc < 5 || argc > 9) { 408 print_usage(); 409 goto fail; 410 } 411 412 /* How many threads? */ 413 g_numthreads = atoi(argv[1]); 414 415 /* What wakeup pattern? */ 416 g_waketype = parse_wakeup_pattern(argv[2]); 417 418 /* Policy */ 419 g_policy = parse_thread_policy(argv[3]); 420 421 /* Iterations */ 422 g_iterations = atoi(argv[4]); 423 424 /* Optional args */ 425 for (i = 5; i < argc; i++) { 426 if (strcmp(argv[i], "-spin") == 0) { 427 g_do_spin = TRUE; 428 } else if (strcmp(argv[i], "-verbose") == 0) { 429 g_verbose = TRUE; 430 } else if ((strcmp(argv[i], "-trace") == 0) && 431 (i < (argc - 1))) { 432 traceworthy_latency_ns = strtoull(argv[++i], NULL, 10); 433 } else { 434 print_usage(); 435 goto fail; 436 } 437 } 438 439 mach_timebase_info(&g_mti); 440 441#if MIMIC_DIGI_LEAD_TIME 442 g_spinlength_abs = nanos_to_abs(COMPUTATION_NANOS) / 2; 443#endif /* MIMIC_DIGI_LEAD_TIME */ 444 445 /* Arrays for threads and their wakeup times */ 446 threads = (pthread_t*) malloc(sizeof(pthread_t) * g_numthreads); 447 assert(threads, fail); 448 449 g_thread_endtimes_abs = (uint64_t*) malloc(sizeof(uint64_t) * g_numthreads); 450 assert(g_thread_endtimes_abs, fail); 451 452 worst_latencies_ns = (uint64_t*) malloc(sizeof(uint64_t) * g_iterations); 453 assert(worst_latencies_ns, fail); 454 455 worst_latencies_from_first_ns = (uint64_t*) malloc(sizeof(uint64_t) * g_iterations); 456 assert(worst_latencies_from_first_ns, fail); 457 res = semaphore_create(mach_task_self(), &g_main_sem, SYNC_POLICY_FIFO, 0); 458 assert(res == KERN_SUCCESS, fail); 459 460 /* Either one big semaphore or one per thread */ 461 if (g_waketype == WAKE_CHAIN || g_waketype == WAKE_BROADCAST_PERTHREAD) { 462 g_semarr = malloc(sizeof(semaphore_t) * g_numthreads); 463 assert(g_semarr != NULL, fail); 464 465 for (i = 0; i < g_numthreads; i++) { 466 res = semaphore_create(mach_task_self(), &g_semarr[i], SYNC_POLICY_FIFO, 0); 467 assert(res == KERN_SUCCESS, fail); 468 } 469 470 g_leadersem = g_semarr[0]; 471 } else { 472 res = semaphore_create(mach_task_self(), &g_machsem, SYNC_POLICY_FIFO, 0); 473 assert(res == KERN_SUCCESS, fail); 474 res = semaphore_create(mach_task_self(), &g_leadersem, SYNC_POLICY_FIFO, 0); 475 assert(res == KERN_SUCCESS, fail); 476 } 477 478 /* Create the threads */ 479 g_done_threads = 0; 480 for (i = 0; i < g_numthreads; i++) { 481 res = pthread_create(&threads[i], NULL, child_thread_func, (void*)(uintptr_t)i); 482 assert(res == 0, fail); 483 } 484 485 /* Let everyone get settled */ 486 semaphore_wait(g_main_sem); 487 sleep(1); 488 489 /* Go! */ 490 for (i = 0; i < g_iterations; i++) { 491 int j; 492 uint64_t worst_abs = 0, best_abs = UINT64_MAX; 493 494 g_done_threads = 0; 495 OSMemoryBarrier(); 496 497 g_starttime_abs = mach_absolute_time(); 498 499 /* Fire them off */ 500 semaphore_signal(g_leadersem); 501 502 /* Wait for worker threads to finish */ 503 semaphore_wait(g_main_sem); 504 assert(res == KERN_SUCCESS, fail); 505 506 /* 507 * We report the worst latencies relative to start time 508 * and relative to the lead worker thread. 509 */ 510 for (j = 0; j < g_numthreads; j++) { 511 uint64_t latency_abs; 512 513 latency_abs = g_thread_endtimes_abs[j] - g_starttime_abs; 514 worst_abs = worst_abs < latency_abs ? latency_abs : worst_abs; 515 } 516 517 worst_latencies_ns[i] = abs_to_nanos(worst_abs); 518 519 worst_abs = 0; 520 for (j = 1; j < g_numthreads; j++) { 521 uint64_t latency_abs; 522 523 latency_abs = g_thread_endtimes_abs[j] - g_thread_endtimes_abs[0]; 524 worst_abs = worst_abs < latency_abs ? latency_abs : worst_abs; 525 best_abs = best_abs > latency_abs ? latency_abs : best_abs; 526 } 527 528 worst_latencies_from_first_ns[i] = abs_to_nanos(worst_abs); 529 530 /* 531 * In the event of a bad run, cut a trace point. 532 */ 533 if (worst_latencies_from_first_ns[i] > traceworthy_latency_ns) { 534 int _tmp; 535 536 if (g_verbose) { 537 printf("Worst on this round was %.2f us.\n", ((float)worst_latencies_from_first_ns[i]) / 1000.0); 538 } 539 540 _tmp = syscall(SYS_kdebug_trace, 0xEEEEEEEE, 0, 0, 0, 0); 541 } 542 543 /* Let worker threads get back to sleep... */ 544 usleep(g_numthreads * 10); 545 } 546 547 /* Rejoin threads */ 548 last_end = 0; 549 for (i = 0; i < g_numthreads; i++) { 550 res = pthread_join(threads[i], NULL); 551 assert(res == 0, fail); 552 } 553 554 compute_stats(worst_latencies_ns, g_iterations, &avg, &max, &min, &stddev); 555 printf("Results (from a stop):\n"); 556 printf("Max:\t\t%.2f us\n", ((float)max) / 1000.0); 557 printf("Min:\t\t%.2f us\n", ((float)min) / 1000.0); 558 printf("Avg:\t\t%.2f us\n", avg / 1000.0); 559 printf("Stddev:\t\t%.2f us\n", stddev / 1000.0); 560 561 putchar('\n'); 562 563 compute_stats(worst_latencies_from_first_ns, g_iterations, &avg, &max, &min, &stddev); 564 printf("Results (relative to first thread):\n"); 565 printf("Max:\t\t%.2f us\n", ((float)max) / 1000.0); 566 printf("Min:\t\t%.2f us\n", ((float)min) / 1000.0); 567 printf("Avg:\t\t%.2f us\n", avg / 1000.0); 568 printf("Stddev:\t\t%.2f us\n", stddev / 1000.0); 569 570#if 0 571 for (i = 0; i < g_iterations; i++) { 572 printf("Iteration %d: %f us\n", i, worst_latencies_ns[i] / 1000.0); 573 } 574#endif 575 576 return 0; 577fail: 578 return 1; 579} 580