1/* 2 * Copyright (c) 2006 Apple Inc. All Rights Reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29 30/* 31 * Order of Execution 32 * 33 * benchmark_init 34 * 35 * benchmark_optswitch 36 * 37 * benchmark_initrun 38 * 39 * benchmark_initworker 40 * benchmark_initbatch 41 * benchmark 42 * benchmark_finibatch 43 * benchmark_initbatch 44 * benchmark 45 * benchmark_finibatch, etc. 46 * benchmark_finiworker 47 * 48 * benchmark_result 49 * 50 * benchmark_finirun 51 * 52 * benchmark_fini 53 */ 54 55 56 57#ifdef __sun 58#pragma ident "@(#)trivial.c 1.0 08/17/06 Apple Inc." 59#endif 60 61 62 63#include <unistd.h> 64#include <stdlib.h> 65#include <stdio.h> 66 67#include <signal.h> 68#include <strings.h> 69 70#include <sys/sysctl.h> 71#include "../libmicro.h" 72 73#if 1 74# define debug(fmt, args...) (void) fprintf(stderr, fmt "\n" , ##args) 75#else 76# define debug(fmt, args...) 77#endif 78 79 80#define MAXPROC 2048 81#define CHUNK (4<<10) 82#define TRIPS 5 83#ifndef max 84#define max(a, b) ((a) > (b) ? (a) : (b)) 85#endif 86 87 88/* 89 * Your state variables should live in the tsd_t struct below 90 */ 91typedef struct { 92 int process_size; 93 double overhead; 94 int procs; 95 pid_t* pids; 96 int **p; 97 void* data; 98} tsd_t; 99 100static int opts = 1; 101 102void doit(int rd, int wr, int process_size); 103int create_pipes(int **p, int procs); 104int create_daemons(int **p, pid_t *pids, int procs, int process_size); 105void initialize_overhead(void* tsd); 106void cleanup_overhead(void* tsd); 107void benchmark_overhead(void* tsd); 108void initialize(void* tsd); 109void cleanup(void* tsd); 110long bread(void* buf, long nbytes); 111 112 113#pragma mark *** lmbench routines 114 115/* 116 * lmbench routines, etc. brought over for this benchmark 117 */ 118 119void 120morefds(void) 121{ 122#ifdef RLIMIT_NOFILE 123 struct rlimit r; 124 125 getrlimit(RLIMIT_NOFILE, &r); 126 r.rlim_cur = r.rlim_max; 127 setrlimit(RLIMIT_NOFILE, &r); 128#endif 129} 130 131void 132doit(int rd, int wr, int process_size) 133{ 134 int msg; 135 void* data = NULL; 136 137 if (process_size) { 138 data = malloc(process_size); 139 if (data) bzero(data, process_size); 140 } 141 for ( ;; ) { 142 if (read(rd, &msg, sizeof(msg)) != sizeof(msg)) { 143 debug("read/write on pipe"); 144 break; 145 } 146 bread(data, process_size); 147 if (write(wr, &msg, sizeof(msg)) != sizeof(msg)) { 148 debug("read/write on pipe"); 149 break; 150 } 151 } 152 exit(0); 153} 154 155/* 156 * Return the number of processors in this host 157 */ 158int 159sched_ncpus() 160{ 161#ifdef MP_NPROCS 162 /* SGI IRIX interface */ 163 return sysmp(MP_NPROCS); 164#elif defined(HAVE_MPCTL) 165 /* HP-UX interface */ 166 return mpctl(MPC_GETNUMSPUS_SYS, 0, 0); 167#elif defined(_SC_NPROCESSORS_ONLN) 168 /* AIX, Solaris, and Linux interface */ 169 return sysconf(_SC_NPROCESSORS_ONLN); 170#elif __APPLE__ 171 char *name="hw.activecpu"; 172 int cpus, retval; 173 size_t len = 4; 174 retval=sysctlbyname(name, &cpus, &len, NULL, 0); 175 /* Check retval here */ 176 debug("cpus = %d retval = %d", cpus, retval); 177 return cpus; 178#endif 179 return 1; 180} 181 182/* 183 * Use to get sequentially created processes "far" away from 184 * each other in an SMP. 185 * 186 * XXX: probably doesn't work for NCPUS not a power of two. 187 */ 188int 189reverse_bits(int cpu) 190{ 191 int i; 192 int nbits; 193 int max = sched_ncpus() - 1; 194 int cpu_reverse = 0; 195 196 for (i = max>>1, nbits = 1; i > 0; i >>= 1, nbits++) 197 ; 198 /* now reverse the bits */ 199 for (i = 0; i < nbits; i++) { 200 if (cpu & (1<<i)) 201 cpu_reverse |= (1<<(nbits-i-1)); 202 } 203 return cpu_reverse; 204} 205 206 207/* 208 * The interface used by benchmp. 209 * 210 * childno is the "logical" child id number. 211 * In range [0, ..., parallel-1]. 212 * benchproc is the "logical" id within the benchmark process. The 213 * benchmp-created process is logical ID zero, child processes 214 * created by the benchmark range from [1, ..., nbenchprocs]. 215 * nbenchprocs is the number of child processes that each benchmark 216 * process will create. Most benchmarks will leave this zero, 217 * but some such as the pipe() benchmarks will not. 218 */ 219int 220handle_scheduler(int childno, int benchproc, int nbenchprocs) 221{ 222 int cpu = 0; 223 char* sched = getenv("LMBENCH_SCHED"); 224 225 if (!sched || strcasecmp(sched, "DEFAULT") == 0) { 226 /* do nothing. Allow scheduler to control placement */ 227 return 0; 228 } else if (strcasecmp(sched, "SINGLE") == 0) { 229 /* assign all processes to CPU 0 */ 230 cpu = 0; 231 } else if (strcasecmp(sched, "BALANCED") == 0) { 232 /* assign each benchmark process to its own processor, 233 * but child processes will share the CPU with the 234 * parent. 235 */ 236 cpu = childno; 237 } else if (strcasecmp(sched, "BALANCED_SPREAD") == 0) { 238 /* 239 * assign each benchmark process to its own processor, 240 * logically as far away from neighboring IDs as 241 * possible. This can help identify bus contention 242 * issues in SMPs with hierarchical busses or NUMA 243 * memory. 244 */ 245 cpu = reverse_bits(childno); 246 } else if (strcasecmp(sched, "UNIQUE") == 0) { 247 /* 248 * assign each benchmark process and each child process 249 * to its own processor. 250 */ 251 cpu = childno * (nbenchprocs + 1) + benchproc; 252 } else if (strcasecmp(sched, "UNIQUE_SPREAD") == 0) { 253 /* 254 * assign each benchmark process and each child process 255 * to its own processor, logically as far away from 256 * neighboring IDs as possible. This can help identify 257 * bus contention issues in SMPs with hierarchical busses 258 * or NUMA memory. 259 */ 260 cpu = reverse_bits(childno * (nbenchprocs + 1) + benchproc); 261 } 262#if 0 // BLOB 263 else if (strncasecmp(sched, "CUSTOM ", strlen("CUSTOM ")) == 0) { 264 cpu = custom(sched + strlen("CUSTOM"), childno); 265 } else if (strncasecmp(sched, "CUSTOM_UNIQUE ", strlen("CUSTOM_UNIQUE ")) == 0) { 266 cpu = custom(sched + strlen("CUSTOM_UNIQUE"), 267 childno * (nbenchprocs + 1) + benchproc); 268 } 269#endif // BLOB 270 else { 271 /* default action: do nothing */ 272 return 0; 273 } 274 debug("cpu = %d, sched_ncpus() = %d", cpu, sched_ncpus()); 275 return 0; 276// return sched_pin(cpu % sched_ncpus()); 277} 278 279int 280create_daemons(int **p, pid_t *pids, int procs, int process_size) 281{ 282 int i, j; 283 int msg; 284 285 /* 286 * Use the pipes as a ring, and fork off a bunch of processes 287 * to pass the byte through their part of the ring. 288 * 289 * Do the sum in each process and get that time before moving on. 290 */ 291 handle_scheduler(getpid(), 0, procs-1); 292 for (i = 1; i < procs; ++i) { 293 switch (pids[i] = fork()) { 294 case -1: /* could not fork, out of processes? */ 295 return i; 296 297 case 0: /* child */ 298 handle_scheduler(getpid(), i, procs-1); 299 for (j = 0; j < procs; ++j) { 300 if (j != i - 1) close(p[j][0]); 301 if (j != i) close(p[j][1]); 302 } 303 doit(p[i-1][0], p[i][1], process_size); 304 /* NOTREACHED */ 305 306 default: /* parent */ 307 ; 308 } 309 } 310 311 /* 312 * Go once around the loop to make sure that everyone is ready and 313 * to get the token in the pipeline. 314 */ 315 if (write(p[0][1], &msg, sizeof(msg)) != sizeof(msg) || 316 read(p[procs-1][0], &msg, sizeof(msg)) != sizeof(msg)) { 317 debug("write/read/write on pipe"); 318 exit(1); 319 } 320 return procs; 321} 322 323int 324create_pipes(int **p, int procs) 325{ 326 int i; 327 /* 328 * Get a bunch of pipes. 329 */ 330 morefds(); 331 for (i = 0; i < procs; ++i) { 332 if (pipe(p[i]) == -1) { 333 return i; 334 } 335 } 336 return procs; 337} 338 339void 340initialize_overhead(void* cookie) 341{ 342 int i; 343 int procs; 344 int* p; 345 tsd_t *pState = (tsd_t *)cookie; 346 347 pState->pids = NULL; 348 pState->p = (int**)malloc(pState->procs * (sizeof(int*) + 2 * sizeof(int))); 349 p = (int*)&pState->p[pState->procs]; 350 for (i = 0; i < pState->procs; ++i) { 351 pState->p[i] = p; 352 p += 2; 353 } 354 355 pState->data = (pState->process_size > 0) ? malloc(pState->process_size) : NULL; 356 if (pState->data) 357 bzero(pState->data, pState->process_size); 358 359 procs = create_pipes(pState->p, pState->procs); 360 if (procs < pState->procs) { 361 debug("procs < pState->procs"); 362 cleanup_overhead(cookie); 363 exit(1); 364 } 365} 366 367void 368cleanup_overhead(void* tsd) 369{ 370 int i; 371 tsd_t *ts = (tsd_t *)tsd; 372 373 for (i = 0; i < ts->procs; ++i) { 374 close(ts->p[i][0]); 375 close(ts->p[i][1]); 376 } 377 378 free(ts->p); 379 if (ts->data) free(ts->data); 380} 381 382void 383cleanup(void* cookie) 384{ 385 int i; 386 tsd_t *pState = (tsd_t *)cookie; 387 388 389 /* 390 * Close the pipes and kill the children. 391 */ 392 cleanup_overhead(cookie); 393 for (i = 1; pState->pids && i < pState->procs; ++i) { 394 if (pState->pids[i] > 0) { 395 kill(pState->pids[i], SIGKILL); 396 waitpid(pState->pids[i], NULL, 0); 397 } 398 } 399 if (pState->pids) 400 free(pState->pids); 401 pState->pids = NULL; 402} 403 404void 405benchmark_overhead(void* tsd) 406{ 407 tsd_t *ts = (tsd_t *)tsd; 408 int i = 0; 409 int msg = 1; 410 411 for (i = 0; i < lm_optB; i++) { 412 if (write(ts->p[i][1], &msg, sizeof(msg)) != sizeof(msg)) { 413 debug("read/write on pipe"); 414 exit(1); 415 } 416 if (read(ts->p[i][0], &msg, sizeof(msg)) != sizeof(msg)) { 417 debug("read/write on pipe"); 418 exit(1); 419 } 420 if (++i == ts->procs) { 421 i = 0; 422 } 423 bread(ts->data, ts->process_size); 424 } 425} 426 427/* analogous to bzero, bcopy, etc., except that it just reads 428 * data into the processor 429 */ 430long 431bread(void* buf, long nbytes) 432{ 433 long sum = 0; 434 register long *p, *next; 435 register char *end; 436 437 p = (long*)buf; 438 end = (char*)buf + nbytes; 439 for (next = p + 128; (void*)next <= (void*)end; p = next, next += 128) { 440 sum += 441 p[0]+p[1]+p[2]+p[3]+p[4]+p[5]+p[6]+p[7]+ 442 p[8]+p[9]+p[10]+p[11]+p[12]+p[13]+p[14]+ 443 p[15]+p[16]+p[17]+p[18]+p[19]+p[20]+p[21]+ 444 p[22]+p[23]+p[24]+p[25]+p[26]+p[27]+p[28]+ 445 p[29]+p[30]+p[31]+p[32]+p[33]+p[34]+p[35]+ 446 p[36]+p[37]+p[38]+p[39]+p[40]+p[41]+p[42]+ 447 p[43]+p[44]+p[45]+p[46]+p[47]+p[48]+p[49]+ 448 p[50]+p[51]+p[52]+p[53]+p[54]+p[55]+p[56]+ 449 p[57]+p[58]+p[59]+p[60]+p[61]+p[62]+p[63]+ 450 p[64]+p[65]+p[66]+p[67]+p[68]+p[69]+p[70]+ 451 p[71]+p[72]+p[73]+p[74]+p[75]+p[76]+p[77]+ 452 p[78]+p[79]+p[80]+p[81]+p[82]+p[83]+p[84]+ 453 p[85]+p[86]+p[87]+p[88]+p[89]+p[90]+p[91]+ 454 p[92]+p[93]+p[94]+p[95]+p[96]+p[97]+p[98]+ 455 p[99]+p[100]+p[101]+p[102]+p[103]+p[104]+ 456 p[105]+p[106]+p[107]+p[108]+p[109]+p[110]+ 457 p[111]+p[112]+p[113]+p[114]+p[115]+p[116]+ 458 p[117]+p[118]+p[119]+p[120]+p[121]+p[122]+ 459 p[123]+p[124]+p[125]+p[126]+p[127]; 460 } 461 for (next = p + 16; (void*)next <= (void*)end; p = next, next += 16) { 462 sum += 463 p[0]+p[1]+p[2]+p[3]+p[4]+p[5]+p[6]+p[7]+ 464 p[8]+p[9]+p[10]+p[11]+p[12]+p[13]+p[14]+ 465 p[15]; 466 } 467 for (next = p + 1; (void*)next <= (void*)end; p = next, next++) { 468 sum += *p; 469 } 470 return sum; 471} 472 473#pragma mark *** darbench routines 474 475 476/*ARGSUSED*/ 477int 478benchmark_initbatch(void *tsd) 479{ 480 /* 481 * initialize your state variables here second 482 */ 483 tsd_t *ts = (tsd_t *)tsd; 484 int procs; 485 486 initialize_overhead(tsd); 487 488 ts->pids = (pid_t*)malloc(ts->procs * sizeof(pid_t)); 489 if (ts->pids == NULL) 490 exit(1); 491 bzero((void*)ts->pids, ts->procs * sizeof(pid_t)); 492 procs = create_daemons(ts->p, ts->pids, 493 ts->procs, ts->process_size); 494 if (procs < ts->procs) { 495 cleanup(tsd); 496 exit(1); 497 } 498 return (0); 499} 500 501int 502benchmark_finirun() 503{ 504 return (0); 505} 506 507int 508benchmark_init() 509{ 510 /* 511 * the lm_optstr must be defined here or no options for you 512 * 513 * ...and the framework will throw an error 514 * 515 */ 516 (void) sprintf(lm_optstr, "s:"); 517 /* 518 * working hypothesis: 519 * 520 * tsd_t is the struct that we can pass around our 521 * state info in 522 * 523 * lm_tsdsize will allocate the space we need for this 524 * structure throughout the rest of the framework 525 */ 526 lm_tsdsize = sizeof (tsd_t); 527 528 (void) sprintf(lm_usage, 529 " [-s kbytes]\n" 530 " processes [processes ...]\n"); 531 532 return (0); 533} 534 535int 536benchmark_fini() 537{ 538 return (0); 539} 540 541int 542benchmark_finibatch(void *tsd) 543{ 544 tsd_t *ts = (tsd_t *)tsd; 545 int i; 546 547 /* 548 * Close the pipes and kill the children. 549 */ 550 cleanup_overhead(tsd); 551 for (i = 1; ts->pids && i < ts->procs; ++i) { 552 if (ts->pids[i] > 0) { 553 kill(ts->pids[i], SIGKILL); 554 waitpid(ts->pids[i], NULL, 0); 555 } 556 } 557 if (ts->pids) 558 free(ts->pids); 559 ts->pids = NULL; 560 return (0); 561} 562 563char * 564benchmark_result() 565{ 566 static char result = '\0'; 567 return (&result); 568} 569 570int 571benchmark_finiworker(void *tsd) 572{ 573 return (0); 574} 575 576int 577benchmark_optswitch(int opt, char *optarg) 578{ 579 580 switch (opt) { 581 case 's': 582 opts = sizetoint(optarg); 583 break; 584 default: 585 return (-1); 586 } 587 return (0); 588} 589 590int 591benchmark_initworker(void *tsd) 592{ 593 tsd_t *ts = (tsd_t *)tsd; 594 595 ts->process_size = opts; 596 597 return (0); 598} 599 600int 601benchmark_initrun() 602{ 603 return (0); 604} 605 606int 607benchmark(void *tsd, result_t *res) 608{ 609 /* 610 * initialize your state variables here last 611 * 612 * and realize that you are paying for your initialization here 613 * and it is really a bad idea 614 */ 615 tsd_t *ts = (tsd_t *)tsd; 616 int i; 617 int msg=1; 618 619 for (i = 0; i < lm_optB; i++) { 620 if (write(ts->p[0][1], &msg, sizeof(msg)) != 621 sizeof(msg)) { 622 debug("read/write on pipe"); 623 exit(1); 624 } 625 if (read(ts->p[ts->procs-1][0], &msg, sizeof(msg)) != sizeof(msg)) { 626 debug("read/write on pipe"); 627 exit(1); 628 } 629 bread(ts->data, ts->process_size); 630 } 631 res->re_count = i; 632 633 return (0); 634} 635