gprof.c revision 97631
11573Srgrimes/* 2290494Sbapt * Copyright (c) 1983, 1993 3290494Sbapt * The Regents of the University of California. All rights reserved. 4128004Stjr * 51573Srgrimes * Redistribution and use in source and binary forms, with or without 61573Srgrimes * modification, are permitted provided that the following conditions 71573Srgrimes * are met: 81573Srgrimes * 1. Redistributions of source code must retain the above copyright 91573Srgrimes * notice, this list of conditions and the following disclaimer. 101573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 11227753Stheraven * notice, this list of conditions and the following disclaimer in the 12227753Stheraven * documentation and/or other materials provided with the distribution. 13227753Stheraven * 3. All advertising materials mentioning features or use of this software 14227753Stheraven * must display the following acknowledgement: 15227753Stheraven * This product includes software developed by the University of 161573Srgrimes * California, Berkeley and its contributors. 171573Srgrimes * 4. Neither the name of the University nor the names of its contributors 181573Srgrimes * may be used to endorse or promote products derived from this software 191573Srgrimes * without specific prior written permission. 201573Srgrimes * 211573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 221573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 231573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 241573Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 251573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 261573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 271573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 281573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 291573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 301573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 311573Srgrimes * SUCH DAMAGE. 321573Srgrimes */ 331573Srgrimes 341573Srgrimes#ifndef lint 351573Srgrimesstatic const char copyright[] = 361573Srgrimes"@(#) Copyright (c) 1983, 1993\n\ 371573Srgrimes The Regents of the University of California. All rights reserved.\n"; 381573Srgrimes#endif /* not lint */ 391573Srgrimes 401573Srgrimes#ifndef lint 411573Srgrimes#if 0 421573Srgrimesstatic char sccsid[] = "@(#)gprof.c 8.1 (Berkeley) 6/6/93"; 431573Srgrimes#endif 4492986Sobrienstatic const char rcsid[] = 4592986Sobrien "$FreeBSD: head/usr.bin/gprof/gprof.c 97631 2002-05-30 21:18:01Z wollman $"; 461573Srgrimes#endif /* not lint */ 47121845Stjr 48105233Stjr#include <err.h> 49121845Stjr#include <limits.h> 501573Srgrimes#include <stdint.h> 511573Srgrimes#include "gprof.h" 521573Srgrimes 53129179Stjrstatic int valcmp(const void *, const void *); 54121845Stjr 55129153Stjr 561573Srgrimesstatic struct gmonhdr gmonhdr; 57142654Sphantomstatic int lflag; 58142654Sphantomstatic int Lflag; 59142654Sphantom 60142654Sphantommain(argc, argv) 61142654Sphantom int argc; 62142654Sphantom char **argv; 63142654Sphantom{ 64142654Sphantom char **sp; 65142654Sphantom nltype **timesortnlp; 66142654Sphantom char **defaultEs; 67121845Stjr 68172619Sache --argc; 69172619Sache argv++; 70172619Sache debug = 0; 71172619Sache bflag = TRUE; 72172619Sache while ( *argv != 0 && **argv == '-' ) { 731573Srgrimes (*argv)++; 74227753Stheraven switch ( **argv ) { 751573Srgrimes case 'a': 76121845Stjr aflag = TRUE; 77227753Stheraven break; 78227753Stheraven case 'b': 79227753Stheraven bflag = FALSE; 80227753Stheraven break; 81227753Stheraven case 'C': 82227753Stheraven Cflag = TRUE; 83227753Stheraven cyclethreshold = atoi( *++argv ); 84227753Stheraven break; 851573Srgrimes case 'c': 861573Srgrimes#if defined(vax) || defined(tahoe) 871573Srgrimes cflag = TRUE; 88142654Sphantom#else 89128004Stjr errx(1, "-c isn't supported on this architecture yet"); 90128004Stjr#endif 91128004Stjr break; 92128004Stjr case 'd': 93128004Stjr dflag = TRUE; 94128004Stjr setlinebuf(stdout); 95128004Stjr debug |= atoi( *++argv ); 96128004Stjr debug |= ANYDEBUG; 97128004Stjr# ifdef DEBUG 98128004Stjr printf("[main] debug = %d\n", debug); 99142654Sphantom# else /* not DEBUG */ 100121845Stjr printf("gprof: -d ignored\n"); 101121845Stjr# endif /* DEBUG */ 1021573Srgrimes break; 103121845Stjr case 'E': 104121845Stjr ++argv; 105121845Stjr addlist( Elist , *argv ); 106121845Stjr Eflag = TRUE; 107121845Stjr addlist( elist , *argv ); 108121845Stjr eflag = TRUE; 109121845Stjr break; 110121845Stjr case 'e': 111121845Stjr addlist( elist , *++argv ); 112121845Stjr eflag = TRUE; 1131573Srgrimes break; 1141573Srgrimes case 'F': 115142654Sphantom ++argv; 116121845Stjr addlist( Flist , *argv ); 117121845Stjr Fflag = TRUE; 1181573Srgrimes addlist( flist , *argv ); 119121845Stjr fflag = TRUE; 120121845Stjr break; 121121845Stjr case 'f': 122121845Stjr addlist( flist , *++argv ); 123121845Stjr fflag = TRUE; 124121845Stjr break; 125121845Stjr case 'k': 126121845Stjr addlist( kfromlist , *++argv ); 127121845Stjr addlist( ktolist , *++argv ); 128121845Stjr kflag = TRUE; 1291573Srgrimes break; 130129179Stjr case 'K': 131142654Sphantom Kflag = TRUE; 132132497Stjr break; 133132497Stjr case 'l': 134129179Stjr lflag = 1; 135129179Stjr Lflag = 0; 136129179Stjr break; 137129179Stjr case 'L': 138132497Stjr Lflag = 1; 139132497Stjr lflag = 0; 140132497Stjr break; 141132497Stjr case 's': 142129179Stjr sflag = TRUE; 143129179Stjr break; 144129179Stjr case 'u': 145132497Stjr uflag = TRUE; 146129179Stjr break; 147129179Stjr case 'z': 148129179Stjr zflag = TRUE; 149129179Stjr break; 150129179Stjr } 151129179Stjr argv++; 152129179Stjr } 153129179Stjr if ( *argv != 0 ) { 154129179Stjr a_outname = *argv; 155129179Stjr argv++; 156142654Sphantom } else { 157132497Stjr a_outname = A_OUTNAME; 158132497Stjr } 159129179Stjr if ( *argv != 0 ) { 160129179Stjr gmonname = *argv; 161129179Stjr argv++; 162129179Stjr } else { 163129707Stjr gmonname = (char *) malloc(strlen(a_outname)+6); 164132497Stjr strcpy(gmonname, a_outname); 165129707Stjr strcat(gmonname, ".gmon"); 166129707Stjr } 167129707Stjr /* 168129707Stjr * get information from the executable file. 169129707Stjr */ 170129707Stjr if ((Kflag && kernel_getnfile(a_outname, &defaultEs) == -1) || 171129707Stjr (elf_getnfile(a_outname, &defaultEs) == -1 && 172129179Stjr aout_getnfile(a_outname, &defaultEs) == -1)) 173129179Stjr errx(1, "%s: bad format", a_outname); 174129179Stjr /* 175132497Stjr * sort symbol table. 176129179Stjr */ 177301069Sache qsort(nl, nname, sizeof(nltype), valcmp); 178129179Stjr /* 179129179Stjr * turn off default functions 180129179Stjr */ 181129179Stjr for ( sp = defaultEs ; *sp ; sp++ ) { 182129179Stjr Eflag = TRUE; 183129179Stjr addlist( Elist , *sp ); 184129179Stjr eflag = TRUE; 185129179Stjr addlist( elist , *sp ); 186129179Stjr } 187129179Stjr /* 188129179Stjr * get information about mon.out file(s). 189129179Stjr */ 190142654Sphantom do { 191142654Sphantom getpfile( gmonname ); 192142654Sphantom if ( *argv != 0 ) { 193227753Stheraven gmonname = *argv; 194227753Stheraven } 195227753Stheraven } while ( *argv++ != 0 ); 196227753Stheraven /* 197227753Stheraven * how many ticks per second? 198227753Stheraven * if we can't tell, report time in ticks. 199227753Stheraven */ 200227753Stheraven if (hz == 0) { 201227753Stheraven hz = 1; 202227753Stheraven fprintf(stderr, "time is in ticks, not seconds\n"); 203227753Stheraven } 204227753Stheraven /* 205281927Stheraven * dump out a gmon.sum file if requested 206227753Stheraven */ 207227753Stheraven if ( sflag ) { 208227753Stheraven dumpsum( GMONSUM ); 209227753Stheraven } 210227753Stheraven /* 211227753Stheraven * assign samples to procedures 212227753Stheraven */ 213227753Stheraven asgnsamples(); 214227753Stheraven /* 215227753Stheraven * assemble the dynamic profile 216 */ 217 timesortnlp = doarcs(); 218 /* 219 * print the dynamic profile 220 */ 221 if(!lflag) { 222 printgprof( timesortnlp ); 223 } 224 /* 225 * print the flat profile 226 */ 227 if(!Lflag) { 228 printprof(); 229 } 230 /* 231 * print the index 232 */ 233 printindex(); 234 done(); 235} 236 237 /* 238 * information from a gmon.out file is in two parts: 239 * an array of sampling hits within pc ranges, 240 * and the arcs. 241 */ 242getpfile(filename) 243 char *filename; 244{ 245 FILE *pfile; 246 FILE *openpfile(); 247 struct rawarc arc; 248 249 pfile = openpfile(filename); 250 readsamples(pfile); 251 /* 252 * the rest of the file consists of 253 * a bunch of <from,self,count> tuples. 254 */ 255 while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) { 256# ifdef DEBUG 257 if ( debug & SAMPLEDEBUG ) { 258 printf( "[getpfile] frompc 0x%lx selfpc 0x%lx count %ld\n" , 259 arc.raw_frompc , arc.raw_selfpc , arc.raw_count ); 260 } 261# endif /* DEBUG */ 262 /* 263 * add this arc 264 */ 265 tally( &arc ); 266 } 267 fclose(pfile); 268} 269 270FILE * 271openpfile(filename) 272 char *filename; 273{ 274 struct gmonhdr tmp; 275 FILE *pfile; 276 int size; 277 int rate; 278 279 if((pfile = fopen(filename, "r")) == NULL) { 280 perror(filename); 281 done(); 282 } 283 fread(&tmp, sizeof(struct gmonhdr), 1, pfile); 284 if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc || 285 tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt ) ) { 286 warnx("%s: incompatible with first gmon file", filename); 287 done(); 288 } 289 gmonhdr = tmp; 290 if ( gmonhdr.version == GMONVERSION ) { 291 rate = gmonhdr.profrate; 292 size = sizeof(struct gmonhdr); 293 } else { 294 fseek(pfile, sizeof(struct ophdr), SEEK_SET); 295 size = sizeof(struct ophdr); 296 gmonhdr.profrate = rate = hertz(); 297 gmonhdr.version = GMONVERSION; 298 } 299 if (hz == 0) { 300 hz = rate; 301 } else if (hz != rate) { 302 fprintf(stderr, 303 "%s: profile clock rate (%d) %s (%ld) in first gmon file\n", 304 filename, rate, "incompatible with clock rate", hz); 305 done(); 306 } 307 if ( gmonhdr.histcounter_type == 0 ) { 308 /* Historical case. The type was u_short (2 bytes in practice). */ 309 histcounter_type = 16; 310 histcounter_size = 2; 311 } else { 312 histcounter_type = gmonhdr.histcounter_type; 313 histcounter_size = abs(histcounter_type) / CHAR_BIT; 314 } 315 s_lowpc = (unsigned long) gmonhdr.lpc; 316 s_highpc = (unsigned long) gmonhdr.hpc; 317 lowpc = (unsigned long)gmonhdr.lpc / HISTORICAL_SCALE_2; 318 highpc = (unsigned long)gmonhdr.hpc / HISTORICAL_SCALE_2; 319 sampbytes = gmonhdr.ncnt - size; 320 nsamples = sampbytes / histcounter_size; 321# ifdef DEBUG 322 if ( debug & SAMPLEDEBUG ) { 323 printf( "[openpfile] hdr.lpc 0x%lx hdr.hpc 0x%lx hdr.ncnt %d\n", 324 gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt ); 325 printf( "[openpfile] s_lowpc 0x%lx s_highpc 0x%lx\n" , 326 s_lowpc , s_highpc ); 327 printf( "[openpfile] lowpc 0x%lx highpc 0x%lx\n" , 328 lowpc , highpc ); 329 printf( "[openpfile] sampbytes %d nsamples %d\n" , 330 sampbytes , nsamples ); 331 printf( "[openpfile] sample rate %ld\n" , hz ); 332 } 333# endif /* DEBUG */ 334 return(pfile); 335} 336 337tally( rawp ) 338 struct rawarc *rawp; 339{ 340 nltype *parentp; 341 nltype *childp; 342 343 parentp = nllookup( rawp -> raw_frompc ); 344 childp = nllookup( rawp -> raw_selfpc ); 345 if ( parentp == 0 || childp == 0 ) 346 return; 347 if ( kflag 348 && onlist( kfromlist , parentp -> name ) 349 && onlist( ktolist , childp -> name ) ) { 350 return; 351 } 352 childp -> ncall += rawp -> raw_count; 353# ifdef DEBUG 354 if ( debug & TALLYDEBUG ) { 355 printf( "[tally] arc from %s to %s traversed %ld times\n" , 356 parentp -> name , childp -> name , rawp -> raw_count ); 357 } 358# endif /* DEBUG */ 359 addarc( parentp , childp , rawp -> raw_count ); 360} 361 362/* 363 * dump out the gmon.sum file 364 */ 365dumpsum( sumfile ) 366 char *sumfile; 367{ 368 register nltype *nlp; 369 register arctype *arcp; 370 struct rawarc arc; 371 FILE *sfile; 372 373 if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL ) { 374 perror( sumfile ); 375 done(); 376 } 377 /* 378 * dump the header; use the last header read in 379 */ 380 if ( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 ) { 381 perror( sumfile ); 382 done(); 383 } 384 /* 385 * dump the samples 386 */ 387 if (fwrite(samples, histcounter_size, nsamples, sfile) != nsamples) { 388 perror( sumfile ); 389 done(); 390 } 391 /* 392 * dump the normalized raw arc information 393 */ 394 for ( nlp = nl ; nlp < npe ; nlp++ ) { 395 for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) { 396 arc.raw_frompc = arcp -> arc_parentp -> value; 397 arc.raw_selfpc = arcp -> arc_childp -> value; 398 arc.raw_count = arcp -> arc_count; 399 if ( fwrite ( &arc , sizeof arc , 1 , sfile ) != 1 ) { 400 perror( sumfile ); 401 done(); 402 } 403# ifdef DEBUG 404 if ( debug & SAMPLEDEBUG ) { 405 printf( "[dumpsum] frompc 0x%lx selfpc 0x%lx count %ld\n" , 406 arc.raw_frompc , arc.raw_selfpc , arc.raw_count ); 407 } 408# endif /* DEBUG */ 409 } 410 } 411 fclose( sfile ); 412} 413 414static int 415valcmp(v1, v2) 416 const void *v1; 417 const void *v2; 418{ 419 const nltype *p1 = (const nltype *)v1; 420 const nltype *p2 = (const nltype *)v2; 421 422 if ( p1 -> value < p2 -> value ) { 423 return LESSTHAN; 424 } 425 if ( p1 -> value > p2 -> value ) { 426 return GREATERTHAN; 427 } 428 return EQUALTO; 429} 430 431readsamples(pfile) 432 FILE *pfile; 433{ 434 register i; 435 intmax_t sample; 436 437 if (samples == 0) { 438 samples = (double *) calloc(nsamples, sizeof(double)); 439 if (samples == 0) { 440 warnx("no room for %d sample pc's", nsamples); 441 done(); 442 } 443 } 444 for (i = 0; i < nsamples; i++) { 445 fread(&sample, histcounter_size, 1, pfile); 446 if (feof(pfile)) 447 break; 448 switch ( histcounter_type ) { 449 case -8: 450 samples[i] += *(int8_t *)&sample; 451 break; 452 case 8: 453 samples[i] += *(u_int8_t *)&sample; 454 break; 455 case -16: 456 samples[i] += *(int16_t *)&sample; 457 break; 458 case 16: 459 samples[i] += *(u_int16_t *)&sample; 460 break; 461 case -32: 462 samples[i] += *(int32_t *)&sample; 463 break; 464 case 32: 465 samples[i] += *(u_int32_t *)&sample; 466 break; 467 case -64: 468 samples[i] += *(int64_t *)&sample; 469 break; 470 case 64: 471 samples[i] += *(u_int64_t *)&sample; 472 break; 473 default: 474 err(1, "unsupported histogram counter type %d", histcounter_type); 475 } 476 } 477 if (i != nsamples) { 478 warnx("unexpected EOF after reading %d/%d samples", --i , nsamples ); 479 done(); 480 } 481} 482 483/* 484 * Assign samples to the procedures to which they belong. 485 * 486 * There are three cases as to where pcl and pch can be 487 * with respect to the routine entry addresses svalue0 and svalue1 488 * as shown in the following diagram. overlap computes the 489 * distance between the arrows, the fraction of the sample 490 * that is to be credited to the routine which starts at svalue0. 491 * 492 * svalue0 svalue1 493 * | | 494 * v v 495 * 496 * +-----------------------------------------------+ 497 * | | 498 * | ->| |<- ->| |<- ->| |<- | 499 * | | | | | | 500 * +---------+ +---------+ +---------+ 501 * 502 * ^ ^ ^ ^ ^ ^ 503 * | | | | | | 504 * pcl pch pcl pch pcl pch 505 * 506 * For the vax we assert that samples will never fall in the first 507 * two bytes of any routine, since that is the entry mask, 508 * thus we give call alignentries() to adjust the entry points if 509 * the entry mask falls in one bucket but the code for the routine 510 * doesn't start until the next bucket. In conjunction with the 511 * alignment of routine addresses, this should allow us to have 512 * only one sample for every four bytes of text space and never 513 * have any overlap (the two end cases, above). 514 */ 515asgnsamples() 516{ 517 register int j; 518 double ccnt; 519 double time; 520 unsigned long pcl, pch; 521 register int i; 522 unsigned long overlap; 523 unsigned long svalue0, svalue1; 524 525 /* read samples and assign to namelist symbols */ 526 scale = highpc - lowpc; 527 scale /= nsamples; 528 alignentries(); 529 for (i = 0, j = 1; i < nsamples; i++) { 530 ccnt = samples[i]; 531 if (ccnt == 0) 532 continue; 533 pcl = lowpc + (unsigned long)(scale * i); 534 pch = lowpc + (unsigned long)(scale * (i + 1)); 535 time = ccnt; 536# ifdef DEBUG 537 if ( debug & SAMPLEDEBUG ) { 538 printf( "[asgnsamples] pcl 0x%lx pch 0x%lx ccnt %.0f\n" , 539 pcl , pch , ccnt ); 540 } 541# endif /* DEBUG */ 542 totime += time; 543 for (j = j - 1; j < nname; j++) { 544 svalue0 = nl[j].svalue; 545 svalue1 = nl[j+1].svalue; 546 /* 547 * if high end of tick is below entry address, 548 * go for next tick. 549 */ 550 if (pch < svalue0) 551 break; 552 /* 553 * if low end of tick into next routine, 554 * go for next routine. 555 */ 556 if (pcl >= svalue1) 557 continue; 558 overlap = min(pch, svalue1) - max(pcl, svalue0); 559 if (overlap > 0) { 560# ifdef DEBUG 561 if (debug & SAMPLEDEBUG) { 562 printf("[asgnsamples] (0x%lx->0x%lx-0x%lx) %s gets %f ticks %lu overlap\n", 563 nl[j].value / HISTORICAL_SCALE_2, 564 svalue0, svalue1, nl[j].name, 565 overlap * time / scale, overlap); 566 } 567# endif /* DEBUG */ 568 nl[j].time += overlap * time / scale; 569 } 570 } 571 } 572# ifdef DEBUG 573 if (debug & SAMPLEDEBUG) { 574 printf("[asgnsamples] totime %f\n", totime); 575 } 576# endif /* DEBUG */ 577} 578 579 580unsigned long 581min(a, b) 582 unsigned long a,b; 583{ 584 if (a<b) 585 return(a); 586 return(b); 587} 588 589unsigned long 590max(a, b) 591 unsigned long a,b; 592{ 593 if (a>b) 594 return(a); 595 return(b); 596} 597 598 /* 599 * calculate scaled entry point addresses (to save time in asgnsamples), 600 * and possibly push the scaled entry points over the entry mask, 601 * if it turns out that the entry point is in one bucket and the code 602 * for a routine is in the next bucket. 603 */ 604alignentries() 605{ 606 register struct nl *nlp; 607 unsigned long bucket_of_entry; 608 unsigned long bucket_of_code; 609 610 for (nlp = nl; nlp < npe; nlp++) { 611 nlp -> svalue = nlp -> value / HISTORICAL_SCALE_2; 612 bucket_of_entry = (nlp->svalue - lowpc) / scale; 613 bucket_of_code = (nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2 - 614 lowpc) / scale; 615 if (bucket_of_entry < bucket_of_code) { 616# ifdef DEBUG 617 if (debug & SAMPLEDEBUG) { 618 printf("[alignentries] pushing svalue 0x%lx to 0x%lx\n", 619 nlp->svalue, 620 nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2); 621 } 622# endif /* DEBUG */ 623 nlp->svalue += OFFSET_OF_CODE / HISTORICAL_SCALE_2; 624 } 625 } 626} 627 628done() 629{ 630 631 exit(0); 632} 633