/* gprof.c revision 256281 */
1243044Sjkim/* 2243044Sjkim * Copyright (c) 1983, 1993 3243044Sjkim * The Regents of the University of California. All rights reserved. 4243044Sjkim * 5243044Sjkim * Redistribution and use in source and binary forms, with or without 6243044Sjkim * modification, are permitted provided that the following conditions 7243044Sjkim * are met: 8298714Sjkim * 1. Redistributions of source code must retain the above copyright 9243044Sjkim * notice, this list of conditions and the following disclaimer. 10243044Sjkim * 2. Redistributions in binary form must reproduce the above copyright 11243044Sjkim * notice, this list of conditions and the following disclaimer in the 12243044Sjkim * documentation and/or other materials provided with the distribution. 13243044Sjkim * 4. Neither the name of the University nor the names of its contributors 14243044Sjkim * may be used to endorse or promote products derived from this software 15243044Sjkim * without specific prior written permission. 16243044Sjkim * 17243044Sjkim * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18243044Sjkim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19243044Sjkim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20243044Sjkim * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21243044Sjkim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22243044Sjkim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23243044Sjkim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24243044Sjkim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25243044Sjkim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26243044Sjkim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27243044Sjkim * SUCH DAMAGE. 
28243044Sjkim */ 29243044Sjkim 30243044Sjkim#ifndef lint 31243044Sjkimstatic const char copyright[] = 32243044Sjkim"@(#) Copyright (c) 1983, 1993\n\ 33243044Sjkim The Regents of the University of California. All rights reserved.\n"; 34243044Sjkim#endif /* not lint */ 35243044Sjkim 36243044Sjkim#if 0 37243044Sjkim#ifndef lint 38243044Sjkimstatic char sccsid[] = "@(#)gprof.c 8.1 (Berkeley) 6/6/93"; 39243044Sjkim#endif /* not lint */ 40243044Sjkim#endif 41243044Sjkim 42243044Sjkim#include <sys/cdefs.h> 43243044Sjkim__FBSDID("$FreeBSD: stable/10/usr.bin/gprof/gprof.c 246783 2013-02-14 08:16:03Z charnier $"); 44243347Sjkim 45243347Sjkim#include <err.h> 46243347Sjkim#include <limits.h> 47243347Sjkim#include <stdint.h> 48243347Sjkim#include <string.h> 49243347Sjkim 50243044Sjkim#include "gprof.h" 51243044Sjkim 52243044Sjkimstatic int valcmp(const void *, const void *); 53243044Sjkim 54243044Sjkimstatic struct gmonhdr gmonhdr; 55243044Sjkimstatic int lflag; 56243044Sjkimstatic int Lflag; 57243044Sjkim 58243044Sjkimint 59243044Sjkimmain(int argc, char **argv) 60243044Sjkim{ 61243044Sjkim char **sp; 62243044Sjkim nltype **timesortnlp; 63243044Sjkim char **defaultEs; 64243044Sjkim 65243044Sjkim --argc; 66243044Sjkim argv++; 67243044Sjkim debug = 0; 68243044Sjkim bflag = TRUE; 69243044Sjkim while ( *argv != 0 && **argv == '-' ) { 70243044Sjkim (*argv)++; 71243044Sjkim switch ( **argv ) { 72243044Sjkim case 'a': 73243044Sjkim aflag = TRUE; 74243044Sjkim break; 75243044Sjkim case 'b': 76243044Sjkim bflag = FALSE; 77243044Sjkim break; 78243044Sjkim case 'C': 79243044Sjkim Cflag = TRUE; 80243044Sjkim cyclethreshold = atoi( *++argv ); 81243044Sjkim break; 82243044Sjkim case 'd': 83243044Sjkim dflag = TRUE; 84243044Sjkim setlinebuf(stdout); 85245582Sjkim debug |= atoi( *++argv ); 86243044Sjkim debug |= ANYDEBUG; 87243044Sjkim# ifdef DEBUG 88243044Sjkim printf("[main] debug = %d\n", debug); 89243044Sjkim# else /* not DEBUG */ 90243044Sjkim printf("gprof: -d ignored\n"); 
91243044Sjkim# endif /* DEBUG */ 92243044Sjkim break; 93243044Sjkim case 'E': 94243044Sjkim ++argv; 95243044Sjkim addlist( Elist , *argv ); 96243044Sjkim Eflag = TRUE; 97243044Sjkim addlist( elist , *argv ); 98243044Sjkim eflag = TRUE; 99243044Sjkim break; 100243044Sjkim case 'e': 101243044Sjkim addlist( elist , *++argv ); 102243044Sjkim eflag = TRUE; 103243044Sjkim break; 104243044Sjkim case 'F': 105243044Sjkim ++argv; 106243044Sjkim addlist( Flist , *argv ); 107243044Sjkim Fflag = TRUE; 108298714Sjkim addlist( flist , *argv ); 109298714Sjkim fflag = TRUE; 110243044Sjkim break; 111243044Sjkim case 'f': 112243044Sjkim addlist( flist , *++argv ); 113243044Sjkim fflag = TRUE; 114243044Sjkim break; 115243044Sjkim case 'k': 116243044Sjkim addlist( kfromlist , *++argv ); 117243044Sjkim addlist( ktolist , *++argv ); 118243044Sjkim kflag = TRUE; 119243044Sjkim break; 120243044Sjkim case 'K': 121243044Sjkim Kflag = TRUE; 122243044Sjkim break; 123243044Sjkim case 'l': 124243044Sjkim lflag = 1; 125243044Sjkim Lflag = 0; 126243044Sjkim break; 127243044Sjkim case 'L': 128243044Sjkim Lflag = 1; 129243044Sjkim lflag = 0; 130243044Sjkim break; 131250838Sjkim case 's': 132243044Sjkim sflag = TRUE; 133243044Sjkim break; 134243044Sjkim case 'u': 135243044Sjkim uflag = TRUE; 136243044Sjkim break; 137243044Sjkim case 'z': 138243044Sjkim zflag = TRUE; 139243044Sjkim break; 140243044Sjkim } 141243044Sjkim argv++; 142243044Sjkim } 143243044Sjkim if ( *argv != 0 ) { 144243044Sjkim a_outname = *argv; 145243044Sjkim argv++; 146243044Sjkim } else { 147243044Sjkim a_outname = A_OUTNAME; 148243044Sjkim } 149243044Sjkim if ( *argv != 0 ) { 150243044Sjkim gmonname = *argv; 151243044Sjkim argv++; 152243044Sjkim } else { 153243044Sjkim gmonname = (char *) malloc(strlen(a_outname)+6); 154243044Sjkim strcpy(gmonname, a_outname); 155243044Sjkim strcat(gmonname, ".gmon"); 156243044Sjkim } 157243044Sjkim /* 158243044Sjkim * get information from the executable file. 
159243044Sjkim */ 160243044Sjkim if ((Kflag && kernel_getnfile(a_outname, &defaultEs) == -1) || 161243044Sjkim (!Kflag && elf_getnfile(a_outname, &defaultEs) == -1 && 162243044Sjkim aout_getnfile(a_outname, &defaultEs) == -1)) 163243044Sjkim errx(1, "%s: bad format", a_outname); 164243044Sjkim /* 165243044Sjkim * sort symbol table. 166243044Sjkim */ 167243044Sjkim qsort(nl, nname, sizeof(nltype), valcmp); 168243044Sjkim /* 169243044Sjkim * turn off default functions 170243044Sjkim */ 171243044Sjkim for ( sp = defaultEs ; *sp ; sp++ ) { 172243044Sjkim Eflag = TRUE; 173243044Sjkim addlist( Elist , *sp ); 174243044Sjkim eflag = TRUE; 175243044Sjkim addlist( elist , *sp ); 176243044Sjkim } 177243044Sjkim /* 178243044Sjkim * get information about mon.out file(s). 179243044Sjkim */ 180243044Sjkim do { 181243044Sjkim getpfile( gmonname ); 182243044Sjkim if ( *argv != 0 ) { 183243044Sjkim gmonname = *argv; 184243044Sjkim } 185243044Sjkim } while ( *argv++ != 0 ); 186243044Sjkim /* 187243044Sjkim * how many ticks per second? 188243044Sjkim * if we can't tell, report time in ticks. 
189243044Sjkim */ 190243044Sjkim if (hz == 0) { 191243044Sjkim hz = 1; 192243044Sjkim fprintf(stderr, "time is in ticks, not seconds\n"); 193243044Sjkim } 194243044Sjkim /* 195243044Sjkim * dump out a gmon.sum file if requested 196243044Sjkim */ 197243044Sjkim if ( sflag ) { 198243044Sjkim dumpsum( GMONSUM ); 199243044Sjkim } 200243044Sjkim /* 201243044Sjkim * assign samples to procedures 202243044Sjkim */ 203243044Sjkim asgnsamples(); 204243044Sjkim /* 205243044Sjkim * assemble the dynamic profile 206243044Sjkim */ 207243044Sjkim timesortnlp = doarcs(); 208243044Sjkim /* 209243044Sjkim * print the dynamic profile 210243044Sjkim */ 211243044Sjkim if(!lflag) { 212243044Sjkim printgprof( timesortnlp ); 213243044Sjkim } 214243044Sjkim /* 215243044Sjkim * print the flat profile 216243044Sjkim */ 217243044Sjkim if(!Lflag) { 218243044Sjkim printprof(); 219243044Sjkim } 220243044Sjkim /* 221243044Sjkim * print the index 222243044Sjkim */ 223243044Sjkim printindex(); 224243044Sjkim exit(0); 225243044Sjkim} 226243044Sjkim 227243044Sjkim /* 228243044Sjkim * information from a gmon.out file is in two parts: 229243044Sjkim * an array of sampling hits within pc ranges, 230243044Sjkim * and the arcs. 231243044Sjkim */ 232243044Sjkimvoid 233243044Sjkimgetpfile(char *filename) 234243044Sjkim{ 235243044Sjkim FILE *pfile; 236243044Sjkim struct rawarc arc; 237243044Sjkim 238243044Sjkim pfile = openpfile(filename); 239243044Sjkim readsamples(pfile); 240243044Sjkim /* 241243044Sjkim * the rest of the file consists of 242243044Sjkim * a bunch of <from,self,count> tuples. 
243243044Sjkim */ 244243044Sjkim while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) { 245243044Sjkim# ifdef DEBUG 246243044Sjkim if ( debug & SAMPLEDEBUG ) { 247243044Sjkim printf( "[getpfile] frompc 0x%lx selfpc 0x%lx count %ld\n" , 248243044Sjkim arc.raw_frompc , arc.raw_selfpc , arc.raw_count ); 249243044Sjkim } 250243044Sjkim# endif /* DEBUG */ 251243044Sjkim /* 252243044Sjkim * add this arc 253243044Sjkim */ 254243044Sjkim tally( &arc ); 255243044Sjkim } 256243044Sjkim fclose(pfile); 257243044Sjkim} 258243044Sjkim 259250838SjkimFILE * 260243044Sjkimopenpfile(char *filename) 261243044Sjkim{ 262243044Sjkim struct gmonhdr tmp; 263243044Sjkim FILE *pfile; 264243044Sjkim int size; 265 int rate; 266 267 if((pfile = fopen(filename, "r")) == NULL) 268 err(1, "%s", filename); 269 fread(&tmp, sizeof(struct gmonhdr), 1, pfile); 270 if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc || 271 tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt ) ) 272 errx(1, "%s: incompatible with first gmon file", filename); 273 gmonhdr = tmp; 274 if ( gmonhdr.version == GMONVERSION ) { 275 rate = gmonhdr.profrate; 276 size = sizeof(struct gmonhdr); 277 } else { 278 fseek(pfile, sizeof(struct ophdr), SEEK_SET); 279 size = sizeof(struct ophdr); 280 gmonhdr.profrate = rate = hertz(); 281 gmonhdr.version = GMONVERSION; 282 } 283 if (hz == 0) { 284 hz = rate; 285 } else if (hz != rate) 286 errx(0, "%s: profile clock rate (%d) %s (%ld) in first gmon file", 287 filename, rate, "incompatible with clock rate", hz); 288 if ( gmonhdr.histcounter_type == 0 ) { 289 /* Historical case. The type was u_short (2 bytes in practice). 
*/ 290 histcounter_type = 16; 291 histcounter_size = 2; 292 } else { 293 histcounter_type = gmonhdr.histcounter_type; 294 histcounter_size = abs(histcounter_type) / CHAR_BIT; 295 } 296 s_lowpc = (unsigned long) gmonhdr.lpc; 297 s_highpc = (unsigned long) gmonhdr.hpc; 298 lowpc = (unsigned long)gmonhdr.lpc / HISTORICAL_SCALE_2; 299 highpc = (unsigned long)gmonhdr.hpc / HISTORICAL_SCALE_2; 300 sampbytes = gmonhdr.ncnt - size; 301 nsamples = sampbytes / histcounter_size; 302# ifdef DEBUG 303 if ( debug & SAMPLEDEBUG ) { 304 printf( "[openpfile] hdr.lpc 0x%lx hdr.hpc 0x%lx hdr.ncnt %d\n", 305 gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt ); 306 printf( "[openpfile] s_lowpc 0x%lx s_highpc 0x%lx\n" , 307 s_lowpc , s_highpc ); 308 printf( "[openpfile] lowpc 0x%lx highpc 0x%lx\n" , 309 lowpc , highpc ); 310 printf( "[openpfile] sampbytes %d nsamples %d\n" , 311 sampbytes , nsamples ); 312 printf( "[openpfile] sample rate %ld\n" , hz ); 313 } 314# endif /* DEBUG */ 315 return(pfile); 316} 317 318void 319tally(struct rawarc *rawp) 320{ 321 nltype *parentp; 322 nltype *childp; 323 324 parentp = nllookup( rawp -> raw_frompc ); 325 childp = nllookup( rawp -> raw_selfpc ); 326 if ( parentp == 0 || childp == 0 ) 327 return; 328 if ( kflag 329 && onlist( kfromlist , parentp -> name ) 330 && onlist( ktolist , childp -> name ) ) { 331 return; 332 } 333 childp -> ncall += rawp -> raw_count; 334# ifdef DEBUG 335 if ( debug & TALLYDEBUG ) { 336 printf( "[tally] arc from %s to %s traversed %ld times\n" , 337 parentp -> name , childp -> name , rawp -> raw_count ); 338 } 339# endif /* DEBUG */ 340 addarc( parentp , childp , rawp -> raw_count ); 341} 342 343/* 344 * dump out the gmon.sum file 345 */ 346void 347dumpsum(const char *sumfile) 348{ 349 register nltype *nlp; 350 register arctype *arcp; 351 struct rawarc arc; 352 FILE *sfile; 353 354 if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL ) 355 err( 1 , "%s" , sumfile ); 356 /* 357 * dump the header; use the last header read in 358 */ 359 if 
( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 ) 360 err( 1 , "%s" , sumfile ); 361 /* 362 * dump the samples 363 */ 364 if (fwrite(samples, histcounter_size, nsamples, sfile) != nsamples) 365 err( 1 , "%s" , sumfile ); 366 /* 367 * dump the normalized raw arc information 368 */ 369 for ( nlp = nl ; nlp < npe ; nlp++ ) { 370 for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) { 371 arc.raw_frompc = arcp -> arc_parentp -> value; 372 arc.raw_selfpc = arcp -> arc_childp -> value; 373 arc.raw_count = arcp -> arc_count; 374 if ( fwrite ( &arc , sizeof arc , 1 , sfile ) != 1 ) 375 err( 1 , "%s" , sumfile ); 376# ifdef DEBUG 377 if ( debug & SAMPLEDEBUG ) { 378 printf( "[dumpsum] frompc 0x%lx selfpc 0x%lx count %ld\n" , 379 arc.raw_frompc , arc.raw_selfpc , arc.raw_count ); 380 } 381# endif /* DEBUG */ 382 } 383 } 384 fclose( sfile ); 385} 386 387static int 388valcmp(const void *v1, const void *v2) 389{ 390 const nltype *p1 = (const nltype *)v1; 391 const nltype *p2 = (const nltype *)v2; 392 393 if ( p1 -> value < p2 -> value ) { 394 return LESSTHAN; 395 } 396 if ( p1 -> value > p2 -> value ) { 397 return GREATERTHAN; 398 } 399 return EQUALTO; 400} 401 402void 403readsamples(FILE *pfile) 404{ 405 int i; 406 intmax_t sample; 407 408 if (samples == 0) { 409 samples = (double *) calloc(nsamples, sizeof(double)); 410 if (samples == 0) 411 errx(0, "no room for %d sample pc's", nsamples); 412 } 413 for (i = 0; i < nsamples; i++) { 414 fread(&sample, histcounter_size, 1, pfile); 415 if (feof(pfile)) 416 break; 417 switch ( histcounter_type ) { 418 case -8: 419 samples[i] += *(int8_t *)&sample; 420 break; 421 case 8: 422 samples[i] += *(u_int8_t *)&sample; 423 break; 424 case -16: 425 samples[i] += *(int16_t *)&sample; 426 break; 427 case 16: 428 samples[i] += *(u_int16_t *)&sample; 429 break; 430 case -32: 431 samples[i] += *(int32_t *)&sample; 432 break; 433 case 32: 434 samples[i] += *(u_int32_t *)&sample; 435 break; 436 case -64: 437 samples[i] += 
*(int64_t *)&sample; 438 break; 439 case 64: 440 samples[i] += *(u_int64_t *)&sample; 441 break; 442 default: 443 err(1, "unsupported histogram counter type %d", histcounter_type); 444 } 445 } 446 if (i != nsamples) 447 errx(1, "unexpected EOF after reading %d/%d samples", --i , nsamples ); 448} 449 450/* 451 * Assign samples to the procedures to which they belong. 452 * 453 * There are three cases as to where pcl and pch can be 454 * with respect to the routine entry addresses svalue0 and svalue1 455 * as shown in the following diagram. overlap computes the 456 * distance between the arrows, the fraction of the sample 457 * that is to be credited to the routine which starts at svalue0. 458 * 459 * svalue0 svalue1 460 * | | 461 * v v 462 * 463 * +-----------------------------------------------+ 464 * | | 465 * | ->| |<- ->| |<- ->| |<- | 466 * | | | | | | 467 * +---------+ +---------+ +---------+ 468 * 469 * ^ ^ ^ ^ ^ ^ 470 * | | | | | | 471 * pcl pch pcl pch pcl pch 472 * 473 * For the vax we assert that samples will never fall in the first 474 * two bytes of any routine, since that is the entry mask, 475 * thus we give call alignentries() to adjust the entry points if 476 * the entry mask falls in one bucket but the code for the routine 477 * doesn't start until the next bucket. In conjunction with the 478 * alignment of routine addresses, this should allow us to have 479 * only one sample for every four bytes of text space and never 480 * have any overlap (the two end cases, above). 
481 */ 482void 483asgnsamples(void) 484{ 485 register int j; 486 double ccnt; 487 double thetime; 488 unsigned long pcl, pch; 489 register int i; 490 unsigned long overlap; 491 unsigned long svalue0, svalue1; 492 493 /* read samples and assign to namelist symbols */ 494 scale = highpc - lowpc; 495 scale /= nsamples; 496 alignentries(); 497 for (i = 0, j = 1; i < nsamples; i++) { 498 ccnt = samples[i]; 499 if (ccnt == 0) 500 continue; 501 pcl = lowpc + (unsigned long)(scale * i); 502 pch = lowpc + (unsigned long)(scale * (i + 1)); 503 thetime = ccnt; 504# ifdef DEBUG 505 if ( debug & SAMPLEDEBUG ) { 506 printf( "[asgnsamples] pcl 0x%lx pch 0x%lx ccnt %.0f\n" , 507 pcl , pch , ccnt ); 508 } 509# endif /* DEBUG */ 510 totime += thetime; 511 for (j = j - 1; j < nname; j++) { 512 svalue0 = nl[j].svalue; 513 svalue1 = nl[j+1].svalue; 514 /* 515 * if high end of tick is below entry address, 516 * go for next tick. 517 */ 518 if (pch < svalue0) 519 break; 520 /* 521 * if low end of tick into next routine, 522 * go for next routine. 
523 */ 524 if (pcl >= svalue1) 525 continue; 526 overlap = min(pch, svalue1) - max(pcl, svalue0); 527 if (overlap > 0) { 528# ifdef DEBUG 529 if (debug & SAMPLEDEBUG) { 530 printf("[asgnsamples] (0x%lx->0x%lx-0x%lx) %s gets %f ticks %lu overlap\n", 531 nl[j].value / HISTORICAL_SCALE_2, 532 svalue0, svalue1, nl[j].name, 533 overlap * thetime / scale, overlap); 534 } 535# endif /* DEBUG */ 536 nl[j].time += overlap * thetime / scale; 537 } 538 } 539 } 540# ifdef DEBUG 541 if (debug & SAMPLEDEBUG) { 542 printf("[asgnsamples] totime %f\n", totime); 543 } 544# endif /* DEBUG */ 545} 546 547 548unsigned long 549min(unsigned long a, unsigned long b) 550{ 551 if (a<b) 552 return(a); 553 return(b); 554} 555 556unsigned long 557max(unsigned long a, unsigned long b) 558{ 559 if (a>b) 560 return(a); 561 return(b); 562} 563 564 /* 565 * calculate scaled entry point addresses (to save time in asgnsamples), 566 * and possibly push the scaled entry points over the entry mask, 567 * if it turns out that the entry point is in one bucket and the code 568 * for a routine is in the next bucket. 569 */ 570void 571alignentries(void) 572{ 573 register struct nl *nlp; 574 unsigned long bucket_of_entry; 575 unsigned long bucket_of_code; 576 577 for (nlp = nl; nlp < npe; nlp++) { 578 nlp -> svalue = nlp -> value / HISTORICAL_SCALE_2; 579 bucket_of_entry = (nlp->svalue - lowpc) / scale; 580 bucket_of_code = (nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2 - 581 lowpc) / scale; 582 if (bucket_of_entry < bucket_of_code) { 583# ifdef DEBUG 584 if (debug & SAMPLEDEBUG) { 585 printf("[alignentries] pushing svalue 0x%lx to 0x%lx\n", 586 nlp->svalue, 587 nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2); 588 } 589# endif /* DEBUG */ 590 nlp->svalue += OFFSET_OF_CODE / HISTORICAL_SCALE_2; 591 } 592 } 593} 594