gprof.c revision 91018
1218822Sdim/* 294536Sobrien * Copyright (c) 1983, 1993 3218822Sdim * The Regents of the University of California. All rights reserved. 4218822Sdim * 5218822Sdim * Redistribution and use in source and binary forms, with or without 6218822Sdim * modification, are permitted provided that the following conditions 7218822Sdim * are met: 8218822Sdim * 1. Redistributions of source code must retain the above copyright 9218822Sdim * notice, this list of conditions and the following disclaimer. 10218822Sdim * 2. Redistributions in binary form must reproduce the above copyright 11218822Sdim * notice, this list of conditions and the following disclaimer in the 12218822Sdim * documentation and/or other materials provided with the distribution. 13218822Sdim * 3. All advertising materials mentioning features or use of this software 14218822Sdim * must display the following acknowledgement: 15218822Sdim * This product includes software developed by the University of 16218822Sdim * California, Berkeley and its contributors. 17218822Sdim * 4. Neither the name of the University nor the names of its contributors 18218822Sdim * may be used to endorse or promote products derived from this software 19218822Sdim * without specific prior written permission. 20218822Sdim * 21218822Sdim * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22218822Sdim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23218822Sdim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24218822Sdim * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25218822Sdim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26218822Sdim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27218822Sdim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28218822Sdim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29218822Sdim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30218822Sdim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31218822Sdim * SUCH DAMAGE. 32218822Sdim */ 33218822Sdim 34218822Sdim#ifndef lint 35218822Sdimstatic const char copyright[] = 36218822Sdim"@(#) Copyright (c) 1983, 1993\n\ 37218822Sdim The Regents of the University of California. All rights reserved.\n"; 38218822Sdim#endif /* not lint */ 39218822Sdim 40218822Sdim#ifndef lint 41218822Sdim#if 0 42218822Sdimstatic char sccsid[] = "@(#)gprof.c 8.1 (Berkeley) 6/6/93"; 43218822Sdim#endif 44218822Sdimstatic const char rcsid[] = 45218822Sdim "$FreeBSD: head/usr.bin/gprof/gprof.c 91018 2002-02-21 12:07:21Z bde $"; 46218822Sdim#endif /* not lint */ 47218822Sdim 48218822Sdim#include <err.h> 49218822Sdim#include "gprof.h" 50218822Sdim 51218822Sdim#define UNITS_TO_CODE (OFFSET_OF_CODE / sizeof(UNIT)) 52218822Sdim 53218822Sdimstatic int valcmp(const void *, const void *); 54218822Sdim 55218822Sdim 56218822Sdimstatic struct gmonhdr gmonhdr; 57218822Sdimstatic int lflag; 58218822Sdimstatic int Lflag; 59218822Sdim 60218822Sdimmain(argc, argv) 61218822Sdim int argc; 62218822Sdim char **argv; 63218822Sdim{ 64218822Sdim char **sp; 65218822Sdim nltype **timesortnlp; 66218822Sdim char **defaultEs; 67218822Sdim 68218822Sdim --argc; 69218822Sdim argv++; 70218822Sdim debug = 0; 71218822Sdim bflag = TRUE; 72218822Sdim while ( *argv != 0 && **argv == '-' ) { 73218822Sdim (*argv)++; 74218822Sdim switch ( **argv ) { 75218822Sdim case 'a': 76218822Sdim aflag = TRUE; 77218822Sdim break; 78218822Sdim case 'b': 79218822Sdim bflag = FALSE; 80218822Sdim break; 81218822Sdim case 'C': 82218822Sdim Cflag = TRUE; 83218822Sdim cyclethreshold = atoi( *++argv ); 84218822Sdim break; 85218822Sdim case 'c': 86218822Sdim#if defined(vax) || defined(tahoe) 87218822Sdim cflag = TRUE; 88218822Sdim#else 89218822Sdim errx(1, "-c isn't supported on this architecture yet"); 90218822Sdim#endif 91218822Sdim break; 92218822Sdim case 'd': 93218822Sdim dflag = TRUE; 94218822Sdim setlinebuf(stdout); 95218822Sdim debug |= atoi( *++argv ); 96218822Sdim debug |= ANYDEBUG; 97218822Sdim# ifdef DEBUG 98218822Sdim printf("[main] debug = %d\n", debug); 99218822Sdim# else not DEBUG 100218822Sdim printf("gprof: -d ignored\n"); 101218822Sdim# endif DEBUG 102218822Sdim break; 103218822Sdim case 'E': 104218822Sdim ++argv; 105218822Sdim addlist( Elist , *argv ); 106218822Sdim Eflag = TRUE; 107218822Sdim addlist( elist , *argv ); 108218822Sdim eflag = TRUE; 109218822Sdim break; 110218822Sdim case 'e': 111218822Sdim addlist( elist , *++argv ); 112218822Sdim eflag = TRUE; 113218822Sdim break; 114218822Sdim case 'F': 115218822Sdim ++argv; 116218822Sdim addlist( Flist , *argv ); 117218822Sdim Fflag = TRUE; 118218822Sdim addlist( flist , *argv ); 119218822Sdim fflag = TRUE; 120218822Sdim break; 121218822Sdim case 'f': 122218822Sdim addlist( flist , *++argv ); 123218822Sdim fflag = TRUE; 124218822Sdim break; 125218822Sdim case 'k': 126218822Sdim addlist( kfromlist , *++argv ); 127218822Sdim addlist( ktolist , *++argv ); 128218822Sdim kflag = TRUE; 129218822Sdim break; 130218822Sdim case 'K': 131218822Sdim Kflag = TRUE; 132218822Sdim break; 133218822Sdim case 'l': 134218822Sdim lflag = 1; 135218822Sdim Lflag = 0; 136218822Sdim break; 137218822Sdim case 'L': 138218822Sdim Lflag = 1; 139218822Sdim lflag = 0; 140218822Sdim break; 141218822Sdim case 's': 142218822Sdim sflag = TRUE; 143218822Sdim break; 144218822Sdim case 'u': 145130561Sobrien uflag = TRUE; 146104834Sobrien break; 147218822Sdim case 'z': 148218822Sdim zflag = TRUE; 149218822Sdim break; 150218822Sdim } 151130561Sobrien argv++; 15233965Sjdp } 15333965Sjdp if ( *argv != 0 ) { 154130561Sobrien a_outname = *argv; 155130561Sobrien argv++; 156130561Sobrien } else { 15733965Sjdp a_outname = A_OUTNAME; 15833965Sjdp } 159 if ( *argv != 0 ) { 160 gmonname = *argv; 161 argv++; 162 } else { 163 gmonname = (char *) malloc(strlen(a_outname)+6); 164 strcpy(gmonname, a_outname); 165 strcat(gmonname, ".gmon"); 166 } 167 /* 168 * get information from the executable file. 169 */ 170 if ((Kflag && kernel_getnfile(a_outname, &defaultEs) == -1) || 171 (elf_getnfile(a_outname, &defaultEs) == -1 && 172 aout_getnfile(a_outname, &defaultEs) == -1)) 173 errx(1, "%s: bad format", a_outname); 174 /* 175 * sort symbol table. 176 */ 177 qsort(nl, nname, sizeof(nltype), valcmp); 178 /* 179 * turn off default functions 180 */ 181 for ( sp = defaultEs ; *sp ; sp++ ) { 182 Eflag = TRUE; 183 addlist( Elist , *sp ); 184 eflag = TRUE; 185 addlist( elist , *sp ); 186 } 187 /* 188 * get information about mon.out file(s). 189 */ 190 do { 191 getpfile( gmonname ); 192 if ( *argv != 0 ) { 193 gmonname = *argv; 194 } 195 } while ( *argv++ != 0 ); 196 /* 197 * how many ticks per second? 198 * if we can't tell, report time in ticks. 199 */ 200 if (hz == 0) { 201 hz = 1; 202 fprintf(stderr, "time is in ticks, not seconds\n"); 203 } 204 /* 205 * dump out a gmon.sum file if requested 206 */ 207 if ( sflag ) { 208 dumpsum( GMONSUM ); 209 } 210 /* 211 * assign samples to procedures 212 */ 213 asgnsamples(); 214 /* 215 * assemble the dynamic profile 216 */ 217 timesortnlp = doarcs(); 218 /* 219 * print the dynamic profile 220 */ 221 if(!lflag) { 222 printgprof( timesortnlp ); 223 } 224 /* 225 * print the flat profile 226 */ 227 if(!Lflag) { 228 printprof(); 229 } 230 /* 231 * print the index 232 */ 233 printindex(); 234 done(); 235} 236 237 /* 238 * information from a gmon.out file is in two parts: 239 * an array of sampling hits within pc ranges, 240 * and the arcs. 241 */ 242getpfile(filename) 243 char *filename; 244{ 245 FILE *pfile; 246 FILE *openpfile(); 247 struct rawarc arc; 248 249 pfile = openpfile(filename); 250 readsamples(pfile); 251 /* 252 * the rest of the file consists of 253 * a bunch of <from,self,count> tuples. 254 */ 255 while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) { 256# ifdef DEBUG 257 if ( debug & SAMPLEDEBUG ) { 258 printf( "[getpfile] frompc 0x%lx selfpc 0x%lx count %ld\n" , 259 arc.raw_frompc , arc.raw_selfpc , arc.raw_count ); 260 } 261# endif DEBUG 262 /* 263 * add this arc 264 */ 265 tally( &arc ); 266 } 267 fclose(pfile); 268} 269 270FILE * 271openpfile(filename) 272 char *filename; 273{ 274 struct gmonhdr tmp; 275 FILE *pfile; 276 int size; 277 int rate; 278 279 if((pfile = fopen(filename, "r")) == NULL) { 280 perror(filename); 281 done(); 282 } 283 fread(&tmp, sizeof(struct gmonhdr), 1, pfile); 284 if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc || 285 tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt ) ) { 286 warnx("%s: incompatible with first gmon file", filename); 287 done(); 288 } 289 gmonhdr = tmp; 290 if ( gmonhdr.version == GMONVERSION ) { 291 rate = gmonhdr.profrate; 292 size = sizeof(struct gmonhdr); 293 } else { 294 fseek(pfile, sizeof(struct ophdr), SEEK_SET); 295 size = sizeof(struct ophdr); 296 gmonhdr.profrate = rate = hertz(); 297 gmonhdr.version = GMONVERSION; 298 } 299 if (hz == 0) { 300 hz = rate; 301 } else if (hz != rate) { 302 fprintf(stderr, 303 "%s: profile clock rate (%d) %s (%ld) in first gmon file\n", 304 filename, rate, "incompatible with clock rate", hz); 305 done(); 306 } 307 s_lowpc = (unsigned long) gmonhdr.lpc; 308 s_highpc = (unsigned long) gmonhdr.hpc; 309 lowpc = (unsigned long)gmonhdr.lpc / sizeof(UNIT); 310 highpc = (unsigned long)gmonhdr.hpc / sizeof(UNIT); 311 sampbytes = gmonhdr.ncnt - size; 312 nsamples = sampbytes / sizeof (UNIT); 313# ifdef DEBUG 314 if ( debug & SAMPLEDEBUG ) { 315 printf( "[openpfile] hdr.lpc 0x%lx hdr.hpc 0x%lx hdr.ncnt %d\n", 316 gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt ); 317 printf( "[openpfile] s_lowpc 0x%lx s_highpc 0x%lx\n" , 318 s_lowpc , s_highpc ); 319 printf( "[openpfile] lowpc 0x%lx highpc 0x%lx\n" , 320 lowpc , highpc ); 321 printf( "[openpfile] sampbytes %d nsamples %d\n" , 322 sampbytes , nsamples ); 323 printf( "[openpfile] sample rate %ld\n" , hz ); 324 } 325# endif DEBUG 326 return(pfile); 327} 328 329tally( rawp ) 330 struct rawarc *rawp; 331{ 332 nltype *parentp; 333 nltype *childp; 334 335 parentp = nllookup( rawp -> raw_frompc ); 336 childp = nllookup( rawp -> raw_selfpc ); 337 if ( parentp == 0 || childp == 0 ) 338 return; 339 if ( kflag 340 && onlist( kfromlist , parentp -> name ) 341 && onlist( ktolist , childp -> name ) ) { 342 return; 343 } 344 childp -> ncall += rawp -> raw_count; 345# ifdef DEBUG 346 if ( debug & TALLYDEBUG ) { 347 printf( "[tally] arc from %s to %s traversed %ld times\n" , 348 parentp -> name , childp -> name , rawp -> raw_count ); 349 } 350# endif DEBUG 351 addarc( parentp , childp , rawp -> raw_count ); 352} 353 354/* 355 * dump out the gmon.sum file 356 */ 357dumpsum( sumfile ) 358 char *sumfile; 359{ 360 register nltype *nlp; 361 register arctype *arcp; 362 struct rawarc arc; 363 FILE *sfile; 364 365 if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL ) { 366 perror( sumfile ); 367 done(); 368 } 369 /* 370 * dump the header; use the last header read in 371 */ 372 if ( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 ) { 373 perror( sumfile ); 374 done(); 375 } 376 /* 377 * dump the samples 378 */ 379 if (fwrite(samples, sizeof (UNIT), nsamples, sfile) != nsamples) { 380 perror( sumfile ); 381 done(); 382 } 383 /* 384 * dump the normalized raw arc information 385 */ 386 for ( nlp = nl ; nlp < npe ; nlp++ ) { 387 for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) { 388 arc.raw_frompc = arcp -> arc_parentp -> value; 389 arc.raw_selfpc = arcp -> arc_childp -> value; 390 arc.raw_count = arcp -> arc_count; 391 if ( fwrite ( &arc , sizeof arc , 1 , sfile ) != 1 ) { 392 perror( sumfile ); 393 done(); 394 } 395# ifdef DEBUG 396 if ( debug & SAMPLEDEBUG ) { 397 printf( "[dumpsum] frompc 0x%lx selfpc 0x%lx count %ld\n" , 398 arc.raw_frompc , arc.raw_selfpc , arc.raw_count ); 399 } 400# endif DEBUG 401 } 402 } 403 fclose( sfile ); 404} 405 406static int 407valcmp(v1, v2) 408 const void *v1; 409 const void *v2; 410{ 411 const nltype *p1 = (const nltype *)v1; 412 const nltype *p2 = (const nltype *)v2; 413 414 if ( p1 -> value < p2 -> value ) { 415 return LESSTHAN; 416 } 417 if ( p1 -> value > p2 -> value ) { 418 return GREATERTHAN; 419 } 420 return EQUALTO; 421} 422 423readsamples(pfile) 424 FILE *pfile; 425{ 426 register i; 427 UNIT sample; 428 429 if (samples == 0) { 430 samples = (UNIT *) calloc(sampbytes, sizeof (UNIT)); 431 if (samples == 0) { 432 warnx("no room for %d sample pc's", sampbytes / sizeof (UNIT)); 433 done(); 434 } 435 } 436 for (i = 0; i < nsamples; i++) { 437 fread(&sample, sizeof (UNIT), 1, pfile); 438 if (feof(pfile)) 439 break; 440 samples[i] += sample; 441 } 442 if (i != nsamples) { 443 warnx("unexpected EOF after reading %d/%d samples", --i , nsamples ); 444 done(); 445 } 446} 447 448/* 449 * Assign samples to the procedures to which they belong. 450 * 451 * There are three cases as to where pcl and pch can be 452 * with respect to the routine entry addresses svalue0 and svalue1 453 * as shown in the following diagram. overlap computes the 454 * distance between the arrows, the fraction of the sample 455 * that is to be credited to the routine which starts at svalue0. 456 * 457 * svalue0 svalue1 458 * | | 459 * v v 460 * 461 * +-----------------------------------------------+ 462 * | | 463 * | ->| |<- ->| |<- ->| |<- | 464 * | | | | | | 465 * +---------+ +---------+ +---------+ 466 * 467 * ^ ^ ^ ^ ^ ^ 468 * | | | | | | 469 * pcl pch pcl pch pcl pch 470 * 471 * For the vax we assert that samples will never fall in the first 472 * two bytes of any routine, since that is the entry mask, 473 * thus we give call alignentries() to adjust the entry points if 474 * the entry mask falls in one bucket but the code for the routine 475 * doesn't start until the next bucket. In conjunction with the 476 * alignment of routine addresses, this should allow us to have 477 * only one sample for every four bytes of text space and never 478 * have any overlap (the two end cases, above). 479 */ 480asgnsamples() 481{ 482 register int j; 483 UNIT ccnt; 484 double time; 485 unsigned long pcl, pch; 486 register int i; 487 unsigned long overlap; 488 unsigned long svalue0, svalue1; 489 490 /* read samples and assign to namelist symbols */ 491 scale = highpc - lowpc; 492 scale /= nsamples; 493 alignentries(); 494 for (i = 0, j = 1; i < nsamples; i++) { 495 ccnt = samples[i]; 496 if (ccnt == 0) 497 continue; 498 pcl = lowpc + (unsigned long)(scale * i); 499 pch = lowpc + (unsigned long)(scale * (i + 1)); 500 time = ccnt; 501# ifdef DEBUG 502 if ( debug & SAMPLEDEBUG ) { 503 printf( "[asgnsamples] pcl 0x%lx pch 0x%lx ccnt %d\n" , 504 pcl , pch , ccnt ); 505 } 506# endif DEBUG 507 totime += time; 508 for (j = j - 1; j < nname; j++) { 509 svalue0 = nl[j].svalue; 510 svalue1 = nl[j+1].svalue; 511 /* 512 * if high end of tick is below entry address, 513 * go for next tick. 514 */ 515 if (pch < svalue0) 516 break; 517 /* 518 * if low end of tick into next routine, 519 * go for next routine. 520 */ 521 if (pcl >= svalue1) 522 continue; 523 overlap = min(pch, svalue1) - max(pcl, svalue0); 524 if (overlap > 0) { 525# ifdef DEBUG 526 if (debug & SAMPLEDEBUG) { 527 printf("[asgnsamples] (0x%lx->0x%lx-0x%lx) %s gets %f ticks %lu overlap\n", 528 nl[j].value/sizeof(UNIT), svalue0, svalue1, 529 nl[j].name, 530 overlap * time / scale, overlap); 531 } 532# endif DEBUG 533 nl[j].time += overlap * time / scale; 534 } 535 } 536 } 537# ifdef DEBUG 538 if (debug & SAMPLEDEBUG) { 539 printf("[asgnsamples] totime %f\n", totime); 540 } 541# endif DEBUG 542} 543 544 545unsigned long 546min(a, b) 547 unsigned long a,b; 548{ 549 if (a<b) 550 return(a); 551 return(b); 552} 553 554unsigned long 555max(a, b) 556 unsigned long a,b; 557{ 558 if (a>b) 559 return(a); 560 return(b); 561} 562 563 /* 564 * calculate scaled entry point addresses (to save time in asgnsamples), 565 * and possibly push the scaled entry points over the entry mask, 566 * if it turns out that the entry point is in one bucket and the code 567 * for a routine is in the next bucket. 568 */ 569alignentries() 570{ 571 register struct nl *nlp; 572 unsigned long bucket_of_entry; 573 unsigned long bucket_of_code; 574 575 for (nlp = nl; nlp < npe; nlp++) { 576 nlp -> svalue = nlp -> value / sizeof(UNIT); 577 bucket_of_entry = (nlp->svalue - lowpc) / scale; 578 bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale; 579 if (bucket_of_entry < bucket_of_code) { 580# ifdef DEBUG 581 if (debug & SAMPLEDEBUG) { 582 printf("[alignentries] pushing svalue 0x%lx to 0x%lx\n", 583 nlp->svalue, nlp->svalue + UNITS_TO_CODE); 584 } 585# endif DEBUG 586 nlp->svalue += UNITS_TO_CODE; 587 } 588 } 589} 590 591done() 592{ 593 594 exit(0); 595} 596