gprof.c revision 97631
11573Srgrimes/*
2290494Sbapt * Copyright (c) 1983, 1993
3290494Sbapt *	The Regents of the University of California.  All rights reserved.
4128004Stjr *
51573Srgrimes * Redistribution and use in source and binary forms, with or without
61573Srgrimes * modification, are permitted provided that the following conditions
71573Srgrimes * are met:
81573Srgrimes * 1. Redistributions of source code must retain the above copyright
91573Srgrimes *    notice, this list of conditions and the following disclaimer.
101573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
11227753Stheraven *    notice, this list of conditions and the following disclaimer in the
12227753Stheraven *    documentation and/or other materials provided with the distribution.
13227753Stheraven * 3. All advertising materials mentioning features or use of this software
14227753Stheraven *    must display the following acknowledgement:
15227753Stheraven *	This product includes software developed by the University of
161573Srgrimes *	California, Berkeley and its contributors.
171573Srgrimes * 4. Neither the name of the University nor the names of its contributors
181573Srgrimes *    may be used to endorse or promote products derived from this software
191573Srgrimes *    without specific prior written permission.
201573Srgrimes *
211573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
221573Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
231573Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
241573Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
251573Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
261573Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
271573Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
281573Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
291573Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
301573Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
311573Srgrimes * SUCH DAMAGE.
321573Srgrimes */
331573Srgrimes
341573Srgrimes#ifndef lint
351573Srgrimesstatic const char copyright[] =
361573Srgrimes"@(#) Copyright (c) 1983, 1993\n\
371573Srgrimes	The Regents of the University of California.  All rights reserved.\n";
381573Srgrimes#endif /* not lint */
391573Srgrimes
401573Srgrimes#ifndef lint
411573Srgrimes#if 0
421573Srgrimesstatic char sccsid[] = "@(#)gprof.c	8.1 (Berkeley) 6/6/93";
431573Srgrimes#endif
4492986Sobrienstatic const char rcsid[] =
4592986Sobrien  "$FreeBSD: head/usr.bin/gprof/gprof.c 97631 2002-05-30 21:18:01Z wollman $";
461573Srgrimes#endif /* not lint */
47121845Stjr
48105233Stjr#include <err.h>
49121845Stjr#include <limits.h>
501573Srgrimes#include <stdint.h>
511573Srgrimes#include "gprof.h"
521573Srgrimes
/* qsort(3) comparison routine used by main() to order nl[] by symbol value. */
static int valcmp(const void *, const void *);


/* Header of the gmon file opened most recently; dumpsum() writes it out. */
static struct gmonhdr	gmonhdr;
/* Set by -l (which also clears Lflag); main() then skips printgprof(). */
static int lflag;
/* Set by -L (which also clears lflag); main() then skips printprof(). */
static int Lflag;
59142654Sphantom
60142654Sphantommain(argc, argv)
61142654Sphantom    int argc;
62142654Sphantom    char **argv;
63142654Sphantom{
64142654Sphantom    char	**sp;
65142654Sphantom    nltype	**timesortnlp;
66142654Sphantom    char	**defaultEs;
67121845Stjr
68172619Sache    --argc;
69172619Sache    argv++;
70172619Sache    debug = 0;
71172619Sache    bflag = TRUE;
72172619Sache    while ( *argv != 0 && **argv == '-' ) {
731573Srgrimes	(*argv)++;
74227753Stheraven	switch ( **argv ) {
751573Srgrimes	case 'a':
76121845Stjr	    aflag = TRUE;
77227753Stheraven	    break;
78227753Stheraven	case 'b':
79227753Stheraven	    bflag = FALSE;
80227753Stheraven	    break;
81227753Stheraven	case 'C':
82227753Stheraven	    Cflag = TRUE;
83227753Stheraven	    cyclethreshold = atoi( *++argv );
84227753Stheraven	    break;
851573Srgrimes	case 'c':
861573Srgrimes#if defined(vax) || defined(tahoe)
871573Srgrimes	    cflag = TRUE;
88142654Sphantom#else
89128004Stjr	    errx(1, "-c isn't supported on this architecture yet");
90128004Stjr#endif
91128004Stjr	    break;
92128004Stjr	case 'd':
93128004Stjr	    dflag = TRUE;
94128004Stjr	    setlinebuf(stdout);
95128004Stjr	    debug |= atoi( *++argv );
96128004Stjr	    debug |= ANYDEBUG;
97128004Stjr#	    ifdef DEBUG
98128004Stjr		printf("[main] debug = %d\n", debug);
99142654Sphantom#	    else /* not DEBUG */
100121845Stjr		printf("gprof: -d ignored\n");
101121845Stjr#	    endif /* DEBUG */
1021573Srgrimes	    break;
103121845Stjr	case 'E':
104121845Stjr	    ++argv;
105121845Stjr	    addlist( Elist , *argv );
106121845Stjr	    Eflag = TRUE;
107121845Stjr	    addlist( elist , *argv );
108121845Stjr	    eflag = TRUE;
109121845Stjr	    break;
110121845Stjr	case 'e':
111121845Stjr	    addlist( elist , *++argv );
112121845Stjr	    eflag = TRUE;
1131573Srgrimes	    break;
1141573Srgrimes	case 'F':
115142654Sphantom	    ++argv;
116121845Stjr	    addlist( Flist , *argv );
117121845Stjr	    Fflag = TRUE;
1181573Srgrimes	    addlist( flist , *argv );
119121845Stjr	    fflag = TRUE;
120121845Stjr	    break;
121121845Stjr	case 'f':
122121845Stjr	    addlist( flist , *++argv );
123121845Stjr	    fflag = TRUE;
124121845Stjr	    break;
125121845Stjr	case 'k':
126121845Stjr	    addlist( kfromlist , *++argv );
127121845Stjr	    addlist( ktolist , *++argv );
128121845Stjr	    kflag = TRUE;
1291573Srgrimes	    break;
130129179Stjr	case 'K':
131142654Sphantom	    Kflag = TRUE;
132132497Stjr	    break;
133132497Stjr    case 'l':
134129179Stjr	    lflag = 1;
135129179Stjr	    Lflag = 0;
136129179Stjr	    break;
137129179Stjr    case 'L':
138132497Stjr	    Lflag = 1;
139132497Stjr	    lflag = 0;
140132497Stjr	    break;
141132497Stjr    case 's':
142129179Stjr	    sflag = TRUE;
143129179Stjr	    break;
144129179Stjr	case 'u':
145132497Stjr	    uflag = TRUE;
146129179Stjr	    break;
147129179Stjr	case 'z':
148129179Stjr	    zflag = TRUE;
149129179Stjr	    break;
150129179Stjr	}
151129179Stjr	argv++;
152129179Stjr    }
153129179Stjr    if ( *argv != 0 ) {
154129179Stjr	a_outname  = *argv;
155129179Stjr	argv++;
156142654Sphantom    } else {
157132497Stjr	a_outname  = A_OUTNAME;
158132497Stjr    }
159129179Stjr    if ( *argv != 0 ) {
160129179Stjr	gmonname = *argv;
161129179Stjr	argv++;
162129179Stjr    } else {
163129707Stjr	gmonname = (char *) malloc(strlen(a_outname)+6);
164132497Stjr	strcpy(gmonname, a_outname);
165129707Stjr	strcat(gmonname, ".gmon");
166129707Stjr    }
167129707Stjr	/*
168129707Stjr	 *	get information from the executable file.
169129707Stjr	 */
170129707Stjr    if ((Kflag && kernel_getnfile(a_outname, &defaultEs) == -1) ||
171129707Stjr      (elf_getnfile(a_outname, &defaultEs) == -1 &&
172129179Stjr      aout_getnfile(a_outname, &defaultEs) == -1))
173129179Stjr	errx(1, "%s: bad format", a_outname);
174129179Stjr	/*
175132497Stjr	 *	sort symbol table.
176129179Stjr	 */
177301069Sache    qsort(nl, nname, sizeof(nltype), valcmp);
178129179Stjr	/*
179129179Stjr	 *	turn off default functions
180129179Stjr	 */
181129179Stjr    for ( sp = defaultEs ; *sp ; sp++ ) {
182129179Stjr	Eflag = TRUE;
183129179Stjr	addlist( Elist , *sp );
184129179Stjr	eflag = TRUE;
185129179Stjr	addlist( elist , *sp );
186129179Stjr    }
187129179Stjr	/*
188129179Stjr	 *	get information about mon.out file(s).
189129179Stjr	 */
190142654Sphantom    do	{
191142654Sphantom	getpfile( gmonname );
192142654Sphantom	if ( *argv != 0 ) {
193227753Stheraven	    gmonname = *argv;
194227753Stheraven	}
195227753Stheraven    } while ( *argv++ != 0 );
196227753Stheraven	/*
197227753Stheraven	 *	how many ticks per second?
198227753Stheraven	 *	if we can't tell, report time in ticks.
199227753Stheraven	 */
200227753Stheraven    if (hz == 0) {
201227753Stheraven	hz = 1;
202227753Stheraven	fprintf(stderr, "time is in ticks, not seconds\n");
203227753Stheraven    }
204227753Stheraven	/*
205281927Stheraven	 *	dump out a gmon.sum file if requested
206227753Stheraven	 */
207227753Stheraven    if ( sflag ) {
208227753Stheraven	dumpsum( GMONSUM );
209227753Stheraven    }
210227753Stheraven	/*
211227753Stheraven	 *	assign samples to procedures
212227753Stheraven	 */
213227753Stheraven    asgnsamples();
214227753Stheraven	/*
215227753Stheraven	 *	assemble the dynamic profile
216	 */
217    timesortnlp = doarcs();
218	/*
219	 *	print the dynamic profile
220	 */
221    if(!lflag) {
222	    printgprof( timesortnlp );
223    }
224	/*
225	 *	print the flat profile
226	 */
227    if(!Lflag) {
228	    printprof();
229    }
230	/*
231	 *	print the index
232	 */
233    printindex();
234    done();
235}
236
237    /*
238     *	information from a gmon.out file is in two parts:
239     *	an array of sampling hits within pc ranges,
240     *	and the arcs.
241     */
242getpfile(filename)
243    char *filename;
244{
245    FILE		*pfile;
246    FILE		*openpfile();
247    struct rawarc	arc;
248
249    pfile = openpfile(filename);
250    readsamples(pfile);
251	/*
252	 *	the rest of the file consists of
253	 *	a bunch of <from,self,count> tuples.
254	 */
255    while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) {
256#	ifdef DEBUG
257	    if ( debug & SAMPLEDEBUG ) {
258		printf( "[getpfile] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
259			arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
260	    }
261#	endif /* DEBUG */
262	    /*
263	     *	add this arc
264	     */
265	tally( &arc );
266    }
267    fclose(pfile);
268}
269
270FILE *
271openpfile(filename)
272    char *filename;
273{
274    struct gmonhdr	tmp;
275    FILE		*pfile;
276    int			size;
277    int			rate;
278
279    if((pfile = fopen(filename, "r")) == NULL) {
280	perror(filename);
281	done();
282    }
283    fread(&tmp, sizeof(struct gmonhdr), 1, pfile);
284    if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc ||
285	 tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt ) ) {
286	warnx("%s: incompatible with first gmon file", filename);
287	done();
288    }
289    gmonhdr = tmp;
290    if ( gmonhdr.version == GMONVERSION ) {
291	rate = gmonhdr.profrate;
292	size = sizeof(struct gmonhdr);
293    } else {
294	fseek(pfile, sizeof(struct ophdr), SEEK_SET);
295	size = sizeof(struct ophdr);
296	gmonhdr.profrate = rate = hertz();
297	gmonhdr.version = GMONVERSION;
298    }
299    if (hz == 0) {
300	hz = rate;
301    } else if (hz != rate) {
302	fprintf(stderr,
303	    "%s: profile clock rate (%d) %s (%ld) in first gmon file\n",
304	    filename, rate, "incompatible with clock rate", hz);
305	done();
306    }
307    if ( gmonhdr.histcounter_type == 0 ) {
308	/* Historical case.  The type was u_short (2 bytes in practice). */
309	histcounter_type = 16;
310	histcounter_size = 2;
311    } else {
312	histcounter_type = gmonhdr.histcounter_type;
313	histcounter_size = abs(histcounter_type) / CHAR_BIT;
314    }
315    s_lowpc = (unsigned long) gmonhdr.lpc;
316    s_highpc = (unsigned long) gmonhdr.hpc;
317    lowpc = (unsigned long)gmonhdr.lpc / HISTORICAL_SCALE_2;
318    highpc = (unsigned long)gmonhdr.hpc / HISTORICAL_SCALE_2;
319    sampbytes = gmonhdr.ncnt - size;
320    nsamples = sampbytes / histcounter_size;
321#   ifdef DEBUG
322	if ( debug & SAMPLEDEBUG ) {
323	    printf( "[openpfile] hdr.lpc 0x%lx hdr.hpc 0x%lx hdr.ncnt %d\n",
324		gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt );
325	    printf( "[openpfile]   s_lowpc 0x%lx   s_highpc 0x%lx\n" ,
326		s_lowpc , s_highpc );
327	    printf( "[openpfile]     lowpc 0x%lx     highpc 0x%lx\n" ,
328		lowpc , highpc );
329	    printf( "[openpfile] sampbytes %d nsamples %d\n" ,
330		sampbytes , nsamples );
331	    printf( "[openpfile] sample rate %ld\n" , hz );
332	}
333#   endif /* DEBUG */
334    return(pfile);
335}
336
337tally( rawp )
338    struct rawarc	*rawp;
339{
340    nltype		*parentp;
341    nltype		*childp;
342
343    parentp = nllookup( rawp -> raw_frompc );
344    childp = nllookup( rawp -> raw_selfpc );
345    if ( parentp == 0 || childp == 0 )
346	return;
347    if ( kflag
348	 && onlist( kfromlist , parentp -> name )
349	 && onlist( ktolist , childp -> name ) ) {
350	return;
351    }
352    childp -> ncall += rawp -> raw_count;
353#   ifdef DEBUG
354	if ( debug & TALLYDEBUG ) {
355	    printf( "[tally] arc from %s to %s traversed %ld times\n" ,
356		    parentp -> name , childp -> name , rawp -> raw_count );
357	}
358#   endif /* DEBUG */
359    addarc( parentp , childp , rawp -> raw_count );
360}
361
362/*
363 * dump out the gmon.sum file
364 */
365dumpsum( sumfile )
366    char *sumfile;
367{
368    register nltype *nlp;
369    register arctype *arcp;
370    struct rawarc arc;
371    FILE *sfile;
372
373    if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL ) {
374	perror( sumfile );
375	done();
376    }
377    /*
378     * dump the header; use the last header read in
379     */
380    if ( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 ) {
381	perror( sumfile );
382	done();
383    }
384    /*
385     * dump the samples
386     */
387    if (fwrite(samples, histcounter_size, nsamples, sfile) != nsamples) {
388	perror( sumfile );
389	done();
390    }
391    /*
392     * dump the normalized raw arc information
393     */
394    for ( nlp = nl ; nlp < npe ; nlp++ ) {
395	for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) {
396	    arc.raw_frompc = arcp -> arc_parentp -> value;
397	    arc.raw_selfpc = arcp -> arc_childp -> value;
398	    arc.raw_count = arcp -> arc_count;
399	    if ( fwrite ( &arc , sizeof arc , 1 , sfile ) != 1 ) {
400		perror( sumfile );
401		done();
402	    }
403#	    ifdef DEBUG
404		if ( debug & SAMPLEDEBUG ) {
405		    printf( "[dumpsum] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
406			    arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
407		}
408#	    endif /* DEBUG */
409	}
410    }
411    fclose( sfile );
412}
413
414static int
415valcmp(v1, v2)
416    const void *v1;
417    const void *v2;
418{
419    const nltype *p1 = (const nltype *)v1;
420    const nltype *p2 = (const nltype *)v2;
421
422    if ( p1 -> value < p2 -> value ) {
423	return LESSTHAN;
424    }
425    if ( p1 -> value > p2 -> value ) {
426	return GREATERTHAN;
427    }
428    return EQUALTO;
429}
430
431readsamples(pfile)
432    FILE	*pfile;
433{
434    register i;
435    intmax_t	sample;
436
437    if (samples == 0) {
438	samples = (double *) calloc(nsamples, sizeof(double));
439	if (samples == 0) {
440	    warnx("no room for %d sample pc's", nsamples);
441	    done();
442	}
443    }
444    for (i = 0; i < nsamples; i++) {
445	fread(&sample, histcounter_size, 1, pfile);
446	if (feof(pfile))
447		break;
448	switch ( histcounter_type ) {
449	case -8:
450	    samples[i] += *(int8_t *)&sample;
451	    break;
452	case 8:
453	    samples[i] += *(u_int8_t *)&sample;
454	    break;
455	case -16:
456	    samples[i] += *(int16_t *)&sample;
457	    break;
458	case 16:
459	    samples[i] += *(u_int16_t *)&sample;
460	    break;
461	case -32:
462	    samples[i] += *(int32_t *)&sample;
463	    break;
464	case 32:
465	    samples[i] += *(u_int32_t *)&sample;
466	    break;
467	case -64:
468	    samples[i] += *(int64_t *)&sample;
469	    break;
470	case 64:
471	    samples[i] += *(u_int64_t *)&sample;
472	    break;
473	default:
474	    err(1, "unsupported histogram counter type %d", histcounter_type);
475	}
476    }
477    if (i != nsamples) {
478	warnx("unexpected EOF after reading %d/%d samples", --i , nsamples );
479	done();
480    }
481}
482
483/*
484 *	Assign samples to the procedures to which they belong.
485 *
486 *	There are three cases as to where pcl and pch can be
487 *	with respect to the routine entry addresses svalue0 and svalue1
488 *	as shown in the following diagram.  overlap computes the
489 *	distance between the arrows, the fraction of the sample
490 *	that is to be credited to the routine which starts at svalue0.
491 *
492 *	    svalue0                                         svalue1
493 *	       |                                               |
494 *	       v                                               v
495 *
496 *	       +-----------------------------------------------+
497 *	       |					       |
498 *	  |  ->|    |<-		->|         |<-		->|    |<-  |
499 *	  |         |		  |         |		  |         |
500 *	  +---------+		  +---------+		  +---------+
501 *
502 *	  ^         ^		  ^         ^		  ^         ^
503 *	  |         |		  |         |		  |         |
504 *	 pcl       pch		 pcl       pch		 pcl       pch
505 *
506 *	For the vax we assert that samples will never fall in the first
507 *	two bytes of any routine, since that is the entry mask,
508 *	thus we give call alignentries() to adjust the entry points if
509 *	the entry mask falls in one bucket but the code for the routine
510 *	doesn't start until the next bucket.  In conjunction with the
511 *	alignment of routine addresses, this should allow us to have
512 *	only one sample for every four bytes of text space and never
513 *	have any overlap (the two end cases, above).
514 */
515asgnsamples()
516{
517    register int	j;
518    double		ccnt;
519    double		time;
520    unsigned long	pcl, pch;
521    register int	i;
522    unsigned long	overlap;
523    unsigned long	svalue0, svalue1;
524
525    /* read samples and assign to namelist symbols */
526    scale = highpc - lowpc;
527    scale /= nsamples;
528    alignentries();
529    for (i = 0, j = 1; i < nsamples; i++) {
530	ccnt = samples[i];
531	if (ccnt == 0)
532		continue;
533	pcl = lowpc + (unsigned long)(scale * i);
534	pch = lowpc + (unsigned long)(scale * (i + 1));
535	time = ccnt;
536#	ifdef DEBUG
537	    if ( debug & SAMPLEDEBUG ) {
538		printf( "[asgnsamples] pcl 0x%lx pch 0x%lx ccnt %.0f\n" ,
539			pcl , pch , ccnt );
540	    }
541#	endif /* DEBUG */
542	totime += time;
543	for (j = j - 1; j < nname; j++) {
544	    svalue0 = nl[j].svalue;
545	    svalue1 = nl[j+1].svalue;
546		/*
547		 *	if high end of tick is below entry address,
548		 *	go for next tick.
549		 */
550	    if (pch < svalue0)
551		    break;
552		/*
553		 *	if low end of tick into next routine,
554		 *	go for next routine.
555		 */
556	    if (pcl >= svalue1)
557		    continue;
558	    overlap = min(pch, svalue1) - max(pcl, svalue0);
559	    if (overlap > 0) {
560#		ifdef DEBUG
561		    if (debug & SAMPLEDEBUG) {
562			printf("[asgnsamples] (0x%lx->0x%lx-0x%lx) %s gets %f ticks %lu overlap\n",
563				nl[j].value / HISTORICAL_SCALE_2,
564				svalue0, svalue1, nl[j].name,
565				overlap * time / scale, overlap);
566		    }
567#		endif /* DEBUG */
568		nl[j].time += overlap * time / scale;
569	    }
570	}
571    }
572#   ifdef DEBUG
573	if (debug & SAMPLEDEBUG) {
574	    printf("[asgnsamples] totime %f\n", totime);
575	}
576#   endif /* DEBUG */
577}
578
579
/*
 *	Return the smaller of two unsigned long values.
 */
unsigned long
min(a, b)
    unsigned long a,b;
{
    return (a < b) ? a : b;
}
588
/*
 *	Return the larger of two unsigned long values.
 */
unsigned long
max(a, b)
    unsigned long a,b;
{
    return (a > b) ? a : b;
}
597
598    /*
599     *	calculate scaled entry point addresses (to save time in asgnsamples),
600     *	and possibly push the scaled entry points over the entry mask,
601     *	if it turns out that the entry point is in one bucket and the code
602     *	for a routine is in the next bucket.
603     */
604alignentries()
605{
606    register struct nl	*nlp;
607    unsigned long	bucket_of_entry;
608    unsigned long	bucket_of_code;
609
610    for (nlp = nl; nlp < npe; nlp++) {
611	nlp -> svalue = nlp -> value / HISTORICAL_SCALE_2;
612	bucket_of_entry = (nlp->svalue - lowpc) / scale;
613	bucket_of_code = (nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2 -
614	  lowpc) / scale;
615	if (bucket_of_entry < bucket_of_code) {
616#	    ifdef DEBUG
617		if (debug & SAMPLEDEBUG) {
618		    printf("[alignentries] pushing svalue 0x%lx to 0x%lx\n",
619			    nlp->svalue,
620			    nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2);
621		}
622#	    endif /* DEBUG */
623	    nlp->svalue += OFFSET_OF_CODE / HISTORICAL_SCALE_2;
624	}
625    }
626}
627
/*
 *	Single exit point of the program: terminates the process with
 *	exit status 0.  Callers in this file use it both after normal
 *	completion and after reporting an error, so the status does not
 *	tell the two apart.
 */
done()
{

    exit(0);
}
633