gprof.c revision 105243
1/*
2 * Copyright (c) 1983, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#ifndef lint
35static const char copyright[] =
36"@(#) Copyright (c) 1983, 1993\n\
37	The Regents of the University of California.  All rights reserved.\n";
38#endif /* not lint */
39
40#if 0
41#ifndef lint
42static char sccsid[] = "@(#)gprof.c	8.1 (Berkeley) 6/6/93";
43#endif /* not lint */
44#endif
45
46#include <sys/cdefs.h>
47__FBSDID("$FreeBSD: head/usr.bin/gprof/gprof.c 105243 2002-10-16 13:50:09Z charnier $");
48
49#include <err.h>
50#include <limits.h>
51#include <stdint.h>
52#include "gprof.h"
53
54static int valcmp(const void *, const void *);
55
56
57static struct gmonhdr	gmonhdr;
58static int lflag;
59static int Lflag;
60
61int
62main(argc, argv)
63    int argc;
64    char **argv;
65{
66    char	**sp;
67    nltype	**timesortnlp;
68    char	**defaultEs;
69
70    --argc;
71    argv++;
72    debug = 0;
73    bflag = TRUE;
74    while ( *argv != 0 && **argv == '-' ) {
75	(*argv)++;
76	switch ( **argv ) {
77	case 'a':
78	    aflag = TRUE;
79	    break;
80	case 'b':
81	    bflag = FALSE;
82	    break;
83	case 'C':
84	    Cflag = TRUE;
85	    cyclethreshold = atoi( *++argv );
86	    break;
87	case 'c':
88#if defined(vax) || defined(tahoe)
89	    cflag = TRUE;
90#else
91	    errx(1, "-c isn't supported on this architecture yet");
92#endif
93	    break;
94	case 'd':
95	    dflag = TRUE;
96	    setlinebuf(stdout);
97	    debug |= atoi( *++argv );
98	    debug |= ANYDEBUG;
99#	    ifdef DEBUG
100		printf("[main] debug = %d\n", debug);
101#	    else /* not DEBUG */
102		printf("gprof: -d ignored\n");
103#	    endif /* DEBUG */
104	    break;
105	case 'E':
106	    ++argv;
107	    addlist( Elist , *argv );
108	    Eflag = TRUE;
109	    addlist( elist , *argv );
110	    eflag = TRUE;
111	    break;
112	case 'e':
113	    addlist( elist , *++argv );
114	    eflag = TRUE;
115	    break;
116	case 'F':
117	    ++argv;
118	    addlist( Flist , *argv );
119	    Fflag = TRUE;
120	    addlist( flist , *argv );
121	    fflag = TRUE;
122	    break;
123	case 'f':
124	    addlist( flist , *++argv );
125	    fflag = TRUE;
126	    break;
127	case 'k':
128	    addlist( kfromlist , *++argv );
129	    addlist( ktolist , *++argv );
130	    kflag = TRUE;
131	    break;
132	case 'K':
133	    Kflag = TRUE;
134	    break;
135	case 'l':
136	    lflag = 1;
137	    Lflag = 0;
138	    break;
139	case 'L':
140	    Lflag = 1;
141	    lflag = 0;
142	    break;
143	case 's':
144	    sflag = TRUE;
145	    break;
146	case 'u':
147	    uflag = TRUE;
148	    break;
149	case 'z':
150	    zflag = TRUE;
151	    break;
152	}
153	argv++;
154    }
155    if ( *argv != 0 ) {
156	a_outname  = *argv;
157	argv++;
158    } else {
159	a_outname  = A_OUTNAME;
160    }
161    if ( *argv != 0 ) {
162	gmonname = *argv;
163	argv++;
164    } else {
165	gmonname = (char *) malloc(strlen(a_outname)+6);
166	strcpy(gmonname, a_outname);
167	strcat(gmonname, ".gmon");
168    }
169	/*
170	 *	get information from the executable file.
171	 */
172    if ((Kflag && kernel_getnfile(a_outname, &defaultEs) == -1) ||
173      (elf_getnfile(a_outname, &defaultEs) == -1 &&
174      aout_getnfile(a_outname, &defaultEs) == -1))
175	errx(1, "%s: bad format", a_outname);
176	/*
177	 *	sort symbol table.
178	 */
179    qsort(nl, nname, sizeof(nltype), valcmp);
180	/*
181	 *	turn off default functions
182	 */
183    for ( sp = defaultEs ; *sp ; sp++ ) {
184	Eflag = TRUE;
185	addlist( Elist , *sp );
186	eflag = TRUE;
187	addlist( elist , *sp );
188    }
189	/*
190	 *	get information about mon.out file(s).
191	 */
192    do	{
193	getpfile( gmonname );
194	if ( *argv != 0 ) {
195	    gmonname = *argv;
196	}
197    } while ( *argv++ != 0 );
198	/*
199	 *	how many ticks per second?
200	 *	if we can't tell, report time in ticks.
201	 */
202    if (hz == 0) {
203	hz = 1;
204	fprintf(stderr, "time is in ticks, not seconds\n");
205    }
206	/*
207	 *	dump out a gmon.sum file if requested
208	 */
209    if ( sflag ) {
210	dumpsum( GMONSUM );
211    }
212	/*
213	 *	assign samples to procedures
214	 */
215    asgnsamples();
216	/*
217	 *	assemble the dynamic profile
218	 */
219    timesortnlp = doarcs();
220	/*
221	 *	print the dynamic profile
222	 */
223    if(!lflag) {
224	    printgprof( timesortnlp );
225    }
226	/*
227	 *	print the flat profile
228	 */
229    if(!Lflag) {
230	    printprof();
231    }
232	/*
233	 *	print the index
234	 */
235    printindex();
236    exit(0);
237}
238
239    /*
240     *	information from a gmon.out file is in two parts:
241     *	an array of sampling hits within pc ranges,
242     *	and the arcs.
243     */
244void
245getpfile(filename)
246    char *filename;
247{
248    FILE		*pfile;
249    FILE		*openpfile();
250    struct rawarc	arc;
251
252    pfile = openpfile(filename);
253    readsamples(pfile);
254	/*
255	 *	the rest of the file consists of
256	 *	a bunch of <from,self,count> tuples.
257	 */
258    while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) {
259#	ifdef DEBUG
260	    if ( debug & SAMPLEDEBUG ) {
261		printf( "[getpfile] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
262			arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
263	    }
264#	endif /* DEBUG */
265	    /*
266	     *	add this arc
267	     */
268	tally( &arc );
269    }
270    fclose(pfile);
271}
272
273FILE *
274openpfile(filename)
275    char *filename;
276{
277    struct gmonhdr	tmp;
278    FILE		*pfile;
279    int			size;
280    int			rate;
281
282    if((pfile = fopen(filename, "r")) == NULL)
283	err(1, "%s", filename);
284    fread(&tmp, sizeof(struct gmonhdr), 1, pfile);
285    if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc ||
286	 tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt ) )
287	errx(1, "%s: incompatible with first gmon file", filename);
288    gmonhdr = tmp;
289    if ( gmonhdr.version == GMONVERSION ) {
290	rate = gmonhdr.profrate;
291	size = sizeof(struct gmonhdr);
292    } else {
293	fseek(pfile, sizeof(struct ophdr), SEEK_SET);
294	size = sizeof(struct ophdr);
295	gmonhdr.profrate = rate = hertz();
296	gmonhdr.version = GMONVERSION;
297    }
298    if (hz == 0) {
299	hz = rate;
300    } else if (hz != rate)
301	errx(0, "%s: profile clock rate (%d) %s (%ld) in first gmon file",
302	    filename, rate, "incompatible with clock rate", hz);
303    if ( gmonhdr.histcounter_type == 0 ) {
304	/* Historical case.  The type was u_short (2 bytes in practice). */
305	histcounter_type = 16;
306	histcounter_size = 2;
307    } else {
308	histcounter_type = gmonhdr.histcounter_type;
309	histcounter_size = abs(histcounter_type) / CHAR_BIT;
310    }
311    s_lowpc = (unsigned long) gmonhdr.lpc;
312    s_highpc = (unsigned long) gmonhdr.hpc;
313    lowpc = (unsigned long)gmonhdr.lpc / HISTORICAL_SCALE_2;
314    highpc = (unsigned long)gmonhdr.hpc / HISTORICAL_SCALE_2;
315    sampbytes = gmonhdr.ncnt - size;
316    nsamples = sampbytes / histcounter_size;
317#   ifdef DEBUG
318	if ( debug & SAMPLEDEBUG ) {
319	    printf( "[openpfile] hdr.lpc 0x%lx hdr.hpc 0x%lx hdr.ncnt %d\n",
320		gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt );
321	    printf( "[openpfile]   s_lowpc 0x%lx   s_highpc 0x%lx\n" ,
322		s_lowpc , s_highpc );
323	    printf( "[openpfile]     lowpc 0x%lx     highpc 0x%lx\n" ,
324		lowpc , highpc );
325	    printf( "[openpfile] sampbytes %d nsamples %d\n" ,
326		sampbytes , nsamples );
327	    printf( "[openpfile] sample rate %ld\n" , hz );
328	}
329#   endif /* DEBUG */
330    return(pfile);
331}
332
333void
334tally( rawp )
335    struct rawarc	*rawp;
336{
337    nltype		*parentp;
338    nltype		*childp;
339
340    parentp = nllookup( rawp -> raw_frompc );
341    childp = nllookup( rawp -> raw_selfpc );
342    if ( parentp == 0 || childp == 0 )
343	return;
344    if ( kflag
345	 && onlist( kfromlist , parentp -> name )
346	 && onlist( ktolist , childp -> name ) ) {
347	return;
348    }
349    childp -> ncall += rawp -> raw_count;
350#   ifdef DEBUG
351	if ( debug & TALLYDEBUG ) {
352	    printf( "[tally] arc from %s to %s traversed %ld times\n" ,
353		    parentp -> name , childp -> name , rawp -> raw_count );
354	}
355#   endif /* DEBUG */
356    addarc( parentp , childp , rawp -> raw_count );
357}
358
359/*
360 * dump out the gmon.sum file
361 */
362void
363dumpsum( sumfile )
364    char *sumfile;
365{
366    register nltype *nlp;
367    register arctype *arcp;
368    struct rawarc arc;
369    FILE *sfile;
370
371    if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL )
372	err( 1 , "%s" , sumfile );
373    /*
374     * dump the header; use the last header read in
375     */
376    if ( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 )
377	err( 1 , "%s" , sumfile );
378    /*
379     * dump the samples
380     */
381    if (fwrite(samples, histcounter_size, nsamples, sfile) != nsamples)
382	err( 1 , "%s" , sumfile );
383    /*
384     * dump the normalized raw arc information
385     */
386    for ( nlp = nl ; nlp < npe ; nlp++ ) {
387	for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) {
388	    arc.raw_frompc = arcp -> arc_parentp -> value;
389	    arc.raw_selfpc = arcp -> arc_childp -> value;
390	    arc.raw_count = arcp -> arc_count;
391	    if ( fwrite ( &arc , sizeof arc , 1 , sfile ) != 1 )
392		err( 1 , "%s" , sumfile );
393#	    ifdef DEBUG
394		if ( debug & SAMPLEDEBUG ) {
395		    printf( "[dumpsum] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
396			    arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
397		}
398#	    endif /* DEBUG */
399	}
400    }
401    fclose( sfile );
402}
403
404static int
405valcmp(v1, v2)
406    const void *v1;
407    const void *v2;
408{
409    const nltype *p1 = (const nltype *)v1;
410    const nltype *p2 = (const nltype *)v2;
411
412    if ( p1 -> value < p2 -> value ) {
413	return LESSTHAN;
414    }
415    if ( p1 -> value > p2 -> value ) {
416	return GREATERTHAN;
417    }
418    return EQUALTO;
419}
420
421void
422readsamples(pfile)
423    FILE	*pfile;
424{
425    register i;
426    intmax_t	sample;
427
428    if (samples == 0) {
429	samples = (double *) calloc(nsamples, sizeof(double));
430	if (samples == 0)
431	    errx(0, "no room for %d sample pc's", nsamples);
432    }
433    for (i = 0; i < nsamples; i++) {
434	fread(&sample, histcounter_size, 1, pfile);
435	if (feof(pfile))
436		break;
437	switch ( histcounter_type ) {
438	case -8:
439	    samples[i] += *(int8_t *)&sample;
440	    break;
441	case 8:
442	    samples[i] += *(u_int8_t *)&sample;
443	    break;
444	case -16:
445	    samples[i] += *(int16_t *)&sample;
446	    break;
447	case 16:
448	    samples[i] += *(u_int16_t *)&sample;
449	    break;
450	case -32:
451	    samples[i] += *(int32_t *)&sample;
452	    break;
453	case 32:
454	    samples[i] += *(u_int32_t *)&sample;
455	    break;
456	case -64:
457	    samples[i] += *(int64_t *)&sample;
458	    break;
459	case 64:
460	    samples[i] += *(u_int64_t *)&sample;
461	    break;
462	default:
463	    err(1, "unsupported histogram counter type %d", histcounter_type);
464	}
465    }
466    if (i != nsamples)
467	errx(1, "unexpected EOF after reading %d/%d samples", --i , nsamples );
468}
469
470/*
471 *	Assign samples to the procedures to which they belong.
472 *
473 *	There are three cases as to where pcl and pch can be
474 *	with respect to the routine entry addresses svalue0 and svalue1
475 *	as shown in the following diagram.  overlap computes the
476 *	distance between the arrows, the fraction of the sample
477 *	that is to be credited to the routine which starts at svalue0.
478 *
479 *	    svalue0                                         svalue1
480 *	       |                                               |
481 *	       v                                               v
482 *
483 *	       +-----------------------------------------------+
484 *	       |					       |
485 *	  |  ->|    |<-		->|         |<-		->|    |<-  |
486 *	  |         |		  |         |		  |         |
487 *	  +---------+		  +---------+		  +---------+
488 *
489 *	  ^         ^		  ^         ^		  ^         ^
490 *	  |         |		  |         |		  |         |
491 *	 pcl       pch		 pcl       pch		 pcl       pch
492 *
493 *	For the vax we assert that samples will never fall in the first
494 *	two bytes of any routine, since that is the entry mask,
495 *	thus we give call alignentries() to adjust the entry points if
496 *	the entry mask falls in one bucket but the code for the routine
497 *	doesn't start until the next bucket.  In conjunction with the
498 *	alignment of routine addresses, this should allow us to have
499 *	only one sample for every four bytes of text space and never
500 *	have any overlap (the two end cases, above).
501 */
502void
503asgnsamples()
504{
505    register int	j;
506    double		ccnt;
507    double		time;
508    unsigned long	pcl, pch;
509    register int	i;
510    unsigned long	overlap;
511    unsigned long	svalue0, svalue1;
512
513    /* read samples and assign to namelist symbols */
514    scale = highpc - lowpc;
515    scale /= nsamples;
516    alignentries();
517    for (i = 0, j = 1; i < nsamples; i++) {
518	ccnt = samples[i];
519	if (ccnt == 0)
520		continue;
521	pcl = lowpc + (unsigned long)(scale * i);
522	pch = lowpc + (unsigned long)(scale * (i + 1));
523	time = ccnt;
524#	ifdef DEBUG
525	    if ( debug & SAMPLEDEBUG ) {
526		printf( "[asgnsamples] pcl 0x%lx pch 0x%lx ccnt %.0f\n" ,
527			pcl , pch , ccnt );
528	    }
529#	endif /* DEBUG */
530	totime += time;
531	for (j = j - 1; j < nname; j++) {
532	    svalue0 = nl[j].svalue;
533	    svalue1 = nl[j+1].svalue;
534		/*
535		 *	if high end of tick is below entry address,
536		 *	go for next tick.
537		 */
538	    if (pch < svalue0)
539		    break;
540		/*
541		 *	if low end of tick into next routine,
542		 *	go for next routine.
543		 */
544	    if (pcl >= svalue1)
545		    continue;
546	    overlap = min(pch, svalue1) - max(pcl, svalue0);
547	    if (overlap > 0) {
548#		ifdef DEBUG
549		    if (debug & SAMPLEDEBUG) {
550			printf("[asgnsamples] (0x%lx->0x%lx-0x%lx) %s gets %f ticks %lu overlap\n",
551				nl[j].value / HISTORICAL_SCALE_2,
552				svalue0, svalue1, nl[j].name,
553				overlap * time / scale, overlap);
554		    }
555#		endif /* DEBUG */
556		nl[j].time += overlap * time / scale;
557	    }
558	}
559    }
560#   ifdef DEBUG
561	if (debug & SAMPLEDEBUG) {
562	    printf("[asgnsamples] totime %f\n", totime);
563	}
564#   endif /* DEBUG */
565}
566
567
568unsigned long
569min(a, b)
570    unsigned long a,b;
571{
572    if (a<b)
573	return(a);
574    return(b);
575}
576
577unsigned long
578max(a, b)
579    unsigned long a,b;
580{
581    if (a>b)
582	return(a);
583    return(b);
584}
585
586    /*
587     *	calculate scaled entry point addresses (to save time in asgnsamples),
588     *	and possibly push the scaled entry points over the entry mask,
589     *	if it turns out that the entry point is in one bucket and the code
590     *	for a routine is in the next bucket.
591     */
592void
593alignentries()
594{
595    register struct nl	*nlp;
596    unsigned long	bucket_of_entry;
597    unsigned long	bucket_of_code;
598
599    for (nlp = nl; nlp < npe; nlp++) {
600	nlp -> svalue = nlp -> value / HISTORICAL_SCALE_2;
601	bucket_of_entry = (nlp->svalue - lowpc) / scale;
602	bucket_of_code = (nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2 -
603	  lowpc) / scale;
604	if (bucket_of_entry < bucket_of_code) {
605#	    ifdef DEBUG
606		if (debug & SAMPLEDEBUG) {
607		    printf("[alignentries] pushing svalue 0x%lx to 0x%lx\n",
608			    nlp->svalue,
609			    nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2);
610		}
611#	    endif /* DEBUG */
612	    nlp->svalue += OFFSET_OF_CODE / HISTORICAL_SCALE_2;
613	}
614    }
615}
616