gprof.c revision 187116
1254721Semaste/*
2254721Semaste * Copyright (c) 1983, 1993
3254721Semaste *	The Regents of the University of California.  All rights reserved.
4254721Semaste *
5254721Semaste * Redistribution and use in source and binary forms, with or without
6254721Semaste * modification, are permitted provided that the following conditions
7254721Semaste * are met:
8254721Semaste * 1. Redistributions of source code must retain the above copyright
9254721Semaste *    notice, this list of conditions and the following disclaimer.
10254721Semaste * 2. Redistributions in binary form must reproduce the above copyright
11254721Semaste *    notice, this list of conditions and the following disclaimer in the
12254721Semaste *    documentation and/or other materials provided with the distribution.
13254721Semaste * 3. All advertising materials mentioning features or use of this software
14254721Semaste *    must display the following acknowledgement:
15254721Semaste *	This product includes software developed by the University of
16254721Semaste *	California, Berkeley and its contributors.
17254721Semaste * 4. Neither the name of the University nor the names of its contributors
18254721Semaste *    may be used to endorse or promote products derived from this software
19254721Semaste *    without specific prior written permission.
20254721Semaste *
21254721Semaste * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22254721Semaste * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23254721Semaste * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24254721Semaste * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25254721Semaste * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26254721Semaste * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27254721Semaste * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28254721Semaste * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29254721Semaste * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30254721Semaste * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31254721Semaste * SUCH DAMAGE.
32254721Semaste */
33254721Semaste
34254721Semaste#ifndef lint
35254721Semastestatic const char copyright[] =
36254721Semaste"@(#) Copyright (c) 1983, 1993\n\
37254721Semaste	The Regents of the University of California.  All rights reserved.\n";
38254721Semaste#endif /* not lint */
39254721Semaste
40254721Semaste#if 0
41254721Semaste#ifndef lint
42254721Semastestatic char sccsid[] = "@(#)gprof.c	8.1 (Berkeley) 6/6/93";
43254721Semaste#endif /* not lint */
44254721Semaste#endif
45254721Semaste
46254721Semaste#include <sys/cdefs.h>
47254721Semaste__FBSDID("$FreeBSD: head/usr.bin/gprof/gprof.c 187116 2009-01-12 21:49:42Z obrien $");
48254721Semaste
49254721Semaste#include <err.h>
50254721Semaste#include <limits.h>
51254721Semaste#include <stdint.h>
52254721Semaste#include <string.h>
53254721Semaste
54254721Semaste#include "gprof.h"
55254721Semaste
56254721Semastestatic int valcmp(const void *, const void *);
57254721Semaste
58254721Semaste
59254721Semastestatic struct gmonhdr	gmonhdr;
60254721Semastestatic int lflag;
61254721Semastestatic int Lflag;
62254721Semaste
63254721Semasteint
64254721Semastemain(argc, argv)
65254721Semaste    int argc;
66254721Semaste    char **argv;
67254721Semaste{
68254721Semaste    char	**sp;
69254721Semaste    nltype	**timesortnlp;
70254721Semaste    char	**defaultEs;
71254721Semaste
72254721Semaste    --argc;
73254721Semaste    argv++;
74254721Semaste    debug = 0;
75254721Semaste    bflag = TRUE;
76254721Semaste    while ( *argv != 0 && **argv == '-' ) {
77254721Semaste	(*argv)++;
78254721Semaste	switch ( **argv ) {
79254721Semaste	case 'a':
80254721Semaste	    aflag = TRUE;
81254721Semaste	    break;
82254721Semaste	case 'b':
83254721Semaste	    bflag = FALSE;
84254721Semaste	    break;
85254721Semaste	case 'C':
86254721Semaste	    Cflag = TRUE;
87254721Semaste	    cyclethreshold = atoi( *++argv );
88254721Semaste	    break;
89254721Semaste	case 'd':
90254721Semaste	    dflag = TRUE;
91254721Semaste	    setlinebuf(stdout);
92254721Semaste	    debug |= atoi( *++argv );
93254721Semaste	    debug |= ANYDEBUG;
94254721Semaste#	    ifdef DEBUG
95254721Semaste		printf("[main] debug = %d\n", debug);
96254721Semaste#	    else /* not DEBUG */
97254721Semaste		printf("gprof: -d ignored\n");
98254721Semaste#	    endif /* DEBUG */
99254721Semaste	    break;
100254721Semaste	case 'E':
101254721Semaste	    ++argv;
102254721Semaste	    addlist( Elist , *argv );
103254721Semaste	    Eflag = TRUE;
104254721Semaste	    addlist( elist , *argv );
105254721Semaste	    eflag = TRUE;
106254721Semaste	    break;
107254721Semaste	case 'e':
108254721Semaste	    addlist( elist , *++argv );
109254721Semaste	    eflag = TRUE;
110254721Semaste	    break;
111254721Semaste	case 'F':
112254721Semaste	    ++argv;
113254721Semaste	    addlist( Flist , *argv );
114254721Semaste	    Fflag = TRUE;
115	    addlist( flist , *argv );
116	    fflag = TRUE;
117	    break;
118	case 'f':
119	    addlist( flist , *++argv );
120	    fflag = TRUE;
121	    break;
122	case 'k':
123	    addlist( kfromlist , *++argv );
124	    addlist( ktolist , *++argv );
125	    kflag = TRUE;
126	    break;
127	case 'K':
128	    Kflag = TRUE;
129	    break;
130	case 'l':
131	    lflag = 1;
132	    Lflag = 0;
133	    break;
134	case 'L':
135	    Lflag = 1;
136	    lflag = 0;
137	    break;
138	case 's':
139	    sflag = TRUE;
140	    break;
141	case 'u':
142	    uflag = TRUE;
143	    break;
144	case 'z':
145	    zflag = TRUE;
146	    break;
147	}
148	argv++;
149    }
150    if ( *argv != 0 ) {
151	a_outname  = *argv;
152	argv++;
153    } else {
154	a_outname  = A_OUTNAME;
155    }
156    if ( *argv != 0 ) {
157	gmonname = *argv;
158	argv++;
159    } else {
160	gmonname = (char *) malloc(strlen(a_outname)+6);
161	strcpy(gmonname, a_outname);
162	strcat(gmonname, ".gmon");
163    }
164	/*
165	 *	get information from the executable file.
166	 */
167    if ((Kflag && kernel_getnfile(a_outname, &defaultEs) == -1) ||
168      (!Kflag && elf_getnfile(a_outname, &defaultEs) == -1 &&
169      aout_getnfile(a_outname, &defaultEs) == -1))
170	errx(1, "%s: bad format", a_outname);
171	/*
172	 *	sort symbol table.
173	 */
174    qsort(nl, nname, sizeof(nltype), valcmp);
175	/*
176	 *	turn off default functions
177	 */
178    for ( sp = defaultEs ; *sp ; sp++ ) {
179	Eflag = TRUE;
180	addlist( Elist , *sp );
181	eflag = TRUE;
182	addlist( elist , *sp );
183    }
184	/*
185	 *	get information about mon.out file(s).
186	 */
187    do	{
188	getpfile( gmonname );
189	if ( *argv != 0 ) {
190	    gmonname = *argv;
191	}
192    } while ( *argv++ != 0 );
193	/*
194	 *	how many ticks per second?
195	 *	if we can't tell, report time in ticks.
196	 */
197    if (hz == 0) {
198	hz = 1;
199	fprintf(stderr, "time is in ticks, not seconds\n");
200    }
201	/*
202	 *	dump out a gmon.sum file if requested
203	 */
204    if ( sflag ) {
205	dumpsum( GMONSUM );
206    }
207	/*
208	 *	assign samples to procedures
209	 */
210    asgnsamples();
211	/*
212	 *	assemble the dynamic profile
213	 */
214    timesortnlp = doarcs();
215	/*
216	 *	print the dynamic profile
217	 */
218    if(!lflag) {
219	    printgprof( timesortnlp );
220    }
221	/*
222	 *	print the flat profile
223	 */
224    if(!Lflag) {
225	    printprof();
226    }
227	/*
228	 *	print the index
229	 */
230    printindex();
231    exit(0);
232}
233
234    /*
235     *	information from a gmon.out file is in two parts:
236     *	an array of sampling hits within pc ranges,
237     *	and the arcs.
238     */
239void
240getpfile(filename)
241    char *filename;
242{
243    FILE		*pfile;
244    FILE		*openpfile();
245    struct rawarc	arc;
246
247    pfile = openpfile(filename);
248    readsamples(pfile);
249	/*
250	 *	the rest of the file consists of
251	 *	a bunch of <from,self,count> tuples.
252	 */
253    while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) {
254#	ifdef DEBUG
255	    if ( debug & SAMPLEDEBUG ) {
256		printf( "[getpfile] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
257			arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
258	    }
259#	endif /* DEBUG */
260	    /*
261	     *	add this arc
262	     */
263	tally( &arc );
264    }
265    fclose(pfile);
266}
267
268FILE *
269openpfile(filename)
270    char *filename;
271{
272    struct gmonhdr	tmp;
273    FILE		*pfile;
274    int			size;
275    int			rate;
276
277    if((pfile = fopen(filename, "r")) == NULL)
278	err(1, "%s", filename);
279    fread(&tmp, sizeof(struct gmonhdr), 1, pfile);
280    if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc ||
281	 tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt ) )
282	errx(1, "%s: incompatible with first gmon file", filename);
283    gmonhdr = tmp;
284    if ( gmonhdr.version == GMONVERSION ) {
285	rate = gmonhdr.profrate;
286	size = sizeof(struct gmonhdr);
287    } else {
288	fseek(pfile, sizeof(struct ophdr), SEEK_SET);
289	size = sizeof(struct ophdr);
290	gmonhdr.profrate = rate = hertz();
291	gmonhdr.version = GMONVERSION;
292    }
293    if (hz == 0) {
294	hz = rate;
295    } else if (hz != rate)
296	errx(0, "%s: profile clock rate (%d) %s (%ld) in first gmon file",
297	    filename, rate, "incompatible with clock rate", hz);
298    if ( gmonhdr.histcounter_type == 0 ) {
299	/* Historical case.  The type was u_short (2 bytes in practice). */
300	histcounter_type = 16;
301	histcounter_size = 2;
302    } else {
303	histcounter_type = gmonhdr.histcounter_type;
304	histcounter_size = abs(histcounter_type) / CHAR_BIT;
305    }
306    s_lowpc = (unsigned long) gmonhdr.lpc;
307    s_highpc = (unsigned long) gmonhdr.hpc;
308    lowpc = (unsigned long)gmonhdr.lpc / HISTORICAL_SCALE_2;
309    highpc = (unsigned long)gmonhdr.hpc / HISTORICAL_SCALE_2;
310    sampbytes = gmonhdr.ncnt - size;
311    nsamples = sampbytes / histcounter_size;
312#   ifdef DEBUG
313	if ( debug & SAMPLEDEBUG ) {
314	    printf( "[openpfile] hdr.lpc 0x%lx hdr.hpc 0x%lx hdr.ncnt %d\n",
315		gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt );
316	    printf( "[openpfile]   s_lowpc 0x%lx   s_highpc 0x%lx\n" ,
317		s_lowpc , s_highpc );
318	    printf( "[openpfile]     lowpc 0x%lx     highpc 0x%lx\n" ,
319		lowpc , highpc );
320	    printf( "[openpfile] sampbytes %d nsamples %d\n" ,
321		sampbytes , nsamples );
322	    printf( "[openpfile] sample rate %ld\n" , hz );
323	}
324#   endif /* DEBUG */
325    return(pfile);
326}
327
328void
329tally( rawp )
330    struct rawarc	*rawp;
331{
332    nltype		*parentp;
333    nltype		*childp;
334
335    parentp = nllookup( rawp -> raw_frompc );
336    childp = nllookup( rawp -> raw_selfpc );
337    if ( parentp == 0 || childp == 0 )
338	return;
339    if ( kflag
340	 && onlist( kfromlist , parentp -> name )
341	 && onlist( ktolist , childp -> name ) ) {
342	return;
343    }
344    childp -> ncall += rawp -> raw_count;
345#   ifdef DEBUG
346	if ( debug & TALLYDEBUG ) {
347	    printf( "[tally] arc from %s to %s traversed %ld times\n" ,
348		    parentp -> name , childp -> name , rawp -> raw_count );
349	}
350#   endif /* DEBUG */
351    addarc( parentp , childp , rawp -> raw_count );
352}
353
354/*
355 * dump out the gmon.sum file
356 */
357void
358dumpsum( sumfile )
359    char *sumfile;
360{
361    register nltype *nlp;
362    register arctype *arcp;
363    struct rawarc arc;
364    FILE *sfile;
365
366    if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL )
367	err( 1 , "%s" , sumfile );
368    /*
369     * dump the header; use the last header read in
370     */
371    if ( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 )
372	err( 1 , "%s" , sumfile );
373    /*
374     * dump the samples
375     */
376    if (fwrite(samples, histcounter_size, nsamples, sfile) != nsamples)
377	err( 1 , "%s" , sumfile );
378    /*
379     * dump the normalized raw arc information
380     */
381    for ( nlp = nl ; nlp < npe ; nlp++ ) {
382	for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) {
383	    arc.raw_frompc = arcp -> arc_parentp -> value;
384	    arc.raw_selfpc = arcp -> arc_childp -> value;
385	    arc.raw_count = arcp -> arc_count;
386	    if ( fwrite ( &arc , sizeof arc , 1 , sfile ) != 1 )
387		err( 1 , "%s" , sumfile );
388#	    ifdef DEBUG
389		if ( debug & SAMPLEDEBUG ) {
390		    printf( "[dumpsum] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
391			    arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
392		}
393#	    endif /* DEBUG */
394	}
395    }
396    fclose( sfile );
397}
398
399static int
400valcmp(v1, v2)
401    const void *v1;
402    const void *v2;
403{
404    const nltype *p1 = (const nltype *)v1;
405    const nltype *p2 = (const nltype *)v2;
406
407    if ( p1 -> value < p2 -> value ) {
408	return LESSTHAN;
409    }
410    if ( p1 -> value > p2 -> value ) {
411	return GREATERTHAN;
412    }
413    return EQUALTO;
414}
415
416void
417readsamples(pfile)
418    FILE	*pfile;
419{
420    int		i;
421    intmax_t	sample;
422
423    if (samples == 0) {
424	samples = (double *) calloc(nsamples, sizeof(double));
425	if (samples == 0)
426	    errx(0, "no room for %d sample pc's", nsamples);
427    }
428    for (i = 0; i < nsamples; i++) {
429	fread(&sample, histcounter_size, 1, pfile);
430	if (feof(pfile))
431		break;
432	switch ( histcounter_type ) {
433	case -8:
434	    samples[i] += *(int8_t *)&sample;
435	    break;
436	case 8:
437	    samples[i] += *(u_int8_t *)&sample;
438	    break;
439	case -16:
440	    samples[i] += *(int16_t *)&sample;
441	    break;
442	case 16:
443	    samples[i] += *(u_int16_t *)&sample;
444	    break;
445	case -32:
446	    samples[i] += *(int32_t *)&sample;
447	    break;
448	case 32:
449	    samples[i] += *(u_int32_t *)&sample;
450	    break;
451	case -64:
452	    samples[i] += *(int64_t *)&sample;
453	    break;
454	case 64:
455	    samples[i] += *(u_int64_t *)&sample;
456	    break;
457	default:
458	    err(1, "unsupported histogram counter type %d", histcounter_type);
459	}
460    }
461    if (i != nsamples)
462	errx(1, "unexpected EOF after reading %d/%d samples", --i , nsamples );
463}
464
465/*
466 *	Assign samples to the procedures to which they belong.
467 *
468 *	There are three cases as to where pcl and pch can be
469 *	with respect to the routine entry addresses svalue0 and svalue1
470 *	as shown in the following diagram.  overlap computes the
471 *	distance between the arrows, the fraction of the sample
472 *	that is to be credited to the routine which starts at svalue0.
473 *
474 *	    svalue0                                         svalue1
475 *	       |                                               |
476 *	       v                                               v
477 *
478 *	       +-----------------------------------------------+
479 *	       |					       |
480 *	  |  ->|    |<-		->|         |<-		->|    |<-  |
481 *	  |         |		  |         |		  |         |
482 *	  +---------+		  +---------+		  +---------+
483 *
484 *	  ^         ^		  ^         ^		  ^         ^
485 *	  |         |		  |         |		  |         |
486 *	 pcl       pch		 pcl       pch		 pcl       pch
487 *
488 *	For the vax we assert that samples will never fall in the first
489 *	two bytes of any routine, since that is the entry mask,
490 *	thus we give call alignentries() to adjust the entry points if
491 *	the entry mask falls in one bucket but the code for the routine
492 *	doesn't start until the next bucket.  In conjunction with the
493 *	alignment of routine addresses, this should allow us to have
494 *	only one sample for every four bytes of text space and never
495 *	have any overlap (the two end cases, above).
496 */
497void
498asgnsamples()
499{
500    register int	j;
501    double		ccnt;
502    double		time;
503    unsigned long	pcl, pch;
504    register int	i;
505    unsigned long	overlap;
506    unsigned long	svalue0, svalue1;
507
508    /* read samples and assign to namelist symbols */
509    scale = highpc - lowpc;
510    scale /= nsamples;
511    alignentries();
512    for (i = 0, j = 1; i < nsamples; i++) {
513	ccnt = samples[i];
514	if (ccnt == 0)
515		continue;
516	pcl = lowpc + (unsigned long)(scale * i);
517	pch = lowpc + (unsigned long)(scale * (i + 1));
518	time = ccnt;
519#	ifdef DEBUG
520	    if ( debug & SAMPLEDEBUG ) {
521		printf( "[asgnsamples] pcl 0x%lx pch 0x%lx ccnt %.0f\n" ,
522			pcl , pch , ccnt );
523	    }
524#	endif /* DEBUG */
525	totime += time;
526	for (j = j - 1; j < nname; j++) {
527	    svalue0 = nl[j].svalue;
528	    svalue1 = nl[j+1].svalue;
529		/*
530		 *	if high end of tick is below entry address,
531		 *	go for next tick.
532		 */
533	    if (pch < svalue0)
534		    break;
535		/*
536		 *	if low end of tick into next routine,
537		 *	go for next routine.
538		 */
539	    if (pcl >= svalue1)
540		    continue;
541	    overlap = min(pch, svalue1) - max(pcl, svalue0);
542	    if (overlap > 0) {
543#		ifdef DEBUG
544		    if (debug & SAMPLEDEBUG) {
545			printf("[asgnsamples] (0x%lx->0x%lx-0x%lx) %s gets %f ticks %lu overlap\n",
546				nl[j].value / HISTORICAL_SCALE_2,
547				svalue0, svalue1, nl[j].name,
548				overlap * time / scale, overlap);
549		    }
550#		endif /* DEBUG */
551		nl[j].time += overlap * time / scale;
552	    }
553	}
554    }
555#   ifdef DEBUG
556	if (debug & SAMPLEDEBUG) {
557	    printf("[asgnsamples] totime %f\n", totime);
558	}
559#   endif /* DEBUG */
560}
561
562
563unsigned long
564min(a, b)
565    unsigned long a,b;
566{
567    if (a<b)
568	return(a);
569    return(b);
570}
571
572unsigned long
573max(a, b)
574    unsigned long a,b;
575{
576    if (a>b)
577	return(a);
578    return(b);
579}
580
581    /*
582     *	calculate scaled entry point addresses (to save time in asgnsamples),
583     *	and possibly push the scaled entry points over the entry mask,
584     *	if it turns out that the entry point is in one bucket and the code
585     *	for a routine is in the next bucket.
586     */
587void
588alignentries()
589{
590    register struct nl	*nlp;
591    unsigned long	bucket_of_entry;
592    unsigned long	bucket_of_code;
593
594    for (nlp = nl; nlp < npe; nlp++) {
595	nlp -> svalue = nlp -> value / HISTORICAL_SCALE_2;
596	bucket_of_entry = (nlp->svalue - lowpc) / scale;
597	bucket_of_code = (nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2 -
598	  lowpc) / scale;
599	if (bucket_of_entry < bucket_of_code) {
600#	    ifdef DEBUG
601		if (debug & SAMPLEDEBUG) {
602		    printf("[alignentries] pushing svalue 0x%lx to 0x%lx\n",
603			    nlp->svalue,
604			    nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2);
605		}
606#	    endif /* DEBUG */
607	    nlp->svalue += OFFSET_OF_CODE / HISTORICAL_SCALE_2;
608	}
609    }
610}
611