/* gprof.c revision 91018 */
/*
 * Copyright (c) 1983, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Identification strings embedded in the object file.  They are compiled
 * out when `lint' is defined.
 */
#ifndef lint
static const char copyright[] =
    "@(#) Copyright (c) 1983, 1993\n"
    "\tThe Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */

#ifndef lint
#if 0
static char sccsid[] = "@(#)gprof.c	8.1 (Berkeley) 6/6/93";
#endif
static const char rcsid[] =
  "$FreeBSD: head/usr.bin/gprof/gprof.c 91018 2002-02-21 12:07:21Z bde $";
#endif /* not lint */

#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "gprof.h"

51218822Sdim#define	UNITS_TO_CODE	(OFFSET_OF_CODE / sizeof(UNIT))
52218822Sdim
53218822Sdimstatic int valcmp(const void *, const void *);
54218822Sdim
55218822Sdim
56218822Sdimstatic struct gmonhdr	gmonhdr;
57218822Sdimstatic int lflag;
58218822Sdimstatic int Lflag;
59218822Sdim
60218822Sdimmain(argc, argv)
61218822Sdim    int argc;
62218822Sdim    char **argv;
63218822Sdim{
64218822Sdim    char	**sp;
65218822Sdim    nltype	**timesortnlp;
66218822Sdim    char	**defaultEs;
67218822Sdim
68218822Sdim    --argc;
69218822Sdim    argv++;
70218822Sdim    debug = 0;
71218822Sdim    bflag = TRUE;
72218822Sdim    while ( *argv != 0 && **argv == '-' ) {
73218822Sdim	(*argv)++;
74218822Sdim	switch ( **argv ) {
75218822Sdim	case 'a':
76218822Sdim	    aflag = TRUE;
77218822Sdim	    break;
78218822Sdim	case 'b':
79218822Sdim	    bflag = FALSE;
80218822Sdim	    break;
81218822Sdim	case 'C':
82218822Sdim	    Cflag = TRUE;
83218822Sdim	    cyclethreshold = atoi( *++argv );
84218822Sdim	    break;
85218822Sdim	case 'c':
86218822Sdim#if defined(vax) || defined(tahoe)
87218822Sdim	    cflag = TRUE;
88218822Sdim#else
89218822Sdim	    errx(1, "-c isn't supported on this architecture yet");
90218822Sdim#endif
91218822Sdim	    break;
92218822Sdim	case 'd':
93218822Sdim	    dflag = TRUE;
94218822Sdim	    setlinebuf(stdout);
95218822Sdim	    debug |= atoi( *++argv );
96218822Sdim	    debug |= ANYDEBUG;
97218822Sdim#	    ifdef DEBUG
98218822Sdim		printf("[main] debug = %d\n", debug);
99218822Sdim#	    else not DEBUG
100218822Sdim		printf("gprof: -d ignored\n");
101218822Sdim#	    endif DEBUG
102218822Sdim	    break;
103218822Sdim	case 'E':
104218822Sdim	    ++argv;
105218822Sdim	    addlist( Elist , *argv );
106218822Sdim	    Eflag = TRUE;
107218822Sdim	    addlist( elist , *argv );
108218822Sdim	    eflag = TRUE;
109218822Sdim	    break;
110218822Sdim	case 'e':
111218822Sdim	    addlist( elist , *++argv );
112218822Sdim	    eflag = TRUE;
113218822Sdim	    break;
114218822Sdim	case 'F':
115218822Sdim	    ++argv;
116218822Sdim	    addlist( Flist , *argv );
117218822Sdim	    Fflag = TRUE;
118218822Sdim	    addlist( flist , *argv );
119218822Sdim	    fflag = TRUE;
120218822Sdim	    break;
121218822Sdim	case 'f':
122218822Sdim	    addlist( flist , *++argv );
123218822Sdim	    fflag = TRUE;
124218822Sdim	    break;
125218822Sdim	case 'k':
126218822Sdim	    addlist( kfromlist , *++argv );
127218822Sdim	    addlist( ktolist , *++argv );
128218822Sdim	    kflag = TRUE;
129218822Sdim	    break;
130218822Sdim	case 'K':
131218822Sdim	    Kflag = TRUE;
132218822Sdim	    break;
133218822Sdim    case 'l':
134218822Sdim	    lflag = 1;
135218822Sdim	    Lflag = 0;
136218822Sdim	    break;
137218822Sdim    case 'L':
138218822Sdim	    Lflag = 1;
139218822Sdim	    lflag = 0;
140218822Sdim	    break;
141218822Sdim    case 's':
142218822Sdim	    sflag = TRUE;
143218822Sdim	    break;
144218822Sdim	case 'u':
145130561Sobrien	    uflag = TRUE;
146104834Sobrien	    break;
147218822Sdim	case 'z':
148218822Sdim	    zflag = TRUE;
149218822Sdim	    break;
150218822Sdim	}
151130561Sobrien	argv++;
15233965Sjdp    }
15333965Sjdp    if ( *argv != 0 ) {
154130561Sobrien	a_outname  = *argv;
155130561Sobrien	argv++;
156130561Sobrien    } else {
15733965Sjdp	a_outname  = A_OUTNAME;
15833965Sjdp    }
159    if ( *argv != 0 ) {
160	gmonname = *argv;
161	argv++;
162    } else {
163	gmonname = (char *) malloc(strlen(a_outname)+6);
164	strcpy(gmonname, a_outname);
165	strcat(gmonname, ".gmon");
166    }
167	/*
168	 *	get information from the executable file.
169	 */
170    if ((Kflag && kernel_getnfile(a_outname, &defaultEs) == -1) ||
171      (elf_getnfile(a_outname, &defaultEs) == -1 &&
172      aout_getnfile(a_outname, &defaultEs) == -1))
173	errx(1, "%s: bad format", a_outname);
174	/*
175	 *	sort symbol table.
176	 */
177    qsort(nl, nname, sizeof(nltype), valcmp);
178	/*
179	 *	turn off default functions
180	 */
181    for ( sp = defaultEs ; *sp ; sp++ ) {
182	Eflag = TRUE;
183	addlist( Elist , *sp );
184	eflag = TRUE;
185	addlist( elist , *sp );
186    }
187	/*
188	 *	get information about mon.out file(s).
189	 */
190    do	{
191	getpfile( gmonname );
192	if ( *argv != 0 ) {
193	    gmonname = *argv;
194	}
195    } while ( *argv++ != 0 );
196	/*
197	 *	how many ticks per second?
198	 *	if we can't tell, report time in ticks.
199	 */
200    if (hz == 0) {
201	hz = 1;
202	fprintf(stderr, "time is in ticks, not seconds\n");
203    }
204	/*
205	 *	dump out a gmon.sum file if requested
206	 */
207    if ( sflag ) {
208	dumpsum( GMONSUM );
209    }
210	/*
211	 *	assign samples to procedures
212	 */
213    asgnsamples();
214	/*
215	 *	assemble the dynamic profile
216	 */
217    timesortnlp = doarcs();
218	/*
219	 *	print the dynamic profile
220	 */
221    if(!lflag) {
222	    printgprof( timesortnlp );
223    }
224	/*
225	 *	print the flat profile
226	 */
227    if(!Lflag) {
228	    printprof();
229    }
230	/*
231	 *	print the index
232	 */
233    printindex();
234    done();
235}
236
237    /*
238     *	information from a gmon.out file is in two parts:
239     *	an array of sampling hits within pc ranges,
240     *	and the arcs.
241     */
242getpfile(filename)
243    char *filename;
244{
245    FILE		*pfile;
246    FILE		*openpfile();
247    struct rawarc	arc;
248
249    pfile = openpfile(filename);
250    readsamples(pfile);
251	/*
252	 *	the rest of the file consists of
253	 *	a bunch of <from,self,count> tuples.
254	 */
255    while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) {
256#	ifdef DEBUG
257	    if ( debug & SAMPLEDEBUG ) {
258		printf( "[getpfile] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
259			arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
260	    }
261#	endif DEBUG
262	    /*
263	     *	add this arc
264	     */
265	tally( &arc );
266    }
267    fclose(pfile);
268}
269
270FILE *
271openpfile(filename)
272    char *filename;
273{
274    struct gmonhdr	tmp;
275    FILE		*pfile;
276    int			size;
277    int			rate;
278
279    if((pfile = fopen(filename, "r")) == NULL) {
280	perror(filename);
281	done();
282    }
283    fread(&tmp, sizeof(struct gmonhdr), 1, pfile);
284    if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc ||
285	 tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt ) ) {
286	warnx("%s: incompatible with first gmon file", filename);
287	done();
288    }
289    gmonhdr = tmp;
290    if ( gmonhdr.version == GMONVERSION ) {
291	rate = gmonhdr.profrate;
292	size = sizeof(struct gmonhdr);
293    } else {
294	fseek(pfile, sizeof(struct ophdr), SEEK_SET);
295	size = sizeof(struct ophdr);
296	gmonhdr.profrate = rate = hertz();
297	gmonhdr.version = GMONVERSION;
298    }
299    if (hz == 0) {
300	hz = rate;
301    } else if (hz != rate) {
302	fprintf(stderr,
303	    "%s: profile clock rate (%d) %s (%ld) in first gmon file\n",
304	    filename, rate, "incompatible with clock rate", hz);
305	done();
306    }
307    s_lowpc = (unsigned long) gmonhdr.lpc;
308    s_highpc = (unsigned long) gmonhdr.hpc;
309    lowpc = (unsigned long)gmonhdr.lpc / sizeof(UNIT);
310    highpc = (unsigned long)gmonhdr.hpc / sizeof(UNIT);
311    sampbytes = gmonhdr.ncnt - size;
312    nsamples = sampbytes / sizeof (UNIT);
313#   ifdef DEBUG
314	if ( debug & SAMPLEDEBUG ) {
315	    printf( "[openpfile] hdr.lpc 0x%lx hdr.hpc 0x%lx hdr.ncnt %d\n",
316		gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt );
317	    printf( "[openpfile]   s_lowpc 0x%lx   s_highpc 0x%lx\n" ,
318		s_lowpc , s_highpc );
319	    printf( "[openpfile]     lowpc 0x%lx     highpc 0x%lx\n" ,
320		lowpc , highpc );
321	    printf( "[openpfile] sampbytes %d nsamples %d\n" ,
322		sampbytes , nsamples );
323	    printf( "[openpfile] sample rate %ld\n" , hz );
324	}
325#   endif DEBUG
326    return(pfile);
327}
328
329tally( rawp )
330    struct rawarc	*rawp;
331{
332    nltype		*parentp;
333    nltype		*childp;
334
335    parentp = nllookup( rawp -> raw_frompc );
336    childp = nllookup( rawp -> raw_selfpc );
337    if ( parentp == 0 || childp == 0 )
338	return;
339    if ( kflag
340	 && onlist( kfromlist , parentp -> name )
341	 && onlist( ktolist , childp -> name ) ) {
342	return;
343    }
344    childp -> ncall += rawp -> raw_count;
345#   ifdef DEBUG
346	if ( debug & TALLYDEBUG ) {
347	    printf( "[tally] arc from %s to %s traversed %ld times\n" ,
348		    parentp -> name , childp -> name , rawp -> raw_count );
349	}
350#   endif DEBUG
351    addarc( parentp , childp , rawp -> raw_count );
352}
353
354/*
355 * dump out the gmon.sum file
356 */
357dumpsum( sumfile )
358    char *sumfile;
359{
360    register nltype *nlp;
361    register arctype *arcp;
362    struct rawarc arc;
363    FILE *sfile;
364
365    if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL ) {
366	perror( sumfile );
367	done();
368    }
369    /*
370     * dump the header; use the last header read in
371     */
372    if ( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 ) {
373	perror( sumfile );
374	done();
375    }
376    /*
377     * dump the samples
378     */
379    if (fwrite(samples, sizeof (UNIT), nsamples, sfile) != nsamples) {
380	perror( sumfile );
381	done();
382    }
383    /*
384     * dump the normalized raw arc information
385     */
386    for ( nlp = nl ; nlp < npe ; nlp++ ) {
387	for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) {
388	    arc.raw_frompc = arcp -> arc_parentp -> value;
389	    arc.raw_selfpc = arcp -> arc_childp -> value;
390	    arc.raw_count = arcp -> arc_count;
391	    if ( fwrite ( &arc , sizeof arc , 1 , sfile ) != 1 ) {
392		perror( sumfile );
393		done();
394	    }
395#	    ifdef DEBUG
396		if ( debug & SAMPLEDEBUG ) {
397		    printf( "[dumpsum] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
398			    arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
399		}
400#	    endif DEBUG
401	}
402    }
403    fclose( sfile );
404}
405
406static int
407valcmp(v1, v2)
408    const void *v1;
409    const void *v2;
410{
411    const nltype *p1 = (const nltype *)v1;
412    const nltype *p2 = (const nltype *)v2;
413
414    if ( p1 -> value < p2 -> value ) {
415	return LESSTHAN;
416    }
417    if ( p1 -> value > p2 -> value ) {
418	return GREATERTHAN;
419    }
420    return EQUALTO;
421}
422
423readsamples(pfile)
424    FILE	*pfile;
425{
426    register i;
427    UNIT	sample;
428
429    if (samples == 0) {
430	samples = (UNIT *) calloc(sampbytes, sizeof (UNIT));
431	if (samples == 0) {
432	    warnx("no room for %d sample pc's", sampbytes / sizeof (UNIT));
433	    done();
434	}
435    }
436    for (i = 0; i < nsamples; i++) {
437	fread(&sample, sizeof (UNIT), 1, pfile);
438	if (feof(pfile))
439		break;
440	samples[i] += sample;
441    }
442    if (i != nsamples) {
443	warnx("unexpected EOF after reading %d/%d samples", --i , nsamples );
444	done();
445    }
446}
447
448/*
449 *	Assign samples to the procedures to which they belong.
450 *
451 *	There are three cases as to where pcl and pch can be
452 *	with respect to the routine entry addresses svalue0 and svalue1
453 *	as shown in the following diagram.  overlap computes the
454 *	distance between the arrows, the fraction of the sample
455 *	that is to be credited to the routine which starts at svalue0.
456 *
457 *	    svalue0                                         svalue1
458 *	       |                                               |
459 *	       v                                               v
460 *
461 *	       +-----------------------------------------------+
462 *	       |					       |
463 *	  |  ->|    |<-		->|         |<-		->|    |<-  |
464 *	  |         |		  |         |		  |         |
465 *	  +---------+		  +---------+		  +---------+
466 *
467 *	  ^         ^		  ^         ^		  ^         ^
468 *	  |         |		  |         |		  |         |
469 *	 pcl       pch		 pcl       pch		 pcl       pch
470 *
471 *	For the vax we assert that samples will never fall in the first
472 *	two bytes of any routine, since that is the entry mask,
473 *	thus we give call alignentries() to adjust the entry points if
474 *	the entry mask falls in one bucket but the code for the routine
475 *	doesn't start until the next bucket.  In conjunction with the
476 *	alignment of routine addresses, this should allow us to have
477 *	only one sample for every four bytes of text space and never
478 *	have any overlap (the two end cases, above).
479 */
480asgnsamples()
481{
482    register int	j;
483    UNIT		ccnt;
484    double		time;
485    unsigned long	pcl, pch;
486    register int	i;
487    unsigned long	overlap;
488    unsigned long	svalue0, svalue1;
489
490    /* read samples and assign to namelist symbols */
491    scale = highpc - lowpc;
492    scale /= nsamples;
493    alignentries();
494    for (i = 0, j = 1; i < nsamples; i++) {
495	ccnt = samples[i];
496	if (ccnt == 0)
497		continue;
498	pcl = lowpc + (unsigned long)(scale * i);
499	pch = lowpc + (unsigned long)(scale * (i + 1));
500	time = ccnt;
501#	ifdef DEBUG
502	    if ( debug & SAMPLEDEBUG ) {
503		printf( "[asgnsamples] pcl 0x%lx pch 0x%lx ccnt %d\n" ,
504			pcl , pch , ccnt );
505	    }
506#	endif DEBUG
507	totime += time;
508	for (j = j - 1; j < nname; j++) {
509	    svalue0 = nl[j].svalue;
510	    svalue1 = nl[j+1].svalue;
511		/*
512		 *	if high end of tick is below entry address,
513		 *	go for next tick.
514		 */
515	    if (pch < svalue0)
516		    break;
517		/*
518		 *	if low end of tick into next routine,
519		 *	go for next routine.
520		 */
521	    if (pcl >= svalue1)
522		    continue;
523	    overlap = min(pch, svalue1) - max(pcl, svalue0);
524	    if (overlap > 0) {
525#		ifdef DEBUG
526		    if (debug & SAMPLEDEBUG) {
527			printf("[asgnsamples] (0x%lx->0x%lx-0x%lx) %s gets %f ticks %lu overlap\n",
528				nl[j].value/sizeof(UNIT), svalue0, svalue1,
529				nl[j].name,
530				overlap * time / scale, overlap);
531		    }
532#		endif DEBUG
533		nl[j].time += overlap * time / scale;
534	    }
535	}
536    }
537#   ifdef DEBUG
538	if (debug & SAMPLEDEBUG) {
539	    printf("[asgnsamples] totime %f\n", totime);
540	}
541#   endif DEBUG
542}
543
544
/*
 *	Return the smaller of two unsigned long values.
 */
unsigned long
min(unsigned long a, unsigned long b)
{
    if (a<b)
	return(a);
    return(b);
}

/*
 *	Return the larger of two unsigned long values.
 */
unsigned long
max(unsigned long a, unsigned long b)
{
    if (a>b)
	return(a);
    return(b);
}

563    /*
564     *	calculate scaled entry point addresses (to save time in asgnsamples),
565     *	and possibly push the scaled entry points over the entry mask,
566     *	if it turns out that the entry point is in one bucket and the code
567     *	for a routine is in the next bucket.
568     */
569alignentries()
570{
571    register struct nl	*nlp;
572    unsigned long	bucket_of_entry;
573    unsigned long	bucket_of_code;
574
575    for (nlp = nl; nlp < npe; nlp++) {
576	nlp -> svalue = nlp -> value / sizeof(UNIT);
577	bucket_of_entry = (nlp->svalue - lowpc) / scale;
578	bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale;
579	if (bucket_of_entry < bucket_of_code) {
580#	    ifdef DEBUG
581		if (debug & SAMPLEDEBUG) {
582		    printf("[alignentries] pushing svalue 0x%lx to 0x%lx\n",
583			    nlp->svalue, nlp->svalue + UNITS_TO_CODE);
584		}
585#	    endif DEBUG
586	    nlp->svalue += UNITS_TO_CODE;
587	}
588    }
589}
590
/*
 *	Terminate the program with exit status 0.  Does not return; the
 *	int return type only mirrors the historical implicit-int
 *	definition.
 */
int
done(void)
{

    exit(0);
}
596