gprof.c revision 91735
1/*
2 * Copyright (c) 1983, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#ifndef lint
35static const char copyright[] =
36"@(#) Copyright (c) 1983, 1993\n\
37	The Regents of the University of California.  All rights reserved.\n";
38#endif /* not lint */
39
40#ifndef lint
41#if 0
42static char sccsid[] = "@(#)gprof.c	8.1 (Berkeley) 6/6/93";
43#endif
44static const char rcsid[] =
45  "$FreeBSD: head/usr.bin/gprof/gprof.c 91735 2002-03-06 09:47:36Z bde $";
46#endif /* not lint */
47
48#include <err.h>
49#include "gprof.h"
50
51static int valcmp(const void *, const void *);
52
53
54static struct gmonhdr	gmonhdr;
55static int lflag;
56static int Lflag;
57
58main(argc, argv)
59    int argc;
60    char **argv;
61{
62    char	**sp;
63    nltype	**timesortnlp;
64    char	**defaultEs;
65
66    --argc;
67    argv++;
68    debug = 0;
69    bflag = TRUE;
70    while ( *argv != 0 && **argv == '-' ) {
71	(*argv)++;
72	switch ( **argv ) {
73	case 'a':
74	    aflag = TRUE;
75	    break;
76	case 'b':
77	    bflag = FALSE;
78	    break;
79	case 'C':
80	    Cflag = TRUE;
81	    cyclethreshold = atoi( *++argv );
82	    break;
83	case 'c':
84#if defined(vax) || defined(tahoe)
85	    cflag = TRUE;
86#else
87	    errx(1, "-c isn't supported on this architecture yet");
88#endif
89	    break;
90	case 'd':
91	    dflag = TRUE;
92	    setlinebuf(stdout);
93	    debug |= atoi( *++argv );
94	    debug |= ANYDEBUG;
95#	    ifdef DEBUG
96		printf("[main] debug = %d\n", debug);
97#	    else not DEBUG
98		printf("gprof: -d ignored\n");
99#	    endif DEBUG
100	    break;
101	case 'E':
102	    ++argv;
103	    addlist( Elist , *argv );
104	    Eflag = TRUE;
105	    addlist( elist , *argv );
106	    eflag = TRUE;
107	    break;
108	case 'e':
109	    addlist( elist , *++argv );
110	    eflag = TRUE;
111	    break;
112	case 'F':
113	    ++argv;
114	    addlist( Flist , *argv );
115	    Fflag = TRUE;
116	    addlist( flist , *argv );
117	    fflag = TRUE;
118	    break;
119	case 'f':
120	    addlist( flist , *++argv );
121	    fflag = TRUE;
122	    break;
123	case 'k':
124	    addlist( kfromlist , *++argv );
125	    addlist( ktolist , *++argv );
126	    kflag = TRUE;
127	    break;
128	case 'K':
129	    Kflag = TRUE;
130	    break;
131    case 'l':
132	    lflag = 1;
133	    Lflag = 0;
134	    break;
135    case 'L':
136	    Lflag = 1;
137	    lflag = 0;
138	    break;
139    case 's':
140	    sflag = TRUE;
141	    break;
142	case 'u':
143	    uflag = TRUE;
144	    break;
145	case 'z':
146	    zflag = TRUE;
147	    break;
148	}
149	argv++;
150    }
151    if ( *argv != 0 ) {
152	a_outname  = *argv;
153	argv++;
154    } else {
155	a_outname  = A_OUTNAME;
156    }
157    if ( *argv != 0 ) {
158	gmonname = *argv;
159	argv++;
160    } else {
161	gmonname = (char *) malloc(strlen(a_outname)+6);
162	strcpy(gmonname, a_outname);
163	strcat(gmonname, ".gmon");
164    }
165	/*
166	 *	get information from the executable file.
167	 */
168    if ((Kflag && kernel_getnfile(a_outname, &defaultEs) == -1) ||
169      (elf_getnfile(a_outname, &defaultEs) == -1 &&
170      aout_getnfile(a_outname, &defaultEs) == -1))
171	errx(1, "%s: bad format", a_outname);
172	/*
173	 *	sort symbol table.
174	 */
175    qsort(nl, nname, sizeof(nltype), valcmp);
176	/*
177	 *	turn off default functions
178	 */
179    for ( sp = defaultEs ; *sp ; sp++ ) {
180	Eflag = TRUE;
181	addlist( Elist , *sp );
182	eflag = TRUE;
183	addlist( elist , *sp );
184    }
185	/*
186	 *	get information about mon.out file(s).
187	 */
188    do	{
189	getpfile( gmonname );
190	if ( *argv != 0 ) {
191	    gmonname = *argv;
192	}
193    } while ( *argv++ != 0 );
194	/*
195	 *	how many ticks per second?
196	 *	if we can't tell, report time in ticks.
197	 */
198    if (hz == 0) {
199	hz = 1;
200	fprintf(stderr, "time is in ticks, not seconds\n");
201    }
202	/*
203	 *	dump out a gmon.sum file if requested
204	 */
205    if ( sflag ) {
206	dumpsum( GMONSUM );
207    }
208	/*
209	 *	assign samples to procedures
210	 */
211    asgnsamples();
212	/*
213	 *	assemble the dynamic profile
214	 */
215    timesortnlp = doarcs();
216	/*
217	 *	print the dynamic profile
218	 */
219    if(!lflag) {
220	    printgprof( timesortnlp );
221    }
222	/*
223	 *	print the flat profile
224	 */
225    if(!Lflag) {
226	    printprof();
227    }
228	/*
229	 *	print the index
230	 */
231    printindex();
232    done();
233}
234
235    /*
236     *	information from a gmon.out file is in two parts:
237     *	an array of sampling hits within pc ranges,
238     *	and the arcs.
239     */
240getpfile(filename)
241    char *filename;
242{
243    FILE		*pfile;
244    FILE		*openpfile();
245    struct rawarc	arc;
246
247    pfile = openpfile(filename);
248    readsamples(pfile);
249	/*
250	 *	the rest of the file consists of
251	 *	a bunch of <from,self,count> tuples.
252	 */
253    while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) {
254#	ifdef DEBUG
255	    if ( debug & SAMPLEDEBUG ) {
256		printf( "[getpfile] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
257			arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
258	    }
259#	endif DEBUG
260	    /*
261	     *	add this arc
262	     */
263	tally( &arc );
264    }
265    fclose(pfile);
266}
267
268FILE *
269openpfile(filename)
270    char *filename;
271{
272    struct gmonhdr	tmp;
273    FILE		*pfile;
274    int			size;
275    int			rate;
276
277    if((pfile = fopen(filename, "r")) == NULL) {
278	perror(filename);
279	done();
280    }
281    fread(&tmp, sizeof(struct gmonhdr), 1, pfile);
282    if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc ||
283	 tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt ) ) {
284	warnx("%s: incompatible with first gmon file", filename);
285	done();
286    }
287    gmonhdr = tmp;
288    if ( gmonhdr.version == GMONVERSION ) {
289	rate = gmonhdr.profrate;
290	size = sizeof(struct gmonhdr);
291    } else {
292	fseek(pfile, sizeof(struct ophdr), SEEK_SET);
293	size = sizeof(struct ophdr);
294	gmonhdr.profrate = rate = hertz();
295	gmonhdr.version = GMONVERSION;
296    }
297    if (hz == 0) {
298	hz = rate;
299    } else if (hz != rate) {
300	fprintf(stderr,
301	    "%s: profile clock rate (%d) %s (%ld) in first gmon file\n",
302	    filename, rate, "incompatible with clock rate", hz);
303	done();
304    }
305    s_lowpc = (unsigned long) gmonhdr.lpc;
306    s_highpc = (unsigned long) gmonhdr.hpc;
307    lowpc = (unsigned long)gmonhdr.lpc / HISTORICAL_SCALE_2;
308    highpc = (unsigned long)gmonhdr.hpc / HISTORICAL_SCALE_2;
309    sampbytes = gmonhdr.ncnt - size;
310    nsamples = sampbytes / sizeof (UNIT);
311#   ifdef DEBUG
312	if ( debug & SAMPLEDEBUG ) {
313	    printf( "[openpfile] hdr.lpc 0x%lx hdr.hpc 0x%lx hdr.ncnt %d\n",
314		gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt );
315	    printf( "[openpfile]   s_lowpc 0x%lx   s_highpc 0x%lx\n" ,
316		s_lowpc , s_highpc );
317	    printf( "[openpfile]     lowpc 0x%lx     highpc 0x%lx\n" ,
318		lowpc , highpc );
319	    printf( "[openpfile] sampbytes %d nsamples %d\n" ,
320		sampbytes , nsamples );
321	    printf( "[openpfile] sample rate %ld\n" , hz );
322	}
323#   endif DEBUG
324    return(pfile);
325}
326
327tally( rawp )
328    struct rawarc	*rawp;
329{
330    nltype		*parentp;
331    nltype		*childp;
332
333    parentp = nllookup( rawp -> raw_frompc );
334    childp = nllookup( rawp -> raw_selfpc );
335    if ( parentp == 0 || childp == 0 )
336	return;
337    if ( kflag
338	 && onlist( kfromlist , parentp -> name )
339	 && onlist( ktolist , childp -> name ) ) {
340	return;
341    }
342    childp -> ncall += rawp -> raw_count;
343#   ifdef DEBUG
344	if ( debug & TALLYDEBUG ) {
345	    printf( "[tally] arc from %s to %s traversed %ld times\n" ,
346		    parentp -> name , childp -> name , rawp -> raw_count );
347	}
348#   endif DEBUG
349    addarc( parentp , childp , rawp -> raw_count );
350}
351
352/*
353 * dump out the gmon.sum file
354 */
355dumpsum( sumfile )
356    char *sumfile;
357{
358    register nltype *nlp;
359    register arctype *arcp;
360    struct rawarc arc;
361    FILE *sfile;
362
363    if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL ) {
364	perror( sumfile );
365	done();
366    }
367    /*
368     * dump the header; use the last header read in
369     */
370    if ( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 ) {
371	perror( sumfile );
372	done();
373    }
374    /*
375     * dump the samples
376     */
377    if (fwrite(samples, sizeof (UNIT), nsamples, sfile) != nsamples) {
378	perror( sumfile );
379	done();
380    }
381    /*
382     * dump the normalized raw arc information
383     */
384    for ( nlp = nl ; nlp < npe ; nlp++ ) {
385	for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) {
386	    arc.raw_frompc = arcp -> arc_parentp -> value;
387	    arc.raw_selfpc = arcp -> arc_childp -> value;
388	    arc.raw_count = arcp -> arc_count;
389	    if ( fwrite ( &arc , sizeof arc , 1 , sfile ) != 1 ) {
390		perror( sumfile );
391		done();
392	    }
393#	    ifdef DEBUG
394		if ( debug & SAMPLEDEBUG ) {
395		    printf( "[dumpsum] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
396			    arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
397		}
398#	    endif DEBUG
399	}
400    }
401    fclose( sfile );
402}
403
404static int
405valcmp(v1, v2)
406    const void *v1;
407    const void *v2;
408{
409    const nltype *p1 = (const nltype *)v1;
410    const nltype *p2 = (const nltype *)v2;
411
412    if ( p1 -> value < p2 -> value ) {
413	return LESSTHAN;
414    }
415    if ( p1 -> value > p2 -> value ) {
416	return GREATERTHAN;
417    }
418    return EQUALTO;
419}
420
421readsamples(pfile)
422    FILE	*pfile;
423{
424    register i;
425    UNIT	sample;
426
427    if (samples == 0) {
428	samples = (UNIT *) calloc(sampbytes, sizeof (UNIT));
429	if (samples == 0) {
430	    warnx("no room for %d sample pc's", sampbytes / sizeof (UNIT));
431	    done();
432	}
433    }
434    for (i = 0; i < nsamples; i++) {
435	fread(&sample, sizeof (UNIT), 1, pfile);
436	if (feof(pfile))
437		break;
438	samples[i] += sample;
439    }
440    if (i != nsamples) {
441	warnx("unexpected EOF after reading %d/%d samples", --i , nsamples );
442	done();
443    }
444}
445
446/*
447 *	Assign samples to the procedures to which they belong.
448 *
449 *	There are three cases as to where pcl and pch can be
450 *	with respect to the routine entry addresses svalue0 and svalue1
451 *	as shown in the following diagram.  overlap computes the
452 *	distance between the arrows, the fraction of the sample
453 *	that is to be credited to the routine which starts at svalue0.
454 *
455 *	    svalue0                                         svalue1
456 *	       |                                               |
457 *	       v                                               v
458 *
459 *	       +-----------------------------------------------+
460 *	       |					       |
461 *	  |  ->|    |<-		->|         |<-		->|    |<-  |
462 *	  |         |		  |         |		  |         |
463 *	  +---------+		  +---------+		  +---------+
464 *
465 *	  ^         ^		  ^         ^		  ^         ^
466 *	  |         |		  |         |		  |         |
467 *	 pcl       pch		 pcl       pch		 pcl       pch
468 *
469 *	For the vax we assert that samples will never fall in the first
470 *	two bytes of any routine, since that is the entry mask,
471 *	thus we give call alignentries() to adjust the entry points if
472 *	the entry mask falls in one bucket but the code for the routine
473 *	doesn't start until the next bucket.  In conjunction with the
474 *	alignment of routine addresses, this should allow us to have
475 *	only one sample for every four bytes of text space and never
476 *	have any overlap (the two end cases, above).
477 */
478asgnsamples()
479{
480    register int	j;
481    UNIT		ccnt;
482    double		time;
483    unsigned long	pcl, pch;
484    register int	i;
485    unsigned long	overlap;
486    unsigned long	svalue0, svalue1;
487
488    /* read samples and assign to namelist symbols */
489    scale = highpc - lowpc;
490    scale /= nsamples;
491    alignentries();
492    for (i = 0, j = 1; i < nsamples; i++) {
493	ccnt = samples[i];
494	if (ccnt == 0)
495		continue;
496	pcl = lowpc + (unsigned long)(scale * i);
497	pch = lowpc + (unsigned long)(scale * (i + 1));
498	time = ccnt;
499#	ifdef DEBUG
500	    if ( debug & SAMPLEDEBUG ) {
501		printf( "[asgnsamples] pcl 0x%lx pch 0x%lx ccnt %d\n" ,
502			pcl , pch , ccnt );
503	    }
504#	endif DEBUG
505	totime += time;
506	for (j = j - 1; j < nname; j++) {
507	    svalue0 = nl[j].svalue;
508	    svalue1 = nl[j+1].svalue;
509		/*
510		 *	if high end of tick is below entry address,
511		 *	go for next tick.
512		 */
513	    if (pch < svalue0)
514		    break;
515		/*
516		 *	if low end of tick into next routine,
517		 *	go for next routine.
518		 */
519	    if (pcl >= svalue1)
520		    continue;
521	    overlap = min(pch, svalue1) - max(pcl, svalue0);
522	    if (overlap > 0) {
523#		ifdef DEBUG
524		    if (debug & SAMPLEDEBUG) {
525			printf("[asgnsamples] (0x%lx->0x%lx-0x%lx) %s gets %f ticks %lu overlap\n",
526				nl[j].value / HISTORICAL_SCALE_2,
527				svalue0, svalue1, nl[j].name,
528				overlap * time / scale, overlap);
529		    }
530#		endif DEBUG
531		nl[j].time += overlap * time / scale;
532	    }
533	}
534    }
535#   ifdef DEBUG
536	if (debug & SAMPLEDEBUG) {
537	    printf("[asgnsamples] totime %f\n", totime);
538	}
539#   endif DEBUG
540}
541
542
543unsigned long
544min(a, b)
545    unsigned long a,b;
546{
547    if (a<b)
548	return(a);
549    return(b);
550}
551
552unsigned long
553max(a, b)
554    unsigned long a,b;
555{
556    if (a>b)
557	return(a);
558    return(b);
559}
560
561    /*
562     *	calculate scaled entry point addresses (to save time in asgnsamples),
563     *	and possibly push the scaled entry points over the entry mask,
564     *	if it turns out that the entry point is in one bucket and the code
565     *	for a routine is in the next bucket.
566     */
567alignentries()
568{
569    register struct nl	*nlp;
570    unsigned long	bucket_of_entry;
571    unsigned long	bucket_of_code;
572
573    for (nlp = nl; nlp < npe; nlp++) {
574	nlp -> svalue = nlp -> value / HISTORICAL_SCALE_2;
575	bucket_of_entry = (nlp->svalue - lowpc) / scale;
576	bucket_of_code = (nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2 -
577	  lowpc) / scale;
578	if (bucket_of_entry < bucket_of_code) {
579#	    ifdef DEBUG
580		if (debug & SAMPLEDEBUG) {
581		    printf("[alignentries] pushing svalue 0x%lx to 0x%lx\n",
582			    nlp->svalue,
583			    nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2);
584		}
585#	    endif DEBUG
586	    nlp->svalue += OFFSET_OF_CODE / HISTORICAL_SCALE_2;
587	}
588    }
589}
590
591done()
592{
593
594    exit(0);
595}
596