gprof.c revision 50477
1/*
2 * Copyright (c) 1983, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#ifndef lint
35static const char copyright[] =
36"@(#) Copyright (c) 1983, 1993\n\
37	The Regents of the University of California.  All rights reserved.\n";
38#endif /* not lint */
39
40#ifndef lint
41#if 0
42static char sccsid[] = "@(#)gprof.c	8.1 (Berkeley) 6/6/93";
43#endif
44static const char rcsid[] =
45  "$FreeBSD: head/usr.bin/gprof/gprof.c 50477 1999-08-28 01:08:13Z peter $";
46#endif /* not lint */
47
48#include <err.h>
49#include "gprof.h"
50
51static int valcmp(const void *, const void *);
52
53
54static struct gmonhdr	gmonhdr;
55static int lflag;
56static int Lflag;
57
58main(argc, argv)
59    int argc;
60    char **argv;
61{
62    char	**sp;
63    nltype	**timesortnlp;
64    char	**defaultEs;
65
66    --argc;
67    argv++;
68    debug = 0;
69    bflag = TRUE;
70    while ( *argv != 0 && **argv == '-' ) {
71	(*argv)++;
72	switch ( **argv ) {
73	case 'a':
74	    aflag = TRUE;
75	    break;
76	case 'b':
77	    bflag = FALSE;
78	    break;
79	case 'C':
80	    Cflag = TRUE;
81	    cyclethreshold = atoi( *++argv );
82	    break;
83	case 'c':
84#if defined(vax) || defined(tahoe)
85	    cflag = TRUE;
86#else
87	    errx(1, "-c isn't supported on this architecture yet");
88#endif
89	    break;
90	case 'd':
91	    dflag = TRUE;
92	    setlinebuf(stdout);
93	    debug |= atoi( *++argv );
94	    debug |= ANYDEBUG;
95#	    ifdef DEBUG
96		printf("[main] debug = %d\n", debug);
97#	    else not DEBUG
98		printf("gprof: -d ignored\n");
99#	    endif DEBUG
100	    break;
101	case 'E':
102	    ++argv;
103	    addlist( Elist , *argv );
104	    Eflag = TRUE;
105	    addlist( elist , *argv );
106	    eflag = TRUE;
107	    break;
108	case 'e':
109	    addlist( elist , *++argv );
110	    eflag = TRUE;
111	    break;
112	case 'F':
113	    ++argv;
114	    addlist( Flist , *argv );
115	    Fflag = TRUE;
116	    addlist( flist , *argv );
117	    fflag = TRUE;
118	    break;
119	case 'f':
120	    addlist( flist , *++argv );
121	    fflag = TRUE;
122	    break;
123	case 'k':
124	    addlist( kfromlist , *++argv );
125	    addlist( ktolist , *++argv );
126	    kflag = TRUE;
127	    break;
128    case 'l':
129	    lflag = 1;
130	    Lflag = 0;
131	    break;
132    case 'L':
133	    Lflag = 1;
134	    lflag = 0;
135	    break;
136    case 's':
137	    sflag = TRUE;
138	    break;
139	case 'u':
140	    uflag = TRUE;
141	    break;
142	case 'z':
143	    zflag = TRUE;
144	    break;
145	}
146	argv++;
147    }
148    if ( *argv != 0 ) {
149	a_outname  = *argv;
150	argv++;
151    } else {
152	a_outname  = A_OUTNAME;
153    }
154    if ( *argv != 0 ) {
155	gmonname = *argv;
156	argv++;
157    } else {
158	gmonname = (char *) malloc(strlen(a_outname)+6);
159	strcpy(gmonname, a_outname);
160	strcat(gmonname, ".gmon");
161    }
162	/*
163	 *	get information from the executable file.
164	 */
165    if (elf_getnfile(a_outname, &defaultEs) == -1 &&
166      aout_getnfile(a_outname, &defaultEs) == -1)
167	errx(1, "%s: bad format", a_outname);
168	/*
169	 *	sort symbol table.
170	 */
171    qsort(nl, nname, sizeof(nltype), valcmp);
172	/*
173	 *	turn off default functions
174	 */
175    for ( sp = defaultEs ; *sp ; sp++ ) {
176	Eflag = TRUE;
177	addlist( Elist , *sp );
178	eflag = TRUE;
179	addlist( elist , *sp );
180    }
181	/*
182	 *	get information about mon.out file(s).
183	 */
184    do	{
185	getpfile( gmonname );
186	if ( *argv != 0 ) {
187	    gmonname = *argv;
188	}
189    } while ( *argv++ != 0 );
190	/*
191	 *	how many ticks per second?
192	 *	if we can't tell, report time in ticks.
193	 */
194    if (hz == 0) {
195	hz = 1;
196	fprintf(stderr, "time is in ticks, not seconds\n");
197    }
198	/*
199	 *	dump out a gmon.sum file if requested
200	 */
201    if ( sflag ) {
202	dumpsum( GMONSUM );
203    }
204	/*
205	 *	assign samples to procedures
206	 */
207    asgnsamples();
208	/*
209	 *	assemble the dynamic profile
210	 */
211    timesortnlp = doarcs();
212	/*
213	 *	print the dynamic profile
214	 */
215    if(!lflag) {
216	    printgprof( timesortnlp );
217    }
218	/*
219	 *	print the flat profile
220	 */
221    if(!Lflag) {
222	    printprof();
223    }
224	/*
225	 *	print the index
226	 */
227    printindex();
228    done();
229}
230
231    /*
232     *	information from a gmon.out file is in two parts:
233     *	an array of sampling hits within pc ranges,
234     *	and the arcs.
235     */
236getpfile(filename)
237    char *filename;
238{
239    FILE		*pfile;
240    FILE		*openpfile();
241    struct rawarc	arc;
242
243    pfile = openpfile(filename);
244    readsamples(pfile);
245	/*
246	 *	the rest of the file consists of
247	 *	a bunch of <from,self,count> tuples.
248	 */
249    while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) {
250#	ifdef DEBUG
251	    if ( debug & SAMPLEDEBUG ) {
252		printf( "[getpfile] frompc 0x%x selfpc 0x%x count %d\n" ,
253			arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
254	    }
255#	endif DEBUG
256	    /*
257	     *	add this arc
258	     */
259	tally( &arc );
260    }
261    fclose(pfile);
262}
263
264FILE *
265openpfile(filename)
266    char *filename;
267{
268    struct gmonhdr	tmp;
269    FILE		*pfile;
270    int			size;
271    int			rate;
272
273    if((pfile = fopen(filename, "r")) == NULL) {
274	perror(filename);
275	done();
276    }
277    fread(&tmp, sizeof(struct gmonhdr), 1, pfile);
278    if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc ||
279	 tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt ) ) {
280	warnx("%s: incompatible with first gmon file", filename);
281	done();
282    }
283    gmonhdr = tmp;
284    if ( gmonhdr.version == GMONVERSION ) {
285	rate = gmonhdr.profrate;
286	size = sizeof(struct gmonhdr);
287    } else {
288	fseek(pfile, sizeof(struct ophdr), SEEK_SET);
289	size = sizeof(struct ophdr);
290	gmonhdr.profrate = rate = hertz();
291	gmonhdr.version = GMONVERSION;
292    }
293    if (hz == 0) {
294	hz = rate;
295    } else if (hz != rate) {
296	fprintf(stderr,
297	    "%s: profile clock rate (%d) %s (%d) in first gmon file\n",
298	    filename, rate, "incompatible with clock rate", hz);
299	done();
300    }
301    s_lowpc = (unsigned long) gmonhdr.lpc;
302    s_highpc = (unsigned long) gmonhdr.hpc;
303    lowpc = (unsigned long)gmonhdr.lpc / sizeof(UNIT);
304    highpc = (unsigned long)gmonhdr.hpc / sizeof(UNIT);
305    sampbytes = gmonhdr.ncnt - size;
306    nsamples = sampbytes / sizeof (UNIT);
307#   ifdef DEBUG
308	if ( debug & SAMPLEDEBUG ) {
309	    printf( "[openpfile] hdr.lpc 0x%x hdr.hpc 0x%x hdr.ncnt %d\n",
310		gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt );
311	    printf( "[openpfile]   s_lowpc 0x%x   s_highpc 0x%x\n" ,
312		s_lowpc , s_highpc );
313	    printf( "[openpfile]     lowpc 0x%x     highpc 0x%x\n" ,
314		lowpc , highpc );
315	    printf( "[openpfile] sampbytes %d nsamples %d\n" ,
316		sampbytes , nsamples );
317	    printf( "[openpfile] sample rate %d\n" , hz );
318	}
319#   endif DEBUG
320    return(pfile);
321}
322
323tally( rawp )
324    struct rawarc	*rawp;
325{
326    nltype		*parentp;
327    nltype		*childp;
328
329    parentp = nllookup( rawp -> raw_frompc );
330    childp = nllookup( rawp -> raw_selfpc );
331    if ( parentp == 0 || childp == 0 )
332	return;
333    if ( kflag
334	 && onlist( kfromlist , parentp -> name )
335	 && onlist( ktolist , childp -> name ) ) {
336	return;
337    }
338    childp -> ncall += rawp -> raw_count;
339#   ifdef DEBUG
340	if ( debug & TALLYDEBUG ) {
341	    printf( "[tally] arc from %s to %s traversed %d times\n" ,
342		    parentp -> name , childp -> name , rawp -> raw_count );
343	}
344#   endif DEBUG
345    addarc( parentp , childp , rawp -> raw_count );
346}
347
348/*
349 * dump out the gmon.sum file
350 */
351dumpsum( sumfile )
352    char *sumfile;
353{
354    register nltype *nlp;
355    register arctype *arcp;
356    struct rawarc arc;
357    FILE *sfile;
358
359    if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL ) {
360	perror( sumfile );
361	done();
362    }
363    /*
364     * dump the header; use the last header read in
365     */
366    if ( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 ) {
367	perror( sumfile );
368	done();
369    }
370    /*
371     * dump the samples
372     */
373    if (fwrite(samples, sizeof (UNIT), nsamples, sfile) != nsamples) {
374	perror( sumfile );
375	done();
376    }
377    /*
378     * dump the normalized raw arc information
379     */
380    for ( nlp = nl ; nlp < npe ; nlp++ ) {
381	for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) {
382	    arc.raw_frompc = arcp -> arc_parentp -> value;
383	    arc.raw_selfpc = arcp -> arc_childp -> value;
384	    arc.raw_count = arcp -> arc_count;
385	    if ( fwrite ( &arc , sizeof arc , 1 , sfile ) != 1 ) {
386		perror( sumfile );
387		done();
388	    }
389#	    ifdef DEBUG
390		if ( debug & SAMPLEDEBUG ) {
391		    printf( "[dumpsum] frompc 0x%x selfpc 0x%x count %d\n" ,
392			    arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
393		}
394#	    endif DEBUG
395	}
396    }
397    fclose( sfile );
398}
399
400static int
401valcmp(v1, v2)
402    const void *v1;
403    const void *v2;
404{
405    const nltype *p1 = (const nltype *)v1;
406    const nltype *p2 = (const nltype *)v2;
407
408    if ( p1 -> value < p2 -> value ) {
409	return LESSTHAN;
410    }
411    if ( p1 -> value > p2 -> value ) {
412	return GREATERTHAN;
413    }
414    return EQUALTO;
415}
416
417readsamples(pfile)
418    FILE	*pfile;
419{
420    register i;
421    UNIT	sample;
422
423    if (samples == 0) {
424	samples = (UNIT *) calloc(sampbytes, sizeof (UNIT));
425	if (samples == 0) {
426	    warnx("no room for %d sample pc's", sampbytes / sizeof (UNIT));
427	    done();
428	}
429    }
430    for (i = 0; i < nsamples; i++) {
431	fread(&sample, sizeof (UNIT), 1, pfile);
432	if (feof(pfile))
433		break;
434	samples[i] += sample;
435    }
436    if (i != nsamples) {
437	warnx("unexpected EOF after reading %d/%d samples", --i , nsamples );
438	done();
439    }
440}
441
442/*
443 *	Assign samples to the procedures to which they belong.
444 *
445 *	There are three cases as to where pcl and pch can be
446 *	with respect to the routine entry addresses svalue0 and svalue1
447 *	as shown in the following diagram.  overlap computes the
448 *	distance between the arrows, the fraction of the sample
449 *	that is to be credited to the routine which starts at svalue0.
450 *
451 *	    svalue0                                         svalue1
452 *	       |                                               |
453 *	       v                                               v
454 *
455 *	       +-----------------------------------------------+
456 *	       |					       |
457 *	  |  ->|    |<-		->|         |<-		->|    |<-  |
458 *	  |         |		  |         |		  |         |
459 *	  +---------+		  +---------+		  +---------+
460 *
461 *	  ^         ^		  ^         ^		  ^         ^
462 *	  |         |		  |         |		  |         |
463 *	 pcl       pch		 pcl       pch		 pcl       pch
464 *
465 *	For the vax we assert that samples will never fall in the first
466 *	two bytes of any routine, since that is the entry mask,
467 *	thus we give call alignentries() to adjust the entry points if
468 *	the entry mask falls in one bucket but the code for the routine
469 *	doesn't start until the next bucket.  In conjunction with the
470 *	alignment of routine addresses, this should allow us to have
471 *	only one sample for every four bytes of text space and never
472 *	have any overlap (the two end cases, above).
473 */
474asgnsamples()
475{
476    register int	j;
477    UNIT		ccnt;
478    double		time;
479    unsigned long	pcl, pch;
480    register int	i;
481    unsigned long	overlap;
482    unsigned long	svalue0, svalue1;
483
484    /* read samples and assign to namelist symbols */
485    scale = highpc - lowpc;
486    scale /= nsamples;
487    alignentries();
488    for (i = 0, j = 1; i < nsamples; i++) {
489	ccnt = samples[i];
490	if (ccnt == 0)
491		continue;
492	pcl = lowpc + (unsigned long)(scale * i);
493	pch = lowpc + (unsigned long)(scale * (i + 1));
494	time = ccnt;
495#	ifdef DEBUG
496	    if ( debug & SAMPLEDEBUG ) {
497		printf( "[asgnsamples] pcl 0x%x pch 0x%x ccnt %d\n" ,
498			pcl , pch , ccnt );
499	    }
500#	endif DEBUG
501	totime += time;
502	for (j = j - 1; j < nname; j++) {
503	    svalue0 = nl[j].svalue;
504	    svalue1 = nl[j+1].svalue;
505		/*
506		 *	if high end of tick is below entry address,
507		 *	go for next tick.
508		 */
509	    if (pch < svalue0)
510		    break;
511		/*
512		 *	if low end of tick into next routine,
513		 *	go for next routine.
514		 */
515	    if (pcl >= svalue1)
516		    continue;
517	    overlap = min(pch, svalue1) - max(pcl, svalue0);
518	    if (overlap > 0) {
519#		ifdef DEBUG
520		    if (debug & SAMPLEDEBUG) {
521			printf("[asgnsamples] (0x%x->0x%x-0x%x) %s gets %f ticks %d overlap\n",
522				nl[j].value/sizeof(UNIT), svalue0, svalue1,
523				nl[j].name,
524				overlap * time / scale, overlap);
525		    }
526#		endif DEBUG
527		nl[j].time += overlap * time / scale;
528	    }
529	}
530    }
531#   ifdef DEBUG
532	if (debug & SAMPLEDEBUG) {
533	    printf("[asgnsamples] totime %f\n", totime);
534	}
535#   endif DEBUG
536}
537
538
539unsigned long
540min(a, b)
541    unsigned long a,b;
542{
543    if (a<b)
544	return(a);
545    return(b);
546}
547
548unsigned long
549max(a, b)
550    unsigned long a,b;
551{
552    if (a>b)
553	return(a);
554    return(b);
555}
556
557    /*
558     *	calculate scaled entry point addresses (to save time in asgnsamples),
559     *	and possibly push the scaled entry points over the entry mask,
560     *	if it turns out that the entry point is in one bucket and the code
561     *	for a routine is in the next bucket.
562     */
563alignentries()
564{
565    register struct nl	*nlp;
566    unsigned long	bucket_of_entry;
567    unsigned long	bucket_of_code;
568
569    for (nlp = nl; nlp < npe; nlp++) {
570	nlp -> svalue = nlp -> value / sizeof(UNIT);
571	bucket_of_entry = (nlp->svalue - lowpc) / scale;
572	bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale;
573	if (bucket_of_entry < bucket_of_code) {
574#	    ifdef DEBUG
575		if (debug & SAMPLEDEBUG) {
576		    printf("[alignentries] pushing svalue 0x%x to 0x%x\n",
577			    nlp->svalue, nlp->svalue + UNITS_TO_CODE);
578		}
579#	    endif DEBUG
580	    nlp->svalue += UNITS_TO_CODE;
581	}
582    }
583}
584
585done()
586{
587
588    exit(0);
589}
590