1/*	$OpenBSD: gprof.c,v 1.27 2021/01/27 07:18:41 deraadt Exp $	*/
2/*	$NetBSD: gprof.c,v 1.8 1995/04/19 07:15:59 cgd Exp $	*/
3
4/*
5 * Copyright (c) 1983, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#include "gprof.h"
34
35int valcmp(const void *, const void *);
36
37static struct gmonhdr	gmonhdr;
38extern char *__progname;
39
40long    hz;
41char    *a_outname;
42char    *gmonname;
43nltype  *nl;                    /* the whole namelist */
44nltype  *npe;                   /* the virtual end of the namelist */
45int     nname;                  /* the number of function names */
46arctype *archead;               /* the head of arcs in current cycle list */
47cltype  *cyclehead;             /* the head of the list */
48int     cyclecnt;               /* the number of cycles found */
49nltype  *cyclenl;               /* cycle header namelist */
50int     ncycle;                 /* number of cycles discovered */
51int     debug;
52UNIT    *samples;
53unsigned long   s_lowpc;        /* lowpc from the profile file */
54unsigned long   s_highpc;       /* highpc from the profile file */
55unsigned long   lowpc, highpc;  /* range profiled, in UNIT's */
56unsigned sampbytes;             /* number of bytes of samples */
57int     nsamples;               /* number of samples */
58double  actime;                 /* accumulated time thus far for putprofline */
59double  totime;                 /* total time for all routines */
60double  printtime;              /* total of time being printed */
61double  scale;                  /* scale factor converting samples to pc */
62unsigned char   *textspace;     /* text space of a.out in core */
63int     cyclethreshold;         /* with -C, minimum cycle size to ignore */
64bool    aflag;                          /* suppress static functions */
65bool    bflag;                          /* blurbs, too */
66bool    cflag;                          /* discovered call graph, too */
67bool    Cflag;                          /* find cut-set to eliminate cycles */
68bool    dflag;                          /* debugging options */
69bool    eflag;                          /* specific functions excluded */
70bool    Eflag;                          /* functions excluded with time */
71bool    fflag;                          /* specific functions requested */
72bool    Fflag;                          /* functions requested with time */
73bool    kflag;                          /* arcs to be deleted */
74bool    sflag;                          /* sum multiple gmon.out files */
75bool    zflag;                          /* zero time/called functions, too */
76
77int
78main(int argc, char *argv[])
79{
80    char	**sp;
81    nltype	**timesortnlp;
82    char	**defaultEs;
83
84    if (pledge("stdio rpath wpath cpath", NULL) == -1)
85        err(1, NULL);
86
87    --argc;
88    argv++;
89    debug = 0;
90    bflag = TRUE;
91    while ( *argv != 0 && **argv == '-' ) {
92	(*argv)++;
93	switch ( **argv ) {
94	case 'a':
95	    aflag = TRUE;
96	    break;
97	case 'b':
98	    bflag = FALSE;
99	    break;
100	case 'C':
101	    Cflag = TRUE;
102	    cyclethreshold = atoi( *++argv );
103	    break;
104	case 'c':
105#if defined(__i386__) || defined(__mips64__)
106	    cflag = TRUE;
107#else
108	    fprintf(stderr, "%s: -c isn't supported on this architecture yet\n", __progname);
109	    exit(1);
110#endif
111	    break;
112	case 'd':
113	    dflag = TRUE;
114	    setvbuf(stdout, NULL, _IOLBF, 0);
115	    debug |= atoi( *++argv );
116	    debug |= ANYDEBUG;
117#	    ifdef DEBUG
118		printf("[main] debug = %d\n", debug);
119#	    else /* not DEBUG */
120		warnx("-d ignored");
121#	    endif /* DEBUG */
122	    break;
123	case 'E':
124	    ++argv;
125	    addlist( Elist , *argv );
126	    Eflag = TRUE;
127	    addlist( elist , *argv );
128	    eflag = TRUE;
129	    break;
130	case 'e':
131	    addlist( elist , *++argv );
132	    eflag = TRUE;
133	    break;
134	case 'F':
135	    ++argv;
136	    addlist( Flist , *argv );
137	    Fflag = TRUE;
138	    addlist( flist , *argv );
139	    fflag = TRUE;
140	    break;
141	case 'f':
142	    addlist( flist , *++argv );
143	    fflag = TRUE;
144	    break;
145	case 'k':
146	    addlist( kfromlist , *++argv );
147	    addlist( ktolist , *++argv );
148	    kflag = TRUE;
149	    break;
150	case 's':
151	    sflag = TRUE;
152	    break;
153	case 'z':
154	    zflag = TRUE;
155	    break;
156	}
157	argv++;
158    }
159    if ( *argv != 0 ) {
160	a_outname  = *argv;
161	argv++;
162    } else {
163	a_outname  = A_OUTNAME;
164    }
165    if ( *argv != 0 ) {
166	gmonname = *argv;
167	argv++;
168    } else {
169	gmonname = GMONNAME;
170    }
171    if ( sflag == FALSE ) {
172        if (pledge("stdio rpath", NULL) == -1)
173            err(1, "pledge");
174    }
175	/*
176	 *	get information about a.out file.
177	 */
178    if (getnfile(a_outname, &defaultEs) == -1)
179	errx(1, "%s: bad format", a_outname);
180	/*
181	 *	sort symbol table.
182	 */
183    qsort(nl, nname, sizeof(nltype), valcmp);
184	/*
185	 *	turn off default functions
186	 */
187    for ( sp = &defaultEs[0] ; *sp ; sp++ ) {
188	Eflag = TRUE;
189	addlist( Elist , *sp );
190	eflag = TRUE;
191	addlist( elist , *sp );
192    }
193	/*
194	 *	get information about mon.out file(s).
195	 */
196    do	{
197	getpfile( gmonname );
198	if ( *argv != 0 ) {
199	    gmonname = *argv;
200	}
201    } while ( *argv++ != 0 );
202	/*
203	 *	how many ticks per second?
204	 *	if we can't tell, report time in ticks.
205	 */
206    if (hz == 0) {
207	hz = 1;
208	warnx("time is in ticks, not seconds");
209    }
210	/*
211	 *	dump out a gmon.sum file if requested
212	 */
213    if ( sflag ) {
214	dumpsum( GMONSUM );
215    }
216	/*
217	 *	assign samples to procedures
218	 */
219    asgnsamples();
220	/*
221	 *	assemble the dynamic profile
222	 */
223    timesortnlp = doarcs();
224	/*
225	 *	print the dynamic profile
226	 */
227    printgprof( timesortnlp );
228	/*
229	 *	print the flat profile
230	 */
231    printprof();
232	/*
233	 *	print the index
234	 */
235    printindex();
236
237    return (0);
238}
239
240    /*
241     *	information from a gmon.out file is in two parts:
242     *	an array of sampling hits within pc ranges,
243     *	and the arcs.
244     */
245void
246getpfile(const char *filename)
247{
248    FILE		*pfile;
249    struct rawarc	arc;
250
251    pfile = openpfile(filename);
252    readsamples(pfile);
253	/*
254	 *	the rest of the file consists of
255	 *	a bunch of <from,self,count> tuples.
256	 */
257    while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) {
258#	ifdef DEBUG
259	    if ( debug & SAMPLEDEBUG ) {
260		printf( "[getpfile] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
261			arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
262	    }
263#	endif /* DEBUG */
264	    /*
265	     *	add this arc
266	     */
267	tally( &arc );
268    }
269    fclose(pfile);
270}
271
272FILE *
273openpfile(const char *filename)
274{
275    struct gmonhdr	tmp;
276    FILE		*pfile;
277    int			size;
278    int			rate;
279
280    if((pfile = fopen(filename, "r")) == NULL)
281	err(1, "fopen: %s", filename);
282    if (fread(&tmp, sizeof(struct gmonhdr), 1, pfile) != 1)
283	errx(1, "%s: bad gmon header", filename);
284    if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc ||
285	 tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt))
286	errx(1, "%s: incompatible with first gmon file", filename);
287    gmonhdr = tmp;
288    if ( gmonhdr.version == GMONVERSION ) {
289	rate = gmonhdr.profrate;
290	size = sizeof(struct gmonhdr);
291    } else {
292	fseek(pfile, sizeof(struct ophdr), SEEK_SET);
293	size = sizeof(struct ophdr);
294	gmonhdr.profrate = rate = hertz();
295	gmonhdr.version = GMONVERSION;
296    }
297    if (hz == 0) {
298	hz = rate;
299    } else if (hz != rate)
300	errx(1, "%s: profile clock rate (%d) incompatible with clock rate "
301	    "(%ld) in first gmon file", filename, rate, hz);
302    s_lowpc = (unsigned long) gmonhdr.lpc;
303    s_highpc = (unsigned long) gmonhdr.hpc;
304    lowpc = (unsigned long)gmonhdr.lpc / sizeof(UNIT);
305    highpc = (unsigned long)gmonhdr.hpc / sizeof(UNIT);
306    sampbytes = gmonhdr.ncnt - size;
307    nsamples = sampbytes / sizeof (UNIT);
308#   ifdef DEBUG
309	if ( debug & SAMPLEDEBUG ) {
310	    printf( "[openpfile] hdr.lpc 0x%lx hdr.hpc 0x%lx hdr.ncnt %d\n",
311		gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt );
312	    printf( "[openpfile]   s_lowpc 0x%lx   s_highpc 0x%lx\n" ,
313		s_lowpc , s_highpc );
314	    printf( "[openpfile]     lowpc 0x%lx     highpc 0x%lx\n" ,
315		lowpc , highpc );
316	    printf( "[openpfile] sampbytes %d nsamples %d\n" ,
317		sampbytes , nsamples );
318	    printf( "[openpfile] sample rate %ld\n" , hz );
319	}
320#   endif /* DEBUG */
321    return(pfile);
322}
323
324void
325tally(struct rawarc *rawp)
326{
327    nltype		*parentp;
328    nltype		*childp;
329
330    parentp = nllookup( rawp -> raw_frompc );
331    childp = nllookup( rawp -> raw_selfpc );
332    if ( parentp == 0 || childp == 0 )
333	return;
334    if ( kflag
335	 && onlist( kfromlist , parentp -> name )
336	 && onlist( ktolist , childp -> name ) ) {
337	return;
338    }
339    childp -> ncall += rawp -> raw_count;
340#   ifdef DEBUG
341	if ( debug & TALLYDEBUG ) {
342	    printf( "[tally] arc from %s to %s traversed %ld times\n" ,
343		    parentp -> name , childp -> name , rawp -> raw_count );
344	}
345#   endif /* DEBUG */
346    addarc( parentp , childp , rawp -> raw_count );
347}
348
349/*
350 * dump out the gmon.sum file
351 */
352void
353dumpsum(const char *sumfile)
354{
355    nltype *nlp;
356    arctype *arcp;
357    struct rawarc arc;
358    FILE *sfile;
359
360    if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL )
361	err(1, "fopen: %s", sumfile);
362    /*
363     * dump the header; use the last header read in
364     */
365    if ( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 )
366	err(1, "fwrite: %s", sumfile);
367    /*
368     * dump the samples
369     */
370    if (fwrite(samples, sizeof (UNIT), nsamples, sfile) != nsamples)
371	err(1, "fwrite: %s", sumfile);
372    /*
373     * dump the normalized raw arc information
374     */
375    for ( nlp = nl ; nlp < npe ; nlp++ ) {
376	for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) {
377	    arc.raw_frompc = arcp -> arc_parentp -> value;
378	    arc.raw_selfpc = arcp -> arc_childp -> value;
379	    arc.raw_count = arcp -> arc_count;
380	    if (fwrite ( &arc , sizeof arc , 1 , sfile ) != 1)
381	        err(1, "fwrite: %s", sumfile);
382#	    ifdef DEBUG
383		if ( debug & SAMPLEDEBUG ) {
384		    printf( "[dumpsum] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
385			    arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
386		}
387#	    endif /* DEBUG */
388	}
389    }
390    fclose( sfile );
391}
392
393int
394valcmp(const void *vp1, const void *vp2)
395{
396    const nltype *p1 = vp1;
397    const nltype *p2 = vp2;
398
399    if ( p1 -> value < p2 -> value ) {
400	return LESSTHAN;
401    }
402    if ( p1 -> value > p2 -> value ) {
403	return GREATERTHAN;
404    }
405    return EQUALTO;
406}
407
408void
409readsamples(FILE *pfile)
410{
411    UNIT	sample;
412    int i;
413
414    if (samples == 0) {
415	samples = calloc(sampbytes, sizeof (UNIT));
416	if (samples == 0)
417	    errx(1, "No room for %ld sample pc's", sampbytes / sizeof (UNIT));
418    }
419    for (i = 0; i < nsamples; i++) {
420	fread(&sample, sizeof (UNIT), 1, pfile);
421	if (feof(pfile))
422		break;
423	samples[i] += sample;
424    }
425    if (i != nsamples)
426	errx(1, "unexpected EOF after reading %d/%d samples", i, nsamples );
427}
428
429/*
430 *	Assign samples to the procedures to which they belong.
431 *
432 *	There are three cases as to where pcl and pch can be
433 *	with respect to the routine entry addresses svalue0 and svalue1
434 *	as shown in the following diagram.  overlap computes the
435 *	distance between the arrows, the fraction of the sample
436 *	that is to be credited to the routine which starts at svalue0.
437 *
438 *	    svalue0                                         svalue1
439 *	       |                                               |
440 *	       v                                               v
441 *
442 *	       +-----------------------------------------------+
443 *	       |					       |
444 *	  |  ->|    |<-		->|         |<-		->|    |<-  |
445 *	  |         |		  |         |		  |         |
446 *	  +---------+		  +---------+		  +---------+
447 *
448 *	  ^         ^		  ^         ^		  ^         ^
449 *	  |         |		  |         |		  |         |
450 *	 pcl       pch		 pcl       pch		 pcl       pch
451 *
452 *	For the vax we assert that samples will never fall in the first
453 *	two bytes of any routine, since that is the entry mask,
454 *	thus we give call alignentries() to adjust the entry points if
455 *	the entry mask falls in one bucket but the code for the routine
456 *	doesn't start until the next bucket.  In conjunction with the
457 *	alignment of routine addresses, this should allow us to have
458 *	only one sample for every four bytes of text space and never
459 *	have any overlap (the two end cases, above).
460 */
461void
462asgnsamples(void)
463{
464    int	j;
465    UNIT		ccnt;
466    double		time;
467    unsigned long	pcl, pch;
468    unsigned long	i;
469    unsigned long	overlap;
470    unsigned long	svalue0, svalue1;
471
472    /* read samples and assign to namelist symbols */
473    scale = highpc - lowpc;
474    scale /= nsamples;
475    alignentries();
476    for (i = 0, j = 1; i < nsamples; i++) {
477	ccnt = samples[i];
478	if (ccnt == 0)
479		continue;
480	pcl = lowpc + (unsigned long)(scale * i);
481	pch = lowpc + (unsigned long)(scale * (i + 1));
482	time = ccnt;
483#	ifdef DEBUG
484	    if ( debug & SAMPLEDEBUG ) {
485		printf( "[asgnsamples] pcl 0x%lx pch 0x%lx ccnt %d\n" ,
486			pcl , pch , ccnt );
487	    }
488#	endif /* DEBUG */
489	totime += time;
490	for (j = j - 1; j < nname; j++) {
491	    svalue0 = nl[j].svalue;
492	    svalue1 = nl[j+1].svalue;
493		/*
494		 *	if high end of tick is below entry address,
495		 *	go for next tick.
496		 */
497	    if (pch < svalue0)
498		    break;
499		/*
500		 *	if low end of tick into next routine,
501		 *	go for next routine.
502		 */
503	    if (pcl >= svalue1)
504		    continue;
505	    overlap = min(pch, svalue1) - max(pcl, svalue0);
506	    if (overlap > 0) {
507#		ifdef DEBUG
508		    if (debug & SAMPLEDEBUG) {
509			printf("[asgnsamples] (0x%lx->0x%lx-0x%lx) %s gets %f ticks %ld overlap\n",
510				nl[j].value/sizeof(UNIT), svalue0, svalue1,
511				nl[j].name,
512				overlap * time / scale, overlap);
513		    }
514#		endif /* DEBUG */
515		nl[j].time += overlap * time / scale;
516	    }
517	}
518    }
519#   ifdef DEBUG
520	if (debug & SAMPLEDEBUG) {
521	    printf("[asgnsamples] totime %f\n", totime);
522	}
523#   endif /* DEBUG */
524}
525
526
527unsigned long
528min(unsigned long a, unsigned long b)
529{
530    if (a<b)
531	return(a);
532    return(b);
533}
534
535unsigned long
536max(unsigned long a, unsigned long b)
537{
538    if (a>b)
539	return(a);
540    return(b);
541}
542
543    /*
544     *	calculate scaled entry point addresses (to save time in asgnsamples),
545     *	and possibly push the scaled entry points over the entry mask,
546     *	if it turns out that the entry point is in one bucket and the code
547     *	for a routine is in the next bucket.
548     */
549void
550alignentries(void)
551{
552    struct nl		*nlp;
553    unsigned long	bucket_of_entry;
554    unsigned long	bucket_of_code;
555
556    for (nlp = nl; nlp < npe; nlp++) {
557	nlp -> svalue = nlp -> value / sizeof(UNIT);
558	bucket_of_entry = (nlp->svalue - lowpc) / scale;
559	bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale;
560	if (bucket_of_entry < bucket_of_code) {
561#	    ifdef DEBUG
562		if (debug & SAMPLEDEBUG) {
563		    printf("[alignentries] pushing svalue 0x%lx to 0x%lx\n",
564			    nlp->svalue, nlp->svalue + UNITS_TO_CODE);
565		}
566#	    endif /* DEBUG */
567	    nlp->svalue += UNITS_TO_CODE;
568	}
569    }
570}
571