1/*
2 * Copyright (c) 1983, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#ifndef lint
31static const char copyright[] =
32"@(#) Copyright (c) 1983, 1993\n\
33	The Regents of the University of California.  All rights reserved.\n";
34#endif /* not lint */
35
36#if 0
37#ifndef lint
38static char sccsid[] = "@(#)gprof.c	8.1 (Berkeley) 6/6/93";
39#endif /* not lint */
40#endif
41
42#include <sys/cdefs.h>
43__FBSDID("$FreeBSD: stable/11/usr.bin/gprof/gprof.c 359763 2020-04-10 00:27:19Z kevans $");
44
45#include <err.h>
46#include <limits.h>
47#include <stdint.h>
48#include <string.h>
49
50#define	EXTERN
51#include "gprof.h"
52
53static int valcmp(const void *, const void *);
54
55static struct gmonhdr	gmonhdr;
56static int lflag;
57static int Lflag;
58
59int
60main(int argc, char **argv)
61{
62    char	**sp;
63    nltype	**timesortnlp;
64    char	**defaultEs;
65
66    --argc;
67    argv++;
68    debug = 0;
69    bflag = TRUE;
70    while ( *argv != 0 && **argv == '-' ) {
71	(*argv)++;
72	switch ( **argv ) {
73	case 'a':
74	    aflag = TRUE;
75	    break;
76	case 'b':
77	    bflag = FALSE;
78	    break;
79	case 'C':
80	    Cflag = TRUE;
81	    cyclethreshold = atoi( *++argv );
82	    break;
83	case 'd':
84	    dflag = TRUE;
85	    setlinebuf(stdout);
86	    debug |= atoi( *++argv );
87	    debug |= ANYDEBUG;
88#	    ifdef DEBUG
89		printf("[main] debug = %d\n", debug);
90#	    else /* not DEBUG */
91		printf("gprof: -d ignored\n");
92#	    endif /* DEBUG */
93	    break;
94	case 'E':
95	    ++argv;
96	    addlist( Elist , *argv );
97	    Eflag = TRUE;
98	    addlist( elist , *argv );
99	    eflag = TRUE;
100	    break;
101	case 'e':
102	    addlist( elist , *++argv );
103	    eflag = TRUE;
104	    break;
105	case 'F':
106	    ++argv;
107	    addlist( Flist , *argv );
108	    Fflag = TRUE;
109	    addlist( flist , *argv );
110	    fflag = TRUE;
111	    break;
112	case 'f':
113	    addlist( flist , *++argv );
114	    fflag = TRUE;
115	    break;
116	case 'k':
117	    addlist( kfromlist , *++argv );
118	    addlist( ktolist , *++argv );
119	    kflag = TRUE;
120	    break;
121	case 'K':
122	    Kflag = TRUE;
123	    break;
124	case 'l':
125	    lflag = 1;
126	    Lflag = 0;
127	    break;
128	case 'L':
129	    Lflag = 1;
130	    lflag = 0;
131	    break;
132	case 's':
133	    sflag = TRUE;
134	    break;
135	case 'u':
136	    uflag = TRUE;
137	    break;
138	case 'z':
139	    zflag = TRUE;
140	    break;
141	}
142	argv++;
143    }
144    if ( *argv != 0 ) {
145	a_outname  = *argv;
146	argv++;
147    } else {
148	a_outname  = A_OUTNAME;
149    }
150    if ( *argv != 0 ) {
151	gmonname = *argv;
152	argv++;
153    } else {
154	gmonname = (char *) malloc(strlen(a_outname)+6);
155	strcpy(gmonname, a_outname);
156	strcat(gmonname, ".gmon");
157    }
158	/*
159	 *	get information from the executable file.
160	 */
161    if ((Kflag && kernel_getnfile(a_outname, &defaultEs) == -1) ||
162      (!Kflag && elf_getnfile(a_outname, &defaultEs) == -1 &&
163      aout_getnfile(a_outname, &defaultEs) == -1))
164	errx(1, "%s: bad format", a_outname);
165	/*
166	 *	sort symbol table.
167	 */
168    qsort(nl, nname, sizeof(nltype), valcmp);
169	/*
170	 *	turn off default functions
171	 */
172    for ( sp = defaultEs ; *sp ; sp++ ) {
173	Eflag = TRUE;
174	addlist( Elist , *sp );
175	eflag = TRUE;
176	addlist( elist , *sp );
177    }
178	/*
179	 *	get information about mon.out file(s).
180	 */
181    do	{
182	getpfile( gmonname );
183	if ( *argv != 0 ) {
184	    gmonname = *argv;
185	}
186    } while ( *argv++ != 0 );
187	/*
188	 *	how many ticks per second?
189	 *	if we can't tell, report time in ticks.
190	 */
191    if (hz == 0) {
192	hz = 1;
193	fprintf(stderr, "time is in ticks, not seconds\n");
194    }
195	/*
196	 *	dump out a gmon.sum file if requested
197	 */
198    if ( sflag ) {
199	dumpsum( GMONSUM );
200    }
201	/*
202	 *	assign samples to procedures
203	 */
204    asgnsamples();
205	/*
206	 *	assemble the dynamic profile
207	 */
208    timesortnlp = doarcs();
209	/*
210	 *	print the dynamic profile
211	 */
212    if(!lflag) {
213	    printgprof( timesortnlp );
214    }
215	/*
216	 *	print the flat profile
217	 */
218    if(!Lflag) {
219	    printprof();
220    }
221	/*
222	 *	print the index
223	 */
224    printindex();
225    exit(0);
226}
227
228    /*
229     *	information from a gmon.out file is in two parts:
230     *	an array of sampling hits within pc ranges,
231     *	and the arcs.
232     */
233void
234getpfile(char *filename)
235{
236    FILE		*pfile;
237    struct rawarc	arc;
238
239    pfile = openpfile(filename);
240    readsamples(pfile);
241	/*
242	 *	the rest of the file consists of
243	 *	a bunch of <from,self,count> tuples.
244	 */
245    while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) {
246#	ifdef DEBUG
247	    if ( debug & SAMPLEDEBUG ) {
248		printf( "[getpfile] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
249			arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
250	    }
251#	endif /* DEBUG */
252	    /*
253	     *	add this arc
254	     */
255	tally( &arc );
256    }
257    fclose(pfile);
258}
259
260FILE *
261openpfile(char *filename)
262{
263    struct gmonhdr	tmp;
264    FILE		*pfile;
265    int			size;
266    int			rate;
267
268    if((pfile = fopen(filename, "r")) == NULL)
269	err(1, "%s", filename);
270    fread(&tmp, sizeof(struct gmonhdr), 1, pfile);
271    if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc ||
272	 tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt ) )
273	errx(1, "%s: incompatible with first gmon file", filename);
274    gmonhdr = tmp;
275    if ( gmonhdr.version == GMONVERSION ) {
276	rate = gmonhdr.profrate;
277	size = sizeof(struct gmonhdr);
278    } else {
279	fseek(pfile, sizeof(struct ophdr), SEEK_SET);
280	size = sizeof(struct ophdr);
281	gmonhdr.profrate = rate = hertz();
282	gmonhdr.version = GMONVERSION;
283    }
284    if (hz == 0) {
285	hz = rate;
286    } else if (hz != rate)
287	errx(0, "%s: profile clock rate (%d) %s (%ld) in first gmon file",
288	    filename, rate, "incompatible with clock rate", hz);
289    if ( gmonhdr.histcounter_type == 0 ) {
290	/* Historical case.  The type was u_short (2 bytes in practice). */
291	histcounter_type = 16;
292	histcounter_size = 2;
293    } else {
294	histcounter_type = gmonhdr.histcounter_type;
295	histcounter_size = abs(histcounter_type) / CHAR_BIT;
296    }
297    s_lowpc = (unsigned long) gmonhdr.lpc;
298    s_highpc = (unsigned long) gmonhdr.hpc;
299    lowpc = (unsigned long)gmonhdr.lpc / HISTORICAL_SCALE_2;
300    highpc = (unsigned long)gmonhdr.hpc / HISTORICAL_SCALE_2;
301    sampbytes = gmonhdr.ncnt - size;
302    nsamples = sampbytes / histcounter_size;
303#   ifdef DEBUG
304	if ( debug & SAMPLEDEBUG ) {
305	    printf( "[openpfile] hdr.lpc 0x%lx hdr.hpc 0x%lx hdr.ncnt %d\n",
306		gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt );
307	    printf( "[openpfile]   s_lowpc 0x%lx   s_highpc 0x%lx\n" ,
308		s_lowpc , s_highpc );
309	    printf( "[openpfile]     lowpc 0x%lx     highpc 0x%lx\n" ,
310		lowpc , highpc );
311	    printf( "[openpfile] sampbytes %d nsamples %d\n" ,
312		sampbytes , nsamples );
313	    printf( "[openpfile] sample rate %ld\n" , hz );
314	}
315#   endif /* DEBUG */
316    return(pfile);
317}
318
319void
320tally(struct rawarc *rawp)
321{
322    nltype		*parentp;
323    nltype		*childp;
324
325    parentp = nllookup( rawp -> raw_frompc );
326    childp = nllookup( rawp -> raw_selfpc );
327    if ( parentp == 0 || childp == 0 )
328	return;
329    if ( kflag
330	 && onlist( kfromlist , parentp -> name )
331	 && onlist( ktolist , childp -> name ) ) {
332	return;
333    }
334    childp -> ncall += rawp -> raw_count;
335#   ifdef DEBUG
336	if ( debug & TALLYDEBUG ) {
337	    printf( "[tally] arc from %s to %s traversed %ld times\n" ,
338		    parentp -> name , childp -> name , rawp -> raw_count );
339	}
340#   endif /* DEBUG */
341    addarc( parentp , childp , rawp -> raw_count );
342}
343
344/*
345 * dump out the gmon.sum file
346 */
347void
348dumpsum(const char *sumfile)
349{
350    register nltype *nlp;
351    register arctype *arcp;
352    struct rawarc arc;
353    FILE *sfile;
354
355    if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL )
356	err( 1 , "%s" , sumfile );
357    /*
358     * dump the header; use the last header read in
359     */
360    if ( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 )
361	err( 1 , "%s" , sumfile );
362    /*
363     * dump the samples
364     */
365    if (fwrite(samples, histcounter_size, nsamples, sfile) != nsamples)
366	err( 1 , "%s" , sumfile );
367    /*
368     * dump the normalized raw arc information
369     */
370    for ( nlp = nl ; nlp < npe ; nlp++ ) {
371	for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) {
372	    arc.raw_frompc = arcp -> arc_parentp -> value;
373	    arc.raw_selfpc = arcp -> arc_childp -> value;
374	    arc.raw_count = arcp -> arc_count;
375	    if ( fwrite ( &arc , sizeof arc , 1 , sfile ) != 1 )
376		err( 1 , "%s" , sumfile );
377#	    ifdef DEBUG
378		if ( debug & SAMPLEDEBUG ) {
379		    printf( "[dumpsum] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
380			    arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
381		}
382#	    endif /* DEBUG */
383	}
384    }
385    fclose( sfile );
386}
387
388static int
389valcmp(const void *v1, const void *v2)
390{
391    const nltype *p1 = (const nltype *)v1;
392    const nltype *p2 = (const nltype *)v2;
393
394    if ( p1 -> value < p2 -> value ) {
395	return LESSTHAN;
396    }
397    if ( p1 -> value > p2 -> value ) {
398	return GREATERTHAN;
399    }
400    return EQUALTO;
401}
402
403void
404readsamples(FILE *pfile)
405{
406    int		i;
407    intmax_t	sample;
408
409    if (samples == 0) {
410	samples = (double *) calloc(nsamples, sizeof(double));
411	if (samples == NULL)
412	    errx(0, "no room for %d sample pc's", nsamples);
413    }
414    for (i = 0; i < nsamples; i++) {
415	fread(&sample, histcounter_size, 1, pfile);
416	if (feof(pfile))
417		break;
418	switch ( histcounter_type ) {
419	case -8:
420	    samples[i] += *(int8_t *)&sample;
421	    break;
422	case 8:
423	    samples[i] += *(u_int8_t *)&sample;
424	    break;
425	case -16:
426	    samples[i] += *(int16_t *)&sample;
427	    break;
428	case 16:
429	    samples[i] += *(u_int16_t *)&sample;
430	    break;
431	case -32:
432	    samples[i] += *(int32_t *)&sample;
433	    break;
434	case 32:
435	    samples[i] += *(u_int32_t *)&sample;
436	    break;
437	case -64:
438	    samples[i] += *(int64_t *)&sample;
439	    break;
440	case 64:
441	    samples[i] += *(u_int64_t *)&sample;
442	    break;
443	default:
444	    err(1, "unsupported histogram counter type %d", histcounter_type);
445	}
446    }
447    if (i != nsamples)
448	errx(1, "unexpected EOF after reading %d/%d samples", --i , nsamples );
449}
450
451/*
452 *	Assign samples to the procedures to which they belong.
453 *
454 *	There are three cases as to where pcl and pch can be
455 *	with respect to the routine entry addresses svalue0 and svalue1
456 *	as shown in the following diagram.  overlap computes the
457 *	distance between the arrows, the fraction of the sample
458 *	that is to be credited to the routine which starts at svalue0.
459 *
460 *	    svalue0                                         svalue1
461 *	       |                                               |
462 *	       v                                               v
463 *
464 *	       +-----------------------------------------------+
465 *	       |					       |
466 *	  |  ->|    |<-		->|         |<-		->|    |<-  |
467 *	  |         |		  |         |		  |         |
468 *	  +---------+		  +---------+		  +---------+
469 *
470 *	  ^         ^		  ^         ^		  ^         ^
471 *	  |         |		  |         |		  |         |
472 *	 pcl       pch		 pcl       pch		 pcl       pch
473 *
474 *	For the vax we assert that samples will never fall in the first
475 *	two bytes of any routine, since that is the entry mask,
476 *	thus we give call alignentries() to adjust the entry points if
477 *	the entry mask falls in one bucket but the code for the routine
478 *	doesn't start until the next bucket.  In conjunction with the
479 *	alignment of routine addresses, this should allow us to have
480 *	only one sample for every four bytes of text space and never
481 *	have any overlap (the two end cases, above).
482 */
483void
484asgnsamples(void)
485{
486    register int	j;
487    double		ccnt;
488    double		thetime;
489    unsigned long	pcl, pch;
490    register int	i;
491    unsigned long	overlap;
492    unsigned long	svalue0, svalue1;
493
494    /* read samples and assign to namelist symbols */
495    scale = highpc - lowpc;
496    scale /= nsamples;
497    alignentries();
498    for (i = 0, j = 1; i < nsamples; i++) {
499	ccnt = samples[i];
500	if (ccnt == 0)
501		continue;
502	pcl = lowpc + (unsigned long)(scale * i);
503	pch = lowpc + (unsigned long)(scale * (i + 1));
504	thetime = ccnt;
505#	ifdef DEBUG
506	    if ( debug & SAMPLEDEBUG ) {
507		printf( "[asgnsamples] pcl 0x%lx pch 0x%lx ccnt %.0f\n" ,
508			pcl , pch , ccnt );
509	    }
510#	endif /* DEBUG */
511	totime += thetime;
512	for (j = j - 1; j < nname; j++) {
513	    svalue0 = nl[j].svalue;
514	    svalue1 = nl[j+1].svalue;
515		/*
516		 *	if high end of tick is below entry address,
517		 *	go for next tick.
518		 */
519	    if (pch < svalue0)
520		    break;
521		/*
522		 *	if low end of tick into next routine,
523		 *	go for next routine.
524		 */
525	    if (pcl >= svalue1)
526		    continue;
527	    overlap = min(pch, svalue1) - max(pcl, svalue0);
528	    if (overlap > 0) {
529#		ifdef DEBUG
530		    if (debug & SAMPLEDEBUG) {
531			printf("[asgnsamples] (0x%lx->0x%lx-0x%lx) %s gets %f ticks %lu overlap\n",
532				nl[j].value / HISTORICAL_SCALE_2,
533				svalue0, svalue1, nl[j].name,
534				overlap * thetime / scale, overlap);
535		    }
536#		endif /* DEBUG */
537		nl[j].time += overlap * thetime / scale;
538	    }
539	}
540    }
541#   ifdef DEBUG
542	if (debug & SAMPLEDEBUG) {
543	    printf("[asgnsamples] totime %f\n", totime);
544	}
545#   endif /* DEBUG */
546}
547
548
549unsigned long
550min(unsigned long a, unsigned long b)
551{
552    if (a<b)
553	return(a);
554    return(b);
555}
556
557unsigned long
558max(unsigned long a, unsigned long b)
559{
560    if (a>b)
561	return(a);
562    return(b);
563}
564
565    /*
566     *	calculate scaled entry point addresses (to save time in asgnsamples),
567     *	and possibly push the scaled entry points over the entry mask,
568     *	if it turns out that the entry point is in one bucket and the code
569     *	for a routine is in the next bucket.
570     */
571void
572alignentries(void)
573{
574    register struct nl	*nlp;
575    unsigned long	bucket_of_entry;
576    unsigned long	bucket_of_code;
577
578    for (nlp = nl; nlp < npe; nlp++) {
579	nlp -> svalue = nlp -> value / HISTORICAL_SCALE_2;
580	bucket_of_entry = (nlp->svalue - lowpc) / scale;
581	bucket_of_code = (nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2 -
582	  lowpc) / scale;
583	if (bucket_of_entry < bucket_of_code) {
584#	    ifdef DEBUG
585		if (debug & SAMPLEDEBUG) {
586		    printf("[alignentries] pushing svalue 0x%lx to 0x%lx\n",
587			    nlp->svalue,
588			    nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2);
589		}
590#	    endif /* DEBUG */
591	    nlp->svalue += OFFSET_OF_CODE / HISTORICAL_SCALE_2;
592	}
593    }
594}
595