/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * HISTORY
 *
 * Revision 1.1.1.1  1998/09/22 21:05:49  wsanchez
 * Import of Mac OS X kernel (~semeria)
 *
 * Revision 1.1.1.1  1998/03/07 02:26:08  wsanchez
 * Import of OSF Mach kernel (~mburg)
 *
 * Revision 1.1.7.1  1997/09/22  17:41:24  barbou
 * 	MP+RT: protect cpu_number() usage against preemption.
 * 	[97/09/16            barbou]
 *
 * Revision 1.1.5.1  1995/01/06  19:53:37  devrcs
 * 	mk6 CR668 - 1.3b26 merge
 * 	new file for mk6
 * 	[1994/10/12  22:25:20  dwm]
 *
 * Revision 1.1.2.2  1994/05/16  19:19:17  meissner
 * 	Add support for converting 64-bit integers to a decimal string.
 * 	Use the correct address (selfpc) when creating the prof header for gprof.
 * 	[1994/04/28  21:44:59  meissner]
 *
 * Revision 1.1.2.1  1994/04/08  17:51:42  meissner
 * 	Make most stats 64 bits, except for things like memory allocation.
 * 	[1994/04/02  14:58:21  meissner]
 *
 * 	Do not provide old mcount support under MK or server.
 * 	Fixup stats size so it is the same as in profile-md.h.
 * 	[1994/03/29  21:00:03  meissner]
 *
 * 	Use faster sequence for overflow addition.
 * 	Keep {dummy,prof,gprof,old}_mcount counts in double precision.
 * 	Add kernel NCPUS > 1 support.
 * 	[1994/03/17  20:13:23  meissner]
 *
 * 	Add gprof/prof overflow support
 * 	[1994/03/17  14:56:44  meissner]
 *
 * 	Add size of histogram counters & unused fields to profile_profil struct
 * 	[1994/02/17  21:41:44  meissner]
 *
 * 	Add too_low/too_high to profile_stats.
 * 	[1994/02/16  22:38:11  meissner]
 *
 * 	Bump # allocation contexts to 32 from 16.
 * 	Store unique ptr address in gprof function header structure for _profile_reset.
 * 	Add new fields from profile-{internal,md}.h.
 * 	Align loop looking for an unlocked acontext.
 * 	Count # times a locked context block was found.
 * 	Expand copyright.
 * 	[1994/02/07  12:40:56  meissner]
 *
 * 	Keep track of the number of times the kernel overflows the HISTCOUNTER counter.
 * 	[1994/02/03  20:13:23  meissner]
 *
 * 	Add stats for {user,kernel,idle} mode in the kernel.
 * 	[1994/02/03  15:17:22  meissner]
 *
 * 	No change.
 * 	[1994/02/03  00:58:49  meissner]
 *
 * 	Combine _profile_{vars,stats,md}; Allow more than one _profile_vars.
 * 	[1994/02/01  12:03:56  meissner]
 *
 * 	Move _mcount_ptr to be closer to other data declarations.
 * 	Add text_len to profile_profil structure for mk.
 * 	Split records_cnt into prof_cnt/gprof_cnt.
 * 	Always update prof_cnt/gprof_cnt even if not DO_STATS.
 * 	Add current/max cpu indicator to stats for kernel.
 * 	[1994/01/28  23:33:20  meissner]
 *
 * 	Don't do 4+Lgotoff(lab), use separate labels.
 * 	Change GPROF_HASH_SHIFT to 9 (from 8).
 * 	[1994/01/26  22:00:59  meissner]
 *
 * 	Fixup NO_RECURSIVE_ALLOC to do byte loads, not word loads.
 * 	[1994/01/26  20:30:57  meissner]
 *
 * 	Move callback pointers into separate allocation context.
 * 	Add size fields for other structures to profile-vars.
 * 	Allocate string table as one large allocation.
 * 	Rewrite old mcount code once again.
 * 	Use multiply to make hash value, not divide.
 * 	Hash table is now a power of two.
 * 	[1994/01/26  20:23:32  meissner]
 *
 * 	Cut hash table size back to 16189.
 * 	Add size fields to all structures.
 * 	Add major/minor version number to _profile_md.
 * 	Move allocation context block pointers to _profile_vars.
 * 	Move _gprof_dummy after _profile_md.
 * 	New function header code now falls into the hash-an-element code,
 * 	to avoid duplicating the hash code or using a macro.
 * 	Fix bug in _gprof_mcount with ELF shared libraries.
 * 	[1994/01/25  01:45:59  meissner]
 *
 * 	Move init functions to C code; rearrange profil variables.
 * 	[1994/01/22  01:11:14  meissner]
 *
 * 	No change.
 * 	[1994/01/20  20:56:43  meissner]
 *
 * 	Fixup copyright.
 * 	[1994/01/18  23:07:39  meissner]
 *
 * 	Make flags byte-sized.
 * 	Add have_bb flag.
 * 	Add init_format flag.
 * 	Always put word size multiplier first in .space.
 * 	[1994/01/18  21:57:14  meissner]
 *
 * 	Fix elfpic problems in last change.
 * 	[1994/01/16  14:04:26  meissner]
 *
 * 	Rewrite gprof caching to be faster & not need a lock.
 * 	Record prof information for gprof too.
 * 	Bump reserved stats to 64.
 * 	Bump up hash table size 30799.
 * 	Conditionally use lock prefix.
 * 	Change most #ifdef's to #if.
 * 	DEBUG_PROFILE turns on stack frames now.
 * 	Conditionally add externs to gprof to determine where time is spent.
 * 	Prof_mcount uses xchgl to update function pointer.
 * 	[1994/01/15  18:40:33  meissner]
 *
 * 	Fix a comment.
 * 	Separate statistics from debugging (though debugging turns it on).
 * 	Remove debug code that traces each gprof request.
 * 	[1994/01/15  00:59:02  meissner]
 *
 * 	Move max hash bucket calculation into _gprof_write & put info in stats structure.
 * 	[1994/01/04  16:15:14  meissner]
 *
 * 	Use _profile_printf to write diagnostics; add diag_stream to hold stream to write to.
 * 	[1994/01/04  15:37:44  meissner]
 *
 * 	Add more allocation memory pools (gprof function hdrs in particular).
 * 	For prof, gprof arc, and gprof function hdrs, allocate 16 pages at a time.
 * 	Add major/minor version numbers to _profile_{vars,stats}.
 * 	Add # profil buckets field to _profil_stats.
 * 	[19
 *
 * $EndLog$
 */

/*
 * Common 386 profiling module that is shared between the kernel, mach
 * servers, and the user space library.  Each environment includes
 * this file.
 */

	.file	"profile-asm.s"

#include <machine/asm.h>

/*
 * By default, debugging turns on statistics and stack frames.
 */

#if DEBUG_PROFILE
#ifndef DO_STATS
#define DO_STATS 1
#endif

#ifndef STACK_FRAMES
#define STACK_FRAMES 1
#endif
#endif

#ifndef OLD_MCOUNT
#define OLD_MCOUNT 0			/* do not compile old code for mcount */
#endif

#ifndef DO_STATS
#define DO_STATS 1			/* compile in statistics code */
#endif

#ifndef DO_LOCK
#define	DO_LOCK 0			/* use lock; in front of increments */
#endif

#ifndef LOCK_STATS
#define LOCK_STATS DO_LOCK		/* update stats with lock set */
#endif

#ifndef STACK_FRAMES
#define STACK_FRAMES 0			/* create stack frames for debugger */
#endif

#ifndef NO_RECURSIVE_ALLOC
#define NO_RECURSIVE_ALLOC 0		/* check for recursive allocs */
					/* (not thread safe!) */
#endif

#ifndef MARK_GPROF
#define MARK_GPROF 0			/* add externs for gprof profiling */
#endif

#ifndef OVERFLOW
#define	OVERFLOW 1			/* add overflow checking support */
#endif

/*
 * Turn on the use of the lock prefix if desired.
 */

#ifndef LOCK
#if DO_LOCK
#define LOCK lock;
#else
#define LOCK
#endif
#endif

#ifndef SLOCK
#if LOCK_STATS
#define SLOCK LOCK
#else
#define SLOCK
#endif
#endif

/*
 * Double or single precision incrementing
 */

#if OVERFLOW
#define DINC(mem)		LOCK addl $1,mem; LOCK adcl $0,4+mem
#define DINC2(mem,mem2)		LOCK addl $1,mem; LOCK adcl $0,mem2
#define SDINC(mem)		SLOCK addl $1,mem; SLOCK adcl $0,4+mem
#define SDADD(val,mem)		SLOCK addl val,mem; SLOCK adcl $0,4+mem
#define SDADDNEG(val,mem)	SLOCK subl val,mem; SLOCK adcl $0,4+mem
#define SDSUB(val,mem)		SLOCK subl val,mem; SLOCK sbbl $0,4+mem

#else
#define DINC(mem)		LOCK incl mem
#define DINC2(mem,mem2)		LOCK incl mem
#define SDINC(mem)		SLOCK incl mem
#define	SDADD(val,mem)		SLOCK addl val,mem
#define	SDADDNEG(val,mem)	SLOCK subl val,mem
#define	SDSUB(val,mem)		SLOCK subl val,mem
#endif
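
/*
 * A rough C sketch of what DINC expands to when OVERFLOW is set: the
 * addl/adcl pair implements a 64-bit increment over two adjacent 32-bit
 * words, low word first, with the carry folded into the high word.  The
 * names below are illustrative, not part of this file:
 *
 *	struct double_counter { unsigned int lo, hi; };
 *
 *	static void dinc(struct double_counter *c)
 *	{
 *		unsigned int old = c->lo;
 *		c->lo += 1;
 *		if (c->lo < old)	-- carry out of the low word
 *			c->hi += 1;
 *	}
 *
 * Without OVERFLOW, DINC degenerates to a single incl and the second
 * word (or second counter, for DINC2) is never touched.
 */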

/*
 * Stack frame support so that debugger traceback works.
 */

#if STACK_FRAMES
#define	ENTER	pushl %ebp; movl %esp,%ebp
#define	LEAVE0	popl %ebp
#define	Estack	4
#else
#define	ENTER
#define	LEAVE0
#define	Estack	0
#endif

/*
 * Gprof profiling.
 */

#if MARK_GPROF
#define MARK(name) .globl EXT(name); ELF_FUNC(EXT(name)); ELF_SIZE(EXT(name),0); LEXT(name)
#else
#define MARK(name)
#endif

/*
 * Profiling allocation context block.  Each time memory is needed, the
 * allocator loops until it finds an unlocked context block, and allocates
 * from that block.  If no context blocks are available, a new memory
 * pool is allocated, and added to the end of the chain.
 */

LCL(A_next)		= 0			/* next context block link (must be 0) */
LCL(A_plist)		= LCL(A_next)+4		/* head of page list for context block */
LCL(A_lock)		= LCL(A_plist)+4	/* lock word */
LCL(A_size)		= LCL(A_lock)+4		/* size of context block */

#define	A_next		LCL(A_next)
#define	A_plist		LCL(A_plist)
#define	A_lock		LCL(A_lock)
#define	A_size		LCL(A_size)
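
/*
 * In C terms, the context block header laid out above is roughly the
 * following (an illustrative sketch only; the offsets defined above are
 * the authoritative layout):
 *
 *	struct alloc_context {
 *		struct alloc_context	*next;	-- A_next, must be at offset 0
 *		struct page_list	*plist;	-- A_plist, head of page list
 *		int			lock;	-- A_lock, 0 = free, else held
 *	};					-- A_size == 12 bytes
 */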

/*
 * Allocation contexts used.
 */

LCL(C_prof)		= 0			/* prof records */
LCL(C_gprof)		= 1			/* gprof arc records */
LCL(C_gfunc)		= 2			/* gprof function headers */
LCL(C_misc)		= 3			/* misc. allocations */
LCL(C_profil)		= 4			/* memory for profil */
LCL(C_dci)		= 5			/* memory for dci */
LCL(C_bb)		= 6			/* memory for basic blocks */
LCL(C_callback)		= 7			/* memory for callbacks */
LCL(C_max)		= 32			/* # allocation contexts */

#define	C_prof		LCL(C_prof)
#define	C_gprof		LCL(C_gprof)
#define	C_gfunc		LCL(C_gfunc)
#define	C_max		LCL(C_max)

/*
 * Linked list of memory allocations.
 */

LCL(M_first)		= 0			/* pointer to first byte available */
LCL(M_ptr)		= LCL(M_first)+4	/* pointer to next available byte */
LCL(M_next)		= LCL(M_ptr)+4		/* next page allocated */
LCL(M_nfree)		= LCL(M_next)+4		/* # bytes available */
LCL(M_nalloc)		= LCL(M_nfree)+4	/* # bytes allocated */
LCL(M_num)		= LCL(M_nalloc)+4	/* # allocations done on this page */
LCL(M_size)		= LCL(M_num)+4		/* size of page header */

#define	M_first		LCL(M_first)
#define	M_ptr		LCL(M_ptr)
#define	M_next		LCL(M_next)
#define	M_nfree		LCL(M_nfree)
#define	M_nalloc	LCL(M_nalloc)
#define	M_num		LCL(M_num)
#define	M_size		LCL(M_size)
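
/*
 * An equivalent C view of the page header (again, just a sketch of the
 * offsets above):
 *
 *	struct page_list {
 *		char			*first;		-- M_first
 *		char			*ptr;		-- M_ptr, next free byte
 *		struct page_list	*next;		-- M_next
 *		unsigned int		nfree;		-- M_nfree
 *		unsigned int		nalloc;		-- M_nalloc
 *		unsigned int		num;		-- M_num
 *	};						-- M_size == 24 bytes
 */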

/*
 * Prof data type.
 */

LCL(P_addr)		= 0			/* function address */
LCL(P_count)		= LCL(P_addr)+4		/* # times function called */
LCL(P_overflow)		= LCL(P_count)+4	/* # times count overflowed */
LCL(P_size)		= LCL(P_overflow)+4	/* size of prof data type */

#define	P_addr		LCL(P_addr)
#define	P_count		LCL(P_count)
#define	P_overflow	LCL(P_overflow)
#define	P_size		LCL(P_size)
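
/*
 * The prof record as a C sketch (count and overflow form one double
 * precision counter, maintained by DINC2 above):
 *
 *	struct prof_ext {
 *		unsigned int	addr;		-- P_addr, function address
 *		unsigned int	count;		-- P_count, low word of count
 *		unsigned int	overflow;	-- P_overflow, high word
 *	};					-- P_size == 12 bytes
 */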

/*
 * Gprof data type.
 */

LCL(G_next)		= 0			/* next hash link (must be 0) */
LCL(G_frompc)		= LCL(G_next)+4		/* caller's caller */
LCL(G_selfpc)		= LCL(G_frompc)+4	/* caller's address */
LCL(G_count)		= LCL(G_selfpc)+4	/* # times arc traversed */
LCL(G_overflow)		= LCL(G_count)+4	/* # times count overflowed */
LCL(G_size)		= LCL(G_overflow)+4	/* size of gprof data type */

#define	G_next		LCL(G_next)
#define	G_frompc	LCL(G_frompc)
#define	G_selfpc	LCL(G_selfpc)
#define	G_count		LCL(G_count)
#define	G_overflow	LCL(G_overflow)
#define	G_size		LCL(G_size)
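
/*
 * The gprof arc record as a C sketch.  The chain link must stay at
 * offset 0 so that a hash bucket head can be walked as if it were a
 * degenerate arc:
 *
 *	struct gprof_arc {
 *		struct gprof_arc *next;		-- G_next, hash chain
 *		unsigned int	 frompc;	-- G_frompc, caller's caller
 *		unsigned int	 selfpc;	-- G_selfpc, caller
 *		unsigned int	 count;		-- G_count, low word
 *		unsigned int	 overflow;	-- G_overflow, high word
 *	};					-- G_size == 20 bytes
 */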

/*
 * Gprof header.
 *
 * At least one header is allocated for each unique function that is profiled.
 * In order to save time calculating the hash value, the last H_maxcache
 * distinct arcs are cached within this structure.  Also, to avoid loading
 * the GOT when searching the hash table, we copy the hash pointer to this
 * structure, so that we only load the GOT when we need to allocate an arc.
 */

LCL(H_maxcache)		= 3			/* # of cache table entries */
LCL(H_csize)		= 4*LCL(H_maxcache)	/* size of each cache array */

LCL(H_hash_ptr)		= 0			/* hash table to use */
LCL(H_unique_ptr)	= LCL(H_hash_ptr)+4	/* function unique pointer */
LCL(H_prof)		= LCL(H_unique_ptr)+4	/* prof statistics */
LCL(H_cache_ptr)	= LCL(H_prof)+P_size	/* cache table of element pointers */
LCL(H_size)		= LCL(H_cache_ptr)+LCL(H_csize)	/* size of gprof header type */

#define	H_maxcache	LCL(H_maxcache)
#define	H_csize		LCL(H_csize)
#define	H_hash_ptr	LCL(H_hash_ptr)
#define	H_unique_ptr	LCL(H_unique_ptr)
#define	H_prof		LCL(H_prof)
#define	H_cache_ptr	LCL(H_cache_ptr)
#define	H_size		LCL(H_size)
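
/*
 * And the function header as a C sketch (struct prof_ext and struct
 * gprof_arc as sketched above):
 *
 *	struct gprof_hdr {
 *		struct gprof_arc **hash_ptr;	-- H_hash_ptr
 *		void		 *unique_ptr;	-- H_unique_ptr
 *		struct prof_ext	 prof;		-- H_prof
 *		struct gprof_arc *cache[3];	-- H_cache_ptr[H_maxcache]
 *	};					-- H_size == 32 bytes
 */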

/*
 * Number of digits needed to write a 64-bit number including trailing null.
 * (rounded up to be divisible by 4).
 */

#define N_digit		24


	.data

/*
 * Default gprof hash table size, which must be a power of two.
 * The shift specifies how many low order bits to eliminate when
 * calculating the hash value.
 */

#ifndef GPROF_HASH_SIZE
#define GPROF_HASH_SIZE 16384
#endif

#ifndef GPROF_HASH_SHIFT
#define	GPROF_HASH_SHIFT 9
#endif

#define GPROF_HASH_MASK (GPROF_HASH_SIZE-1)
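
/*
 * The arc hash computed by _gprof_mcount below is, restated in C (this
 * is the imull/shrl/andl sequence in LCL(gnocache)):
 *
 *	static unsigned int
 *	gprof_hash(unsigned int frompc, unsigned int selfpc)
 *	{
 *		return ((frompc * selfpc) >> GPROF_HASH_SHIFT)
 *			& GPROF_HASH_MASK;
 *	}
 *
 * Multiplying the two addresses and discarding the low bits is cheaper
 * than the remainder-by-prime scheme this file used before the table
 * became a power of two (see the revision history above).
 */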

DATA(_profile_hash_size)
	.long	GPROF_HASH_SIZE
ENDDATA(_profile_hash_size)


/*
 * Pointer that the compiler uses to call the appropriate mcount function.
 */

DATA(_mcount_ptr)
	.long	EXT(_dummy_mcount)
ENDDATA(_mcount_ptr)
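
/*
 * Conceptually, a profiled function's prologue calls indirectly through
 * this pointer with the address of a per-function, pointer-sized static
 * word in %edx (see the comment above the old mcount entry below).  In
 * C-like pseudo-source, with purely illustrative names:
 *
 *	extern void (*_mcount_ptr)(void);
 *
 *	void profiled_function(void)
 *	{
 *		static void *unique;	-- address passed in %edx
 *		(*_mcount_ptr)();	-- _dummy_, _prof_ or _gprof_mcount
 *		...
 *	}
 *
 * Swapping _mcount_ptr is how profiling is switched between the dummy,
 * prof and gprof flavors without re-patching every call site.
 */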

/*
 * Global profile variables.  The structure that accesses this in C is declared
 * in profile-internal.h.  All items in .data that follow this will be used as
 * one giant record, and each unique machine, thread, kgmon output or what have
 * you will create a separate instance.  Typically there is only one instance
 * which will be the memory laid out below.
 */

LCL(var_major_version)	= 0				/* major version number */
LCL(var_minor_version)	= LCL(var_major_version)+4	/* minor version number */
LCL(vars_size)		= LCL(var_minor_version)+4	/* size of _profile_vars structure */
LCL(plist_size)		= LCL(vars_size)+4		/* size of page_list structure */
LCL(acontext_size)	= LCL(plist_size)+4		/* size of allocation contexts */
LCL(callback_size)	= LCL(acontext_size)+4		/* size of callback structure */
LCL(type)		= LCL(callback_size)+4		/* profile type (gprof, prof) */
LCL(error_msg)		= LCL(type)+4			/* error message for perror */
LCL(filename)		= LCL(error_msg)+4		/* filename to write to */
LCL(str_ptr)		= LCL(filename)+4		/* string table pointer */
LCL(stream)		= LCL(str_ptr)+4		/* stdio stream to write to */
LCL(diag_stream)	= LCL(stream)+4			/* stdio stream to write diagnostics to */
LCL(fwrite_func)	= LCL(diag_stream)+4		/* function like fwrite to output bytes */
LCL(page_size)		= LCL(fwrite_func)+4		/* page size in bytes */
LCL(str_bytes)		= LCL(page_size)+4		/* # bytes in string table */
LCL(str_total)		= LCL(str_bytes)+4		/* # total bytes allocated for string table */
LCL(clock_ticks)	= LCL(str_total)+4		/* # clock ticks per second */

							/* profil variables */
LCL(profil_start)	= LCL(clock_ticks)+4		/* start of profil variables */
LCL(lowpc)		= LCL(clock_ticks)+4		/* lowest address */
LCL(highpc)		= LCL(lowpc)+4			/* highest address */
LCL(text_len)		= LCL(highpc)+4			/* highpc-lowpc */
LCL(profil_len)		= LCL(text_len)+4		/* size of profil buffer */
LCL(counter_size)	= LCL(profil_len)+4		/* size of individual counter */
LCL(scale)		= LCL(counter_size)+4		/* scale factor */
LCL(profil_unused)	= LCL(scale)+4			/* unused fields */
LCL(profil_end)		= LCL(profil_unused)+4*8	/* end of profil_info structure */
LCL(profil_buf)		= LCL(profil_end)		/* buffer for profil */

							/* Output selection func ptrs */
LCL(output_init)	= LCL(profil_buf)+4		/* Initialization */
LCL(output)		= LCL(output_init)+4		/* Write out profiling info */
LCL(output_ptr)		= LCL(output)+4			/* Output specific data ptr */

							/* Memory allocation support */
LCL(acontext)		= LCL(output_ptr)+4		/* pointers to allocation context blocks */

LCL(bogus_func)		= LCL(acontext)+4*C_max		/* function to use if gprof arc is bad */
LCL(vars_unused)	= LCL(bogus_func)+4		/* future growth */

							/* flags */
LCL(init)		= LCL(vars_unused)+4*63		/* whether initializations were done */
LCL(active)		= LCL(init)+1			/* whether profiling is active */
LCL(do_profile)		= LCL(active)+1			/* whether to do profiling */
LCL(use_dci)		= LCL(do_profile)+1		/* whether to use DCI */
LCL(use_profil)		= LCL(use_dci)+1		/* whether to use profil */
LCL(recursive_alloc)	= LCL(use_profil)+1		/* alloc called recursively */
LCL(output_uarea)	= LCL(recursive_alloc)+1	/* output uarea */
LCL(output_stats)	= LCL(output_uarea)+1		/* output stats info */
LCL(output_clock)	= LCL(output_stats)+1		/* output the clock ticks */
LCL(multiple_sections)	= LCL(output_clock)+1		/* multiple sections are ok */
LCL(have_bb)		= LCL(multiple_sections)+1	/* whether we have basic block data */
LCL(init_format)	= LCL(have_bb)+1		/* The output format has been chosen */
LCL(debug)		= LCL(init_format)+1		/* Whether or not we are debugging */
LCL(check_funcs)	= LCL(debug)+1			/* Whether to check functions for validity */
LCL(flag_unused)	= LCL(check_funcs)+1		/* unused flags */
LCL(end_of_vars)	= LCL(flag_unused)+62		/* size of machine independent vars */

/*
 * Data that contains profile statistics that can be dumped out
 * into the {,g}mon.out file.  This is defined in profile-md.h.
 */

LCL(stats_start)	= LCL(end_of_vars)		/* start of stats substructure */
LCL(stats_major_version)= LCL(stats_start)		/* major version number */
LCL(stats_minor_version)= LCL(stats_major_version)+4	/* minor version number */
LCL(stats_size)		= LCL(stats_minor_version)+4	/* size of _profile_stats structure */
LCL(profil_buckets)	= LCL(stats_size)+4		/* # profil buckets */
LCL(my_cpu)		= LCL(profil_buckets)+4		/* identify which cpu/thread this is */
LCL(max_cpu)		= LCL(my_cpu)+4			/* maximum cpu/thread number */
LCL(prof_records)	= LCL(max_cpu)+4		/* # of profiled functions */
LCL(gprof_records)	= LCL(prof_records)+4		/* # of gprof arcs created */
LCL(hash_buckets)	= LCL(gprof_records)+4		/* max gprof hash buckets on a chain */
LCL(bogus_count)	= LCL(hash_buckets)+4		/* # bogus functions found in gprof */

LCL(cnt)		= LCL(bogus_count)+4		/* # of _{prof,gprof}_mcount calls */
LCL(dummy)		= LCL(cnt)+8			/* # of _dummy_mcount calls */
LCL(old_mcount)		= LCL(dummy)+8			/* # of old mcount calls */
LCL(hash_search)	= LCL(old_mcount)+8		/* # gprof hash buckets searched */
LCL(hash_num)		= LCL(hash_search)+8		/* # times hash table searched */
LCL(user_ticks)		= LCL(hash_num)+8		/* # ticks within user space */
LCL(kernel_ticks)	= LCL(user_ticks)+8		/* # ticks within kernel space */
LCL(idle_ticks)		= LCL(kernel_ticks)+8		/* # ticks cpu was idle */
LCL(overflow_ticks)	= LCL(idle_ticks)+8		/* # ticks where histcounter overflowed */
LCL(acontext_locked)	= LCL(overflow_ticks)+8		/* # times an acontext was locked */
LCL(too_low)		= LCL(acontext_locked)+8	/* # times histogram tick too low */
LCL(too_high)		= LCL(too_low)+8		/* # times histogram tick too high */
LCL(prof_overflow)	= LCL(too_high)+8		/* # times the prof count field overflowed */
LCL(gprof_overflow)	= LCL(prof_overflow)+8		/* # times the gprof count field overflowed */
LCL(num_alloc)		= LCL(gprof_overflow)+8		/* # allocations in each context */
LCL(bytes_alloc)	= LCL(num_alloc)+4*C_max	/* bytes allocated in each context */
LCL(num_context)	= LCL(bytes_alloc)+4*C_max	/* # allocation context blocks */
LCL(wasted)		= LCL(num_context)+4*C_max	/* # bytes wasted */
LCL(overhead)		= LCL(wasted)+4*C_max		/* # bytes of overhead */
LCL(buckets)		= LCL(overhead)+4*C_max		/* # hash indexes that have n buckets */
LCL(cache_hits1)	= LCL(buckets)+4*10		/* # gprof cache hits in bucket #1 */
LCL(cache_hits2)	= LCL(cache_hits1)+8		/* # gprof cache hits in bucket #2 */
LCL(cache_hits3)	= LCL(cache_hits2)+8		/* # gprof cache hits in bucket #3 */
LCL(stats_unused)	= LCL(cache_hits3)+8		/* reserved for future use */
LCL(stats_end)		= LCL(stats_unused)+8*64	/* end of stats structure */

/*
 * Machine dependent variables that no C file should access (except for
 * profile-md.c).
 */

LCL(md_start)		= LCL(stats_end)		/* start of md structure */
LCL(md_major_version)	= LCL(md_start)			/* major version number */
LCL(md_minor_version)	= LCL(md_major_version)+4	/* minor version number */
LCL(md_size)		= LCL(md_minor_version)+4	/* size of _profile_md structure */
LCL(hash_ptr)		= LCL(md_size)+4		/* gprof hash pointer */
LCL(hash_size)		= LCL(hash_ptr)+4		/* gprof hash size */
LCL(num_cache)		= LCL(hash_size)+4		/* # of cache entries */
LCL(save_mcount_ptr)	= LCL(num_cache)+4		/* save for mcount_ptr when suspending profiling */
LCL(mcount_ptr_ptr)	= LCL(save_mcount_ptr)+4	/* pointer to _mcount_ptr */
LCL(dummy_ptr)		= LCL(mcount_ptr_ptr)+4		/* pointer to gprof_dummy */
LCL(alloc_pages)	= LCL(dummy_ptr)+4		/* allocate more memory */
LCL(num_buffer)		= LCL(alloc_pages)+4		/* buffer to convert 64 bit ints in */
LCL(md_unused)		= LCL(num_buffer)+N_digit	/* unused fields */
LCL(md_end)		= LCL(md_unused)+4*58		/* end of md structure */
LCL(total_size)		= LCL(md_end)			/* size of entire structure */

/*
 * Size of the entire _profile_vars structure.
 */

DATA(_profile_size)
	.long	LCL(total_size)
ENDDATA(_profile_size)

/*
 * Size of the statistics substructure.
 */

DATA(_profile_stats_size)
	.long	LCL(stats_end)-LCL(stats_start)
ENDDATA(_profile_stats_size)

/*
 * Size of the profil info substructure.
 */

DATA(_profile_profil_size)
	.long	LCL(profil_end)-LCL(profil_start)
ENDDATA(_profile_profil_size)

/*
 * Size of the machine dependent substructure.
 */

DATA(_profile_md_size)
	.long	LCL(md_end)-LCL(md_start)
ENDDATA(_profile_md_size)

/*
 * Whether statistics are supported.
 */

DATA(_profile_do_stats)
	.long	DO_STATS
ENDDATA(_profile_do_stats)

	.text

/*
 * Map LCL(xxx) names into simpler names.
 */

#define	V_acontext		LCL(acontext)
#define	V_acontext_locked	LCL(acontext_locked)
#define	V_alloc_pages		LCL(alloc_pages)
#define	V_bogus_func		LCL(bogus_func)
#define	V_bytes_alloc		LCL(bytes_alloc)
#define	V_cache_hits1		LCL(cache_hits1)
#define	V_cache_hits2		LCL(cache_hits2)
#define	V_cache_hits3		LCL(cache_hits3)
#define	V_cnt			LCL(cnt)
#define	V_cnt_overflow		LCL(cnt_overflow)
#define	V_check_funcs		LCL(check_funcs)
#define	V_dummy			LCL(dummy)
#define	V_dummy_overflow	LCL(dummy_overflow)
#define	V_dummy_ptr		LCL(dummy_ptr)
#define	V_gprof_records		LCL(gprof_records)
#define	V_hash_num		LCL(hash_num)
#define	V_hash_ptr		LCL(hash_ptr)
#define	V_hash_search		LCL(hash_search)
#define	V_mcount_ptr_ptr	LCL(mcount_ptr_ptr)
#define	V_num_alloc		LCL(num_alloc)
#define	V_num_buffer		LCL(num_buffer)
#define	V_num_context		LCL(num_context)
#define	V_old_mcount		LCL(old_mcount)
#define	V_old_mcount_overflow	LCL(old_mcount_overflow)
#define	V_overhead		LCL(overhead)
#define	V_page_size		LCL(page_size)
#define	V_prof_records		LCL(prof_records)
#define	V_recursive_alloc	LCL(recursive_alloc)
#define	V_wasted		LCL(wasted)

/*
 * Load up %ebx with the address of _profile_vars.  On a multiprocessor, this
 * will load up the appropriate machine's _profile_vars structure.
 * For ELF shared libraries, rely on the fact that we won't need a GOT,
 * except to load this pointer.
 */

#if defined (MACH_KERNEL)
#define ASSEMBLER
#include <i386/mp.h>

#if SQT
#include <i386/SQT/asm_macros.h>
#endif

#ifndef CPU_NUMBER
#error "Cannot determine how to get CPU number"
#endif

#define Vload	CPU_NUMBER(%ebx); movl EXT(_profile_vars_cpus)(,%ebx,4),%ebx

#else	/* not kernel */
#define	Vload	Gload; Egaddr(%ebx,_profile_vars)
#endif


/*
 * Allocate some memory for profiling.  This memory is guaranteed to
 * be zero.
 * %eax contains the memory size requested and will contain ptr on exit.
 * %ebx contains the address of the appropriate profile_vars structure.
 * %ecx is the number of the memory pool to allocate from (trashed on exit).
 * %edx is trashed.
 * %esi is preserved.
 * %edi is preserved.
 * %ebp is preserved.
 */

Entry(_profile_alloc_asm)
	ENTER
	pushl	%esi
	pushl	%edi

	movl	%ecx,%edi			/* move context number to saved reg */

#if NO_RECURSIVE_ALLOC
	movb	$-1,%cl
	xchgb	%cl,V_recursive_alloc(%ebx)
	cmpb	$0,%cl
	je	LCL(no_recurse)

	int	$3

	.align	ALIGN
LCL(no_recurse):
#endif

	leal	V_acontext(%ebx,%edi,4),%ecx

	/* Loop looking for a free allocation context. */
	/* %eax = size, %ebx = vars addr, %ecx = ptr to allocation context to try */
	/* %edi = context number */

	.align	ALIGN
LCL(alloc_loop):
	movl	%ecx,%esi			/* save ptr in case no more contexts */
	movl	A_next(%ecx),%ecx		/* next context block */
	cmpl	$0,%ecx
	je	LCL(alloc_context)		/* need to allocate a new context block */

	movl	$-1,%edx
	xchgl	%edx,A_lock(%ecx)		/* %edx == 0 if context available */

#if DO_STATS
	SDADDNEG(%edx,V_acontext_locked(%ebx))	/* increment counter if lock was held */
#endif

	cmpl	$0,%edx
	jne	LCL(alloc_loop)			/* go back if this context block is not available */

	/* Allocation context found (%ecx), now allocate. */
	movl	A_plist(%ecx),%edx		/* pointer to current block */
	cmpl	$0,%edx				/* first allocation? */
	je	LCL(alloc_new)

	cmpl	%eax,M_nfree(%edx)		/* see if we have enough space */
	jl	LCL(alloc_new)			/* jump if not enough space */

	/* Allocate from local block (and common exit) */
	/* %eax = bytes to allocate, %ebx = GOT, %ecx = context, %edx = memory block */
	/* %edi = context number */

	.align	ALIGN
LCL(alloc_ret):

#if DO_STATS
	SLOCK incl V_num_alloc(%ebx,%edi,4)	/* update global counters */
	SLOCK addl %eax,V_bytes_alloc(%ebx,%edi,4)
	SLOCK subl %eax,V_wasted(%ebx,%edi,4)
#endif

	movl	M_ptr(%edx),%esi		/* pointer return value */
	subl	%eax,M_nfree(%edx)		/* decrement bytes remaining */
	addl	%eax,M_nalloc(%edx)		/* increment bytes allocated */
	incl	M_num(%edx)			/* increment # allocations */
	addl	%eax,M_ptr(%edx)		/* advance pointer */
	movl	$0,A_lock(%ecx)			/* unlock context block */
	movl	%esi,%eax			/* return pointer */

#if NO_RECURSIVE_ALLOC
	movb	$0,V_recursive_alloc(%ebx)
#endif

	popl	%edi
	popl	%esi
	LEAVE0
	ret					/* return to the caller */

	/* Allocate space in whole number of pages */
	/* %eax = bytes to allocate, %ebx = vars address, %ecx = context */
	/* %edi = context number */

	.align	ALIGN
LCL(alloc_new):
	pushl	%eax				/* save regs */
	pushl	%ecx
	movl	V_page_size(%ebx),%edx
	addl	$(M_size-1),%eax		/* add in overhead size & subtract 1 */
	decl	%edx				/* page_size - 1 */
	addl	%edx,%eax			/* round up to whole number of pages */
	notl	%edx
	andl	%edx,%eax
	leal	-M_size(%eax),%esi		/* save allocation size */
	pushl	%eax				/* argument to _profile_alloc_pages */
	call	*V_alloc_pages(%ebx)		/* allocate some memory */
	addl	$4,%esp				/* pop off argument */

#if DO_STATS
	SLOCK addl %esi,V_wasted(%ebx,%edi,4)	/* update global counters */
	SLOCK addl $(M_size),V_overhead(%ebx,%edi,4)
#endif

	popl	%ecx				/* context block */
	movl	%eax,%edx			/* memory block pointer */
	movl	%esi,M_nfree(%edx)		/* # free bytes */
	addl	$(M_size),%eax			/* bump past overhead */
	movl	A_plist(%ecx),%esi		/* previous memory block or 0 */
	movl	%eax,M_first(%edx)		/* first space available */
	movl	%eax,M_ptr(%edx)		/* current address available */
	movl	%esi,M_next(%edx)		/* next memory block allocated */
	movl	%edx,A_plist(%ecx)		/* update current page list */
	popl	%eax				/* user size request */
	jmp	LCL(alloc_ret)			/* goto common return code */

	/* Allocate a context header in addition to memory block header + data */
	/* %eax = bytes to allocate, %ebx = GOT, %esi = ptr to store context ptr */
	/* %edi = context number */

	.align	ALIGN
LCL(alloc_context):
	pushl	%eax				/* save regs */
	pushl	%esi
	movl	V_page_size(%ebx),%edx
	addl	$(A_size+M_size-1),%eax		/* add in overhead size & subtract 1 */
	decl	%edx				/* page_size - 1 */
	addl	%edx,%eax			/* round up to whole number of pages */
	notl	%edx
	andl	%edx,%eax
	leal	-A_size-M_size(%eax),%esi	/* save allocation size */
	pushl	%eax				/* argument to _profile_alloc_pages */
	call	*V_alloc_pages(%ebx)		/* allocate some memory */
	addl	$4,%esp				/* pop off argument */

#if DO_STATS
	SLOCK incl V_num_context(%ebx,%edi,4)	/* bump # context blocks */
	SLOCK addl %esi,V_wasted(%ebx,%edi,4)	/* update global counters */
	SLOCK addl $(A_size+M_size),V_overhead(%ebx,%edi,4)
#endif

	movl	%eax,%ecx			/* context pointer */
	leal	A_size(%eax),%edx		/* memory block pointer */
	movl	%esi,M_nfree(%edx)		/* # free bytes */
	addl	$(A_size+M_size),%eax		/* bump past overhead */
	movl	%eax,M_first(%edx)		/* first space available */
	movl	%eax,M_ptr(%edx)		/* current address available */
	movl	$0,M_next(%edx)			/* next memory block allocated */
	movl	%edx,A_plist(%ecx)		/* head of memory block list */
	movl	$1,A_lock(%ecx)			/* set lock */
	popl	%esi				/* ptr to store context block link */
	movl	%ecx,%eax			/* context pointer temp */
	xchgl	%eax,A_next(%esi)		/* link into chain */
	movl	%eax,A_next(%ecx)		/* add links in case of threading */
	popl	%eax				/* user size request */
	jmp	LCL(alloc_ret)			/* goto common return code */

END(_profile_alloc_asm)

/*
 * C callable version of the profile memory allocator.
 * extern void *_profile_alloc(struct profile_vars *, size_t, acontext_type_t);
 */

Entry(_profile_alloc)
	ENTER
	pushl	%ebx
	movl	12+Estack(%esp),%eax		/* memory size */
	movl	8+Estack(%esp),%ebx		/* profile_vars address */
	addl	$3,%eax				/* round up to word boundary */
	movl	16+Estack(%esp),%ecx		/* which memory pool to allocate from */
	andl	$0xfffffffc,%eax
	call	EXT(_profile_alloc_asm)
	popl	%ebx
	LEAVE0
	ret
END(_profile_alloc)
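
/*
 * A hypothetical call, matching the prototype above ("pv" stands in for
 * the _profile_vars instance of the current cpu/thread):
 *
 *	struct prof_ext *p = (struct prof_ext *)
 *		_profile_alloc(pv, sizeof (struct prof_ext), C_prof);
 *
 * The size is rounded up to a 4-byte boundary before reaching
 * _profile_alloc_asm, and the memory handed back has already been
 * zeroed.
 */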


/*
 * Dummy mcount routine that just returns.
 *
 *		+-------------------------------+
 *		|				|
 *		|				|
 *		| caller's caller stack,	|
 *		| saved registers, params.	|
 *		|				|
 *		|				|
 *		+-------------------------------+
 *		| caller's caller return addr.	|
 *		+-------------------------------+
 *	esp -->	| caller's return address	|
 *		+-------------------------------+
 *
 *	edx --> function unique label
 */

Entry(_dummy_mcount)
	ENTER

#if DO_STATS
	pushl	%ebx
	MP_DISABLE_PREEMPTION(%ebx)
	Vload
	SDINC(V_dummy(%ebx))
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
#endif

	LEAVE0
	ret
END(_dummy_mcount)


/*
 * Entry point for System V based profiling, counting how many times each
 * function is called.  The function label is passed in %edx, and the top
 * two words on the stack are the caller's address, and the caller's
 * return address.
 *
 *		+-------------------------------+
 *		|				|
 *		|				|
 *		| caller's caller stack,	|
 *		| saved registers, params.	|
 *		|				|
 *		|				|
 *		+-------------------------------+
 *		| caller's caller return addr.	|
 *		+-------------------------------+
 *	esp -->	| caller's return address	|
 *		+-------------------------------+
 *
 *	edx --> function unique label
 *
 * We don't worry about the possibility of two threads calling
 * the same function for the first time simultaneously.  If that
 * happens, two records will be created, and one of the records'
 * addresses will be stored in the function unique label (which
 * is aligned by the compiler, so we don't have to watch out for
 * crossing page/cache boundaries).
 */

Entry(_prof_mcount)
	ENTER

#if DO_STATS
	pushl	%ebx
	MP_DISABLE_PREEMPTION(%ebx)
	Vload
	SDINC(V_cnt(%ebx))
#endif

	movl	(%edx),%eax			/* initialized? */
	cmpl	$0,%eax
	je	LCL(pnew)

	DINC2(P_count(%eax),P_overflow(%eax))	/* bump function count (double precision) */

#if DO_STATS
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
#endif

	LEAVE0
	ret

	.align	ALIGN
LCL(pnew):

#if !DO_STATS
	pushl	%ebx
	MP_DISABLE_PREEMPTION(%ebx)
	Vload
#endif

	SLOCK incl V_prof_records(%ebx)
	pushl	%edx
	movl	$(P_size),%eax			/* allocation size */
	movl	$(C_prof),%ecx			/* allocation pool */
	call	EXT(_profile_alloc_asm)		/* allocate a new record */
	popl	%edx

	movl	Estack+4(%esp),%ecx		/* caller's address */
	movl	%ecx,P_addr(%eax)
	movl	$1,P_count(%eax)		/* call count */
	xchgl	%eax,(%edx)			/* update function header */
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
	LEAVE0
	ret

END(_prof_mcount)


/*
 * Entry point for BSD based graph profiling, counting how many times each
 * unique call graph (caller + callee) is called.  The function label is
 * passed in %edx, and the top two words on the stack are the caller's
 * address, and the caller's return address.
 *
 *		+-------------------------------+
 *		|				|
 *		|				|
 *		| caller's caller stack,	|
 *		| saved registers, params.	|
 *		|				|
 *		|				|
 *		+-------------------------------+
 *		| caller's caller return addr.	|
 *		+-------------------------------+
 *	esp -->	| caller's return address	|
 *		+-------------------------------+
 *
 *	edx --> function unique label
 *
 * We don't worry about the possibility of two threads calling the same
 * function simultaneously.  If that happens, two records will be created, and
 * one of the records' addresses will be stored in the function unique label
 * (which is aligned by the compiler).
 *
 * By design, the gprof header is not locked.  Each of the cache pointers is
 * always a valid pointer (possibly to a null record), and if another thread
 * comes in and modifies the pointer, it does so atomically with a simple store.
 * Since all arcs are in the hash table, the caches are just to avoid doing
 * a multiplication in the common case, and if they don't match, the arcs will
 * still be found.
 */
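
/*
 * The cache probe sequence below, restated as a C sketch (gprof_hdr and
 * gprof_arc as sketched earlier; the count updates are the DINC2 double
 * precision increments):
 *
 *	static struct gprof_arc *
 *	cache_lookup(struct gprof_hdr *h, unsigned int frompc)
 *	{
 *		int i, j;
 *		for (i = 0; i < 3; i++) {
 *			struct gprof_arc *a = h->cache[i];
 *			if (a->frompc == frompc) {
 *				for (j = i; j > 0; j--)	-- promote to front
 *					h->cache[j] = h->cache[j - 1];
 *				h->cache[0] = a;
 *				return a;
 *			}
 *		}
 *		return 0;	-- fall through to the hash table search
 *	}
 *
 * So a miss costs at most three compares before the hash table is
 * consulted.
 */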

Entry(_gprof_mcount)

	ENTER
	movl	Estack+4(%esp),%ecx		/* caller's caller address */

#if DO_STATS
	pushl	%ebx
	MP_DISABLE_PREEMPTION(%ebx)
	Vload
	SDINC(V_cnt(%ebx))			/* bump profile call counter (double int) */
#endif

	movl	(%edx),%eax			/* Gprof header allocated? */
	cmpl	$0,%eax
	je	LCL(gnew)			/* skip if first call */

	DINC2(H_prof+P_count(%eax),H_prof+P_overflow(%eax))	/* bump function count */

	/* See if this call arc is the same as the last time */
MARK(_gprof_mcount_cache1)
	movl	H_cache_ptr(%eax),%edx		/* last arc searched */
	cmpl	%ecx,G_frompc(%edx)		/* skip if not equal */
	jne	LCL(gcache2)

	/* Same as last time, increment and return */

	DINC2(G_count(%edx),G_overflow(%edx))	/* bump arc count */

#if DO_STATS
	SDINC(V_cache_hits1(%ebx))		/* update counter */
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
#endif

	LEAVE0
	ret

	/* Search second cache entry */
	/* %eax = gprof func header, %ebx = vars address if DO_STATS, %ecx = caller's caller */
	/* %edx = first arc searched */
	/* %ebx if DO_STATS pushed on stack */

	.align	ALIGN
MARK(_gprof_mcount_cache2)
LCL(gcache2):
	pushl	%esi				/* get a saved register */
	movl	H_cache_ptr+4(%eax),%esi	/* 2nd arc to be searched */
	cmpl	%ecx,G_frompc(%esi)		/* skip if not equal */
	jne	LCL(gcache3)

	/* Element found, increment, reset last arc searched and return */

	DINC2(G_count(%esi),G_overflow(%esi))	/* bump arc count */

	movl	%esi,H_cache_ptr+0(%eax)	/* swap 1st and 2nd cached arcs */
	popl	%esi
	movl	%edx,H_cache_ptr+4(%eax)

#if DO_STATS
	SDINC(V_cache_hits2(%ebx))		/* update counter */
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
#endif

	LEAVE0
	ret

	/* Search third cache entry */
	/* %eax = gprof func header, %ebx = vars address if DO_STATS, %ecx = caller's caller */
	/* %edx = first arc searched, %esi = second arc searched */
	/* %esi, %ebx if DO_STATS pushed on stack */

	.align	ALIGN
MARK(_gprof_mcount_cache3)
LCL(gcache3):
	pushl	%edi
	movl	H_cache_ptr+8(%eax),%edi	/* 3rd arc to be searched */
	cmpl	%ecx,G_frompc(%edi)		/* skip if not equal */
	jne	LCL(gnocache)

	/* Element found, increment, reset last arc searched and return */

	DINC2(G_count(%edi),G_overflow(%edi))	/* bump arc count */

	movl	%edi,H_cache_ptr+0(%eax)	/* make this 1st cached arc */
	movl	%esi,H_cache_ptr+8(%eax)
	movl	%edx,H_cache_ptr+4(%eax)
	popl	%edi
	popl	%esi

#if DO_STATS
	SDINC(V_cache_hits3(%ebx))		/* update counter */
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
#endif

	LEAVE0
	ret

	/* No function context, allocate a new context */
	/* %ebx is the variables address if DO_STATS */
	/* %ecx is the caller's caller's address */
	/* %edx is the unique function pointer */
	/* %ebx if DO_STATS pushed on stack */

	.align	ALIGN
MARK(_gprof_mcount_new)
LCL(gnew):
	pushl	%esi
	pushl	%edi

#if !DO_STATS
	pushl	%ebx				/* Address of vars needed for alloc */
	MP_DISABLE_PREEMPTION(%ebx)
	Vload				       	/* stats already loaded address */
#endif

	SLOCK incl V_prof_records(%ebx)
	movl	%edx,%esi			/* save unique function ptr */
	movl	%ecx,%edi			/* and caller's caller address */
	movl	$(H_size),%eax			/* memory block size */
	movl	$(C_gfunc),%ecx			/* gprof function header memory pool */
	call	EXT(_profile_alloc_asm)

	movl	V_hash_ptr(%ebx),%ecx		/* copy hash_ptr to func header */
	movl	V_dummy_ptr(%ebx),%edx		/* dummy cache entry */
	movl	%ecx,H_hash_ptr(%eax)
	movl	%edx,H_cache_ptr+0(%eax)	/* store dummy cache ptrs */
	movl	%edx,H_cache_ptr+4(%eax)
	movl	%edx,H_cache_ptr+8(%eax)
	movl	%esi,H_unique_ptr(%eax)		/* remember function unique ptr */
	movl	Estack+12(%esp),%ecx		/* caller's address */
	movl	$1,H_prof+P_count(%eax)		/* function called once so far */
	movl	%ecx,H_prof+P_addr(%eax)	/* set up prof information */
	movl	%eax,(%esi)			/* update context block address */
	movl	%edi,%ecx			/* caller's caller address */
	movl	%edx,%esi			/* 2nd cached arc */

#if !DO_STATS
	popl	%ebx
#endif

	/* Fall through to add element to the hash table.  This may involve */
	/* searching a few hash table elements that don't need to be searched */
	/* since we have a new element, but it allows the hash table function */
	/* to be specified in only one place */

	/* Didn't find entry in cache, search the global hash table */
	/* %eax = gprof func header, %ebx = vars address if DO_STATS */
	/* %ecx = caller's caller */
	/* %edx, %esi = cached arcs that were searched */
	/* %edi, %esi, %ebx if DO_STATS pushed on stack */

	.align	ALIGN
MARK(_gprof_mcount_hash)
LCL(gnocache):

	pushl	%esi				/* save 2nd arc searched */
	pushl	%edx				/* save 1st arc searched */
	movl	%eax,%esi			/* save gprof func header */

#if DO_STATS
	SDINC(V_hash_num(%ebx))
	movl	Estack+20(%esp),%edi		/* caller's address */
#else
	movl	Estack+16(%esp),%edi		/* caller's address */
#endif
	movl	%ecx,%eax			/* caller's caller address */
	imull	%edi,%eax			/* multiply to get hash */
	movl	H_hash_ptr(%esi),%edx		/* hash pointer */
	shrl	$(GPROF_HASH_SHIFT),%eax	/* eliminate low order bits */
	andl	$(GPROF_HASH_MASK),%eax		/* mask to get hash value */
	leal	0(%edx,%eax,4),%eax		/* pointer to hash bucket */
	movl	%eax,%edx			/* save hash bucket address */

	/* %eax = old arc, %ebx = vars address if DO_STATS, %ecx = caller's caller */
	/* %edx = hash bucket address, %esi = gfunc ptr, %edi = caller's addr */
	/* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */

	.align	ALIGN
LCL(ghash):
	movl	G_next(%eax),%eax		/* get next hash element */
	cmpl	$0,%eax				/* end of line? */
	je	LCL(ghashnew)			/* skip if allocate new hash */

#if DO_STATS
	SDINC(V_hash_search(%ebx))
#endif

	cmpl	G_selfpc(%eax),%edi		/* loop back if not one we want */
	jne	LCL(ghash)

	cmpl	G_frompc(%eax),%ecx		/* loop back if not one we want */
	jne	LCL(ghash)

	/* Found an entry, increment count, set up for caching, and return */
	/* %eax = arc, %ebx = vars address if DO_STATS, %esi = func header */
	/* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */

	DINC2(G_count(%eax),G_overflow(%eax))	/* bump arc count */

	popl	%ecx				/* previous 1st arc searched */
	movl	%eax,H_cache_ptr+0(%esi)	/* this element is now 1st arc */
	popl	%edi				/* previous 2nd arc searched */
	movl	%ecx,H_cache_ptr+4(%esi)	/* new 2nd arc to be searched */
	movl	%edi,H_cache_ptr+8(%esi)	/* new 3rd arc to be searched */
	popl	%edi
	popl	%esi

#if DO_STATS
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
#endif

	LEAVE0
	ret					/* return to user */

	/* Allocate new arc */
	/* %eax = old arc, %ebx = vars address if DO_STATS, %ecx = caller's caller */
	/* %edx = hash bucket address, %esi = gfunc ptr, %edi = caller's addr */
	/* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */

	.align	ALIGN
MARK(_gprof_mcount_hashnew)
LCL(ghashnew):

#if !DO_STATS
	pushl	%ebx				/* load address of vars if we haven't */
	MP_DISABLE_PREEMPTION(%ebx)
	Vload					/* already done so */
#endif

	SLOCK incl V_gprof_records(%ebx)
	pushl	%edx
	movl	%ecx,%edi			/* save caller's caller */
	movl	$(G_size),%eax			/* arc size */
	movl	$(C_gprof),%ecx			/* gprof memory pool */
	call	EXT(_profile_alloc_asm)
	popl	%edx

	movl	$1,G_count(%eax)		/* set call count */
	movl	Estack+20(%esp),%ecx		/* caller's address */
	movl	%edi,G_frompc(%eax)		/* caller's caller */
	movl	%ecx,G_selfpc(%eax)

#if !DO_STATS
	popl	%ebx				/* release %ebx if no stats */
#endif

	movl	(%edx),%ecx			/* first hash bucket */
	movl	%ecx,G_next(%eax)		/* update link */
	movl	%eax,%ecx			/* copy for xchgl */
	xchgl	%ecx,(%edx)			/* add to hash linked list */
	movl	%ecx,G_next(%eax)		/* update in case list changed */

	popl	%ecx				/* previous 1st arc searched */
	popl	%edi				/* previous 2nd arc searched */
	movl	%eax,H_cache_ptr+0(%esi)	/* this element is now 1st arc */
	movl	%ecx,H_cache_ptr+4(%esi)	/* new 2nd arc to be searched */
	movl	%edi,H_cache_ptr+8(%esi)	/* new 3rd arc to be searched */

	popl	%edi
	popl	%esi

#if DO_STATS
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
#endif

	LEAVE0
	ret					/* return to user */

END(_gprof_mcount)


/*
 * This function assumes that neither the caller nor its caller
 * has omitted the frame pointer, so that it can get at the caller's
 * caller.  The stack looks like the following at the time of the call:
 *
 *		+-------------------------------+
 *		|				|
 *		|				|
 *		| caller's caller stack,	|
 *		| saved registers, params.	|
 *		|				|
 *		|				|
 *		+-------------------------------+
 *		| caller's caller return addr.	|
 *		+-------------------------------+
 *	fp -->	| previous frame pointer	|
 *		+-------------------------------+
 *		|				|
 *		| caller's stack, saved regs,	|
 *		| params.			|
 *		|				|
 *		+-------------------------------+
 *	sp -->	| caller's return address	|
 *		+-------------------------------+
 *
 * Recent versions of the compiler put the address of the pointer-sized
 * word in %edx.  Previous versions did not, but this code
 * does not support them.
 */

/*
 * Note that OSF/rose blew defining _mcount, since it prepends leading
 * underscores, and _mcount didn't have a second leading underscore.  However,
 * some of the kernel/server functions 'know' that mcount has a leading
 * underscore, so we satisfy both camps.
 */

#if OLD_MCOUNT
	.globl	mcount
	.globl	_mcount
	ELF_FUNC(mcount)
	ELF_FUNC(_mcount)
	.align	FALIGN
_mcount:
mcount:

	pushl	%ebx
	MP_DISABLE_PREEMPTION(%ebx)
	Vload

#if DO_STATS
	SDINC(V_old_mcount(%ebx))
#endif

	/* In calling the functions, we will actually leave 1 extra word on the */
	/* top of the stack, but generated code will not notice, since the function */
	/* uses a frame pointer */

	movl	V_mcount_ptr_ptr(%ebx),%ecx	/* address of mcount_ptr */
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
	movl	4(%ebp),%eax			/* caller's caller return address */
	xchgl	%eax,(%esp)			/* push & get return address */
	pushl	%eax				/* push return address */
	jmp	*(%ecx)				/* go to profile the function */

End(mcount)
End(_mcount)
#endif


#if !defined(KERNEL) && !defined(MACH_KERNEL)

/*
 * Convert a 64-bit integer to a string.
 * Arg #1 is a pointer to a string (at least 24 bytes) or NULL
 * Arg #2 is the low part of the 64-bit integer.
 * Arg #3 is the high part of the 64-bit integer.
 */
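
/*
 * The conversion below is long division by 10 done 32 bits at a time:
 * dividing the high word by 10 leaves a remainder that becomes the upper
 * half of the next 64/32 divl.  In portable C the same algorithm reads
 * (a sketch assuming a 64-bit type, which the assembly cannot assume):
 *
 *	static char *
 *	cnt_to_decimal(char *end, unsigned long long n)
 *	{
 *		*end = '\0';
 *		do {
 *			*--end = (char)('0' + (n % 10));
 *			n /= 10;
 *		} while (n != 0);
 *		return end;	-- points at the first digit
 *	}
 *
 * where "end" points at the last byte of a buffer at least N_digit
 * bytes long.
 */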

Entry(_profile_cnt_to_decimal)
	ENTER
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	Estack+16(%esp),%ebx		/* pointer or null */
	movl	Estack+20(%esp),%edi		/* low part of number */
	movl	$10,%ecx			/* divisor */
	cmpl	$0,%ebx				/* skip if pointer ok */
	jne	LCL(cvt_nonnull)

	MP_DISABLE_PREEMPTION(%ebx)
	Vload					/* get _profile_vars address */
	leal	V_num_buffer(%ebx),%ebx		/* temp buffer to use */

	.align	ALIGN
LCL(cvt_nonnull):
	addl	$(N_digit-1),%ebx		/* point string at end */
	movb	$0,0(%ebx)			/* null terminate string */

#if OVERFLOW
	movl	Estack+24(%esp),%esi		/* high part of number */
	cmpl	$0,%esi				/* anything left in high part? */
	je	LCL(cvt_low)

	.align	ALIGN
LCL(cvt_high):
	movl	%esi,%eax			/* calculate high/10 & high%10 */
	xorl	%edx,%edx
	divl	%ecx
	movl	%eax,%esi

	movl	%edi,%eax			/* calculate (low + (high%10)*2^32) / 10 */
	divl	%ecx
	movl	%eax,%edi

	decl	%ebx				/* decrement string pointer */
	addl	$48,%edx			/* convert from 0..9 -> '0'..'9' */
	movb	%dl,0(%ebx)			/* store digit in string */
	cmpl	$0,%esi				/* anything left in high part? */
	jne	LCL(cvt_high)

#endif	/* OVERFLOW */

	.align	ALIGN
LCL(cvt_low):
	movl	%edi,%eax			/* get low part into %eax */

	.align	ALIGN
LCL(cvt_low2):
	xorl	%edx,%edx			/* 0 */
	divl	%ecx				/* calculate next digit */
	decl	%ebx				/* decrement string pointer */
	addl	$48,%edx			/* convert from 0..9 -> '0'..'9' */
	movb	%dl,0(%ebx)			/* store digit in string */
	cmpl	$0,%eax				/* any more digits to convert? */
	jne	LCL(cvt_low2)

	movl	%ebx,%eax			/* return value */
	popl	%edi
	popl	%esi
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
	LEAVE0
	ret

END(_profile_cnt_to_decimal)

#endif
