/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * HISTORY
 *
 * Revision 1.1.1.1 1998/09/22 21:05:49 wsanchez
 * Import of Mac OS X kernel (~semeria)
 *
 * Revision 1.1.1.1 1998/03/07 02:26:08 wsanchez
 * Import of OSF Mach kernel (~mburg)
 *
 * Revision 1.1.7.1 1997/09/22 17:41:24 barbou
 * MP+RT: protect cpu_number() usage against preemption.
 * [97/09/16 barbou]
 *
 * Revision 1.1.5.1 1995/01/06 19:53:37 devrcs
 * mk6 CR668 - 1.3b26 merge
 * new file for mk6
 * [1994/10/12 22:25:20 dwm]
 *
 * Revision 1.1.2.2 1994/05/16 19:19:17 meissner
 * Add support for converting 64-bit integers to a decimal string.
 * Use the correct address (selfpc) when creating the prof header for gprof.
 * [1994/04/28 21:44:59 meissner]
 *
 * Revision 1.1.2.1 1994/04/08 17:51:42 meissner
 * Make most stats 64 bits, except for things like memory allocation.
 * [1994/04/02 14:58:21 meissner]
 *
 * Do not provide old mcount support under MK or server.
 * Fixup stats size so it is the same as in profile-md.h.
 * [1994/03/29 21:00:03 meissner]
 *
 * Use faster sequence for overflow addition.
 * Keep {dummy,prof,gprof,old}_mcount counts in double precision.
 * Add kernel NCPUS > 1 support.
 * [1994/03/17 20:13:23 meissner]
 *
 * Add gprof/prof overflow support
 * [1994/03/17 14:56:44 meissner]
 *
 * Add size of histogram counters & unused fields to profile_profil struct
 * [1994/02/17 21:41:44 meissner]
 *
 * Add too_low/too_high to profile_stats.
 * [1994/02/16 22:38:11 meissner]
 *
 * Bump # allocation contexts to 32 from 16.
 * Store unique ptr address in gprof function header structure for _profile_reset.
 * Add new fields from profile-{internal,md}.h.
 * Align loop looking for an unlocked acontext.
 * Count # times a locked context block was found.
 * Expand copyright.
 * [1994/02/07 12:40:56 meissner]
 *
 * Keep track of the number of times the kernel overflows the HISTCOUNTER counter.
 * [1994/02/03 20:13:23 meissner]
 *
 * Add stats for {user,kernel,idle} mode in the kernel.
 * [1994/02/03 15:17:22 meissner]
 *
 * No change.
 * [1994/02/03 00:58:49 meissner]
 *
 * Combine _profile_{vars,stats,md}; Allow more than one _profile_vars.
 * [1994/02/01 12:03:56 meissner]
 *
 * Move _mcount_ptr to be closer to other data declarations.
 * Add text_len to profile_profil structure for mk.
 * Split records_cnt into prof_cnt/gprof_cnt.
 * Always update prof_cnt/gprof_cnt even if not DO_STATS.
 * Add current/max cpu indicator to stats for kernel.
 * [1994/01/28 23:33:20 meissner]
 *
 * Don't do 4+Lgotoff(lab), use separate labels.
 * Change GPROF_HASH_SHIFT to 9 (from 8).
 * [1994/01/26 22:00:59 meissner]
 *
 * Fixup NO_RECURSIVE_ALLOC to do byte loads, not word loads.
 * [1994/01/26 20:30:57 meissner]
 *
 * Move callback pointers into separate allocation context.
 * Add size fields for other structures to profile-vars.
 * Allocate string table as one large allocation.
 * Rewrite old mcount code once again.
 * Use multiply to make hash value, not divide.
 * Hash table is now a power of two.
 * [1994/01/26 20:23:32 meissner]
 *
 * Cut hash table size back to 16189.
 * Add size fields to all structures.
 * Add major/minor version number to _profile_md.
 * Move allocation context block pointers to _profile_vars.
 * Move _gprof_dummy after _profile_md.
 * New function header code now falls into hash an element
 * to avoid having the hash code duplicated or use a macro.
 * Fix bug in _gprof_mcount with ELF shared libraries.
 * [1994/01/25 01:45:59 meissner]
 *
 * Move init functions to C code; rearrange profil variables.
 * [1994/01/22 01:11:14 meissner]
 *
 * No change.
 * [1994/01/20 20:56:43 meissner]
 *
 * Fixup copyright.
 * [1994/01/18 23:07:39 meissner]
 *
 * Make flags byte-sized.
 * Add have_bb flag.
 * Add init_format flag.
 * Always put word size multiplier first in .space.
 * [1994/01/18 21:57:14 meissner]
 *
 * Fix elfpic problems in last change.
 * [1994/01/16 14:04:26 meissner]
 *
 * Rewrite gprof caching to be faster & not need a lock.
 * Record prof information for gprof too.
 * Bump reserved stats to 64.
 * Bump up hash table size 30799.
 * Conditionally use lock prefix.
 * Change most #ifdef's to #if.
 * DEBUG_PROFILE turns on stack frames now.
 * Conditionally add externs to gprof to determine where time is spent.
 * Prof_mcount uses xchgl to update function pointer.
 * [1994/01/15 18:40:33 meissner]
 *
 * Fix a comment.
 * Separate statistics from debugging (though debugging turns it on).
 * Remove debug code that traces each gprof request.
 * [1994/01/15 00:59:02 meissner]
 *
 * Move max hash bucket calculation into _gprof_write & put info in stats structure.
 * [1994/01/04 16:15:14 meissner]
 *
 * Use _profile_printf to write diagnostics; add diag_stream to hold stream to write to.
 * [1994/01/04 15:37:44 meissner]
 *
 * Add more allocation memory pools (gprof function hdrs in particular).
 * For prof, gprof arc, and gprof function hdrs, allocate 16 pages at a time.
 * Add major/minor version numbers to _profile_{vars,stats}.
 * Add # profil buckets field to _profil_stats.
 * [19
 *
 * $EndLog$
 */

/*
 * Common 386 profiling module that is shared between the kernel, mach
 * servers, and the user space library.  Each environment includes
 * this file.
 */

	.file "profile-asm.s"

#include <machine/asm.h>

/*
 * By default, debugging turns on statistics and stack frames.
 */

#if DEBUG_PROFILE
#ifndef DO_STATS
#define DO_STATS 1
#endif

#ifndef STACK_FRAMES
#define STACK_FRAMES 1
#endif
#endif

#ifndef OLD_MCOUNT
#define OLD_MCOUNT 0			/* do not compile old code for mcount */
#endif

#ifndef DO_STATS
#define DO_STATS 1			/* compile in statistics code */
#endif

#ifndef DO_LOCK
#define DO_LOCK 0			/* use lock; in front of increments */
#endif

#ifndef LOCK_STATS
#define LOCK_STATS DO_LOCK		/* update stats with lock set */
#endif

#ifndef STACK_FRAMES
#define STACK_FRAMES 0			/* create stack frames for debugger */
#endif

#ifndef NO_RECURSIVE_ALLOC
#define NO_RECURSIVE_ALLOC 0		/* check for recursive allocs */
					/* (not thread safe!) */
#endif

#ifndef MARK_GPROF
#define MARK_GPROF 0			/* add externs for gprof profiling */
#endif

#ifndef OVERFLOW
#define OVERFLOW 1			/* add overflow checking support */
#endif

/*
 * Turn on the use of the lock prefix if desired.
 */

#ifndef LOCK
#if DO_LOCK
#define LOCK lock;
#else
#define LOCK
#endif
#endif

#ifndef SLOCK
#if LOCK_STATS
#define SLOCK LOCK
#else
#define SLOCK
#endif
#endif

/*
 * Double or single precision incrementing
 */

#if OVERFLOW
#define DINC(mem)		LOCK addl $1,mem; LOCK adcl $0,4+mem
#define DINC2(mem,mem2)		LOCK addl $1,mem; LOCK adcl $0,mem2
#define SDINC(mem)		SLOCK addl $1,mem; SLOCK adcl $0,4+mem
#define SDADD(val,mem)		SLOCK addl val,mem; SLOCK adcl $0,4+mem
#define SDADDNEG(val,mem)	SLOCK subl val,mem; SLOCK adcl $0,4+mem
#define SDSUB(val,mem)		SLOCK subl val,mem; SLOCK sbbl $0,4+mem

#else
#define DINC(mem)		LOCK incl mem
#define DINC2(mem,mem2)		LOCK incl mem
#define SDINC(mem)		SLOCK incl mem
#define SDADD(val,mem)		SLOCK addl val,mem
#define SDADDNEG(val,mem)	SLOCK subl val,mem
#define SDSUB(val,mem)		SLOCK subl val,mem
#endif

/*
 * Stack frame support so that debugger traceback works.
 */

#if STACK_FRAMES
#define ENTER	pushl %ebp; movl %esp,%ebp
#define LEAVE0	popl %ebp
#define Estack	4
#else
#define ENTER
#define LEAVE0
#define Estack	0
#endif

/*
 * Gprof profiling.
 */

#if MARK_GPROF
#define MARK(name) .globl EXT(name); ELF_FUNC(EXT(name)); ELF_SIZE(EXT(name),0); LEXT(name)
#else
#define MARK(name)
#endif

/*
 * Profiling allocation context block.  Each time memory is needed, the
 * allocator loops until it finds an unlocked context block, and allocates
 * from that block.  If no context blocks are available, a new memory
 * pool is allocated, and added to the end of the chain.
 */

LCL(A_next)		= 0			/* next context block link (must be 0) */
LCL(A_plist)		= LCL(A_next)+4		/* head of page list for context block */
LCL(A_lock)		= LCL(A_plist)+4	/* lock word */
LCL(A_size)		= LCL(A_lock)+4		/* size of context block */

#define A_next		LCL(A_next)
#define A_plist		LCL(A_plist)
#define A_lock		LCL(A_lock)
#define A_size		LCL(A_size)
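
/*
 * For reference, a hedged C-style sketch of the context block the A_* offsets
 * above describe (the field names are illustrative only; the offsets, not any
 * header, are what the code below uses).  The allocator walks the A_next
 * chain, claiming A_lock with xchgl, and grows the chain when every block is
 * busy:
 *
 *	struct alloc_context {
 *		struct alloc_context	*next;	// A_next: chain link, must be at offset 0
 *		struct page_list	*plist;	// A_plist: head of this context's page list
 *		long			lock;	// A_lock: 0 = free, -1 = claimed via xchgl
 *	};
 */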

/*
 * Allocation contexts used.
 */

LCL(C_prof)		= 0			/* prof records */
LCL(C_gprof)		= 1			/* gprof arc records */
LCL(C_gfunc)		= 2			/* gprof function headers */
LCL(C_misc)		= 3			/* misc. allocations */
LCL(C_profil)		= 4			/* memory for profil */
LCL(C_dci)		= 5			/* memory for dci */
LCL(C_bb)		= 6			/* memory for basic blocks */
LCL(C_callback)		= 7			/* memory for callbacks */
LCL(C_max)		= 32			/* # allocation contexts */

#define C_prof		LCL(C_prof)
#define C_gprof		LCL(C_gprof)
#define C_gfunc		LCL(C_gfunc)
#define C_max		LCL(C_max)

/*
 * Linked list of memory allocations.
 */

LCL(M_first)		= 0			/* pointer to first byte available */
LCL(M_ptr)		= LCL(M_first)+4	/* pointer to next available byte */
LCL(M_next)		= LCL(M_ptr)+4		/* next page allocated */
LCL(M_nfree)		= LCL(M_next)+4		/* # bytes available */
LCL(M_nalloc)		= LCL(M_nfree)+4	/* # bytes allocated */
LCL(M_num)		= LCL(M_nalloc)+4	/* # allocations done on this page */
LCL(M_size)		= LCL(M_num)+4		/* size of page header */

#define M_first		LCL(M_first)
#define M_ptr		LCL(M_ptr)
#define M_next		LCL(M_next)
#define M_nfree		LCL(M_nfree)
#define M_nalloc	LCL(M_nalloc)
#define M_num		LCL(M_num)
#define M_size		LCL(M_size)

/*
 * Prof data type.
 */

LCL(P_addr)		= 0			/* function address */
LCL(P_count)		= LCL(P_addr)+4		/* # times function called */
LCL(P_overflow)		= LCL(P_count)+4	/* # times count overflowed */
LCL(P_size)		= LCL(P_overflow)+4	/* size of prof data type */

#define P_addr		LCL(P_addr)
#define P_count		LCL(P_count)
#define P_overflow	LCL(P_overflow)
#define P_size		LCL(P_size)

/*
 * Gprof data type.
 */

LCL(G_next)		= 0			/* next hash link (must be 0) */
LCL(G_frompc)		= LCL(G_next)+4		/* caller's caller */
LCL(G_selfpc)		= LCL(G_frompc)+4	/* caller's address */
LCL(G_count)		= LCL(G_selfpc)+4	/* # times arc traversed */
LCL(G_overflow)		= LCL(G_count)+4	/* # times count overflowed */
LCL(G_size)		= LCL(G_overflow)+4	/* size of gprof data type */

#define G_next		LCL(G_next)
#define G_frompc	LCL(G_frompc)
#define G_selfpc	LCL(G_selfpc)
#define G_count		LCL(G_count)
#define G_overflow	LCL(G_overflow)
#define G_size		LCL(G_size)

/*
 * Gprof header.
 *
 * At least one header is allocated for each unique function that is profiled.
 * In order to save time calculating the hash value, the last H_maxcache
 * distinct arcs are cached within this structure.  Also, to avoid loading
 * the GOT when searching the hash table, we copy the hash pointer to this
 * structure, so that we only load the GOT when we need to allocate an arc.
 */

LCL(H_maxcache)		= 3			/* # of cache table entries */
LCL(H_csize)		= 4*LCL(H_maxcache)	/* size of each cache array */

LCL(H_hash_ptr)		= 0			/* hash table to use */
LCL(H_unique_ptr)	= LCL(H_hash_ptr)+4	/* function unique pointer */
LCL(H_prof)		= LCL(H_unique_ptr)+4	/* prof statistics */
LCL(H_cache_ptr)	= LCL(H_prof)+P_size	/* cache table of element pointers */
LCL(H_size)		= LCL(H_cache_ptr)+LCL(H_csize)	/* size of gprof header type */

#define H_maxcache	LCL(H_maxcache)
#define H_csize		LCL(H_csize)
#define H_hash_ptr	LCL(H_hash_ptr)
#define H_unique_ptr	LCL(H_unique_ptr)
#define H_prof		LCL(H_prof)
#define H_cache_ptr	LCL(H_cache_ptr)
#define H_size		LCL(H_size)

/*
 * Number of digits needed to write a 64 bit number including trailing null.
 * (rounded up to be divisible by 4).
 */

#define N_digit		24
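
/*
 * Hedged C-style sketches of the three record types the P_*, G_* and H_*
 * offsets above describe (names are illustrative only; the offsets are what
 * the code below actually uses):
 *
 *	struct prof_ext {			// P_size bytes
 *		unsigned long	addr;		// P_addr: function address
 *		unsigned long	count;		// P_count: call count, low word
 *		unsigned long	overflow;	// P_overflow: call count, high word
 *	};
 *
 *	struct gprof_arc {			// G_size bytes
 *		struct gprof_arc *next;		// G_next: hash chain link, must be at offset 0
 *		unsigned long	frompc;		// G_frompc: caller's caller
 *		unsigned long	selfpc;		// G_selfpc: caller's address
 *		unsigned long	count;		// G_count: arc count, low word
 *		unsigned long	overflow;	// G_overflow: arc count, high word
 *	};
 *
 *	struct gprof_hdr {			// H_size bytes, one per profiled function
 *		struct gprof_arc **hash_ptr;	// H_hash_ptr: copy of the hash table pointer
 *		void		*unique_ptr;	// H_unique_ptr: the function's unique label
 *		struct prof_ext	prof;		// H_prof: prof statistics for this function
 *		struct gprof_arc *cache[3];	// H_cache_ptr: last H_maxcache arcs seen
 *	};
 */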


	.data

/*
 * Default gprof hash table size, which must be a power of two.
 * The shift specifies how many low order bits to eliminate when
 * calculating the hash value.
 */

#ifndef GPROF_HASH_SIZE
#define GPROF_HASH_SIZE 16384
#endif

#ifndef GPROF_HASH_SHIFT
#define GPROF_HASH_SHIFT 9
#endif

#define GPROF_HASH_MASK (GPROF_HASH_SIZE-1)

DATA(_profile_hash_size)
	.long	GPROF_HASH_SIZE
ENDDATA(_profile_hash_size)
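
/*
 * For orientation, the arc hash computed later in _gprof_mcount amounts to
 * the following (a sketch only, not an authoritative definition):
 *
 *	unsigned long
 *	gprof_hash(unsigned long frompc, unsigned long selfpc)
 *	{
 *		return ((frompc * selfpc) >> GPROF_HASH_SHIFT) & GPROF_HASH_MASK;
 *	}
 *
 * i.e. a multiplicative hash whose low order bits are discarded, indexing a
 * power of two table of GPROF_HASH_SIZE buckets.
 */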


/*
 * Pointer that the compiler uses to call to the appropriate mcount function.
 */

DATA(_mcount_ptr)
	.long	EXT(_dummy_mcount)
ENDDATA(_mcount_ptr)

/*
 * Global profile variables.  The structure that accesses this in C is declared
 * in profile-internal.h.  All items in .data that follow this will be used as
 * one giant record, and each unique machine, thread, kgmon output or what have
 * you will create a separate instance.  Typically there is only one instance
 * which will be the memory laid out below.
 */

LCL(var_major_version)	= 0				/* major version number */
LCL(var_minor_version)	= LCL(var_major_version)+4	/* minor version number */
LCL(vars_size)		= LCL(var_minor_version)+4	/* size of _profile_vars structure */
LCL(plist_size)		= LCL(vars_size)+4		/* size of page_list structure */
LCL(acontext_size)	= LCL(plist_size)+4		/* size of allocation contexts */
LCL(callback_size)	= LCL(acontext_size)+4		/* size of callback structure */
LCL(type)		= LCL(callback_size)+4		/* profile type (gprof, prof) */
LCL(error_msg)		= LCL(type)+4			/* error message for perror */
LCL(filename)		= LCL(error_msg)+4		/* filename to write to */
LCL(str_ptr)		= LCL(filename)+4		/* string table pointer */
LCL(stream)		= LCL(str_ptr)+4		/* stdio stream to write to */
LCL(diag_stream)	= LCL(stream)+4			/* stdio stream to write diagnostics to */
LCL(fwrite_func)	= LCL(diag_stream)+4		/* function like fwrite to output bytes */
LCL(page_size)		= LCL(fwrite_func)+4		/* page size in bytes */
LCL(str_bytes)		= LCL(page_size)+4		/* # bytes in string table */
LCL(str_total)		= LCL(str_bytes)+4		/* # total bytes allocated for string table */
LCL(clock_ticks)	= LCL(str_total)+4		/* # clock ticks per second */

			/* profil variables */
LCL(profil_start)	= LCL(clock_ticks)+4		/* start of profil variables */
LCL(lowpc)		= LCL(clock_ticks)+4		/* lowest address */
LCL(highpc)		= LCL(lowpc)+4			/* highest address */
LCL(text_len)		= LCL(highpc)+4			/* highpc-lowpc */
LCL(profil_len)		= LCL(text_len)+4		/* size of profil buffer */
LCL(counter_size)	= LCL(profil_len)+4		/* size of individual counter */
LCL(scale)		= LCL(counter_size)+4		/* scale factor */
LCL(profil_unused)	= LCL(scale)+4			/* unused fields */
LCL(profil_end)		= LCL(profil_unused)+4*8	/* end of profil_info structure */
LCL(profil_buf)		= LCL(profil_end)		/* buffer for profil */

			/* Output selection func ptrs */
LCL(output_init)	= LCL(profil_buf)+4		/* Initialization */
LCL(output)		= LCL(output_init)+4		/* Write out profiling info */
LCL(output_ptr)		= LCL(output)+4			/* Output specific data ptr */

			/* Memory allocation support */
LCL(acontext)		= LCL(output_ptr)+4		/* pointers to allocation context blocks */

LCL(bogus_func)		= LCL(acontext)+4*C_max		/* function to use if gprof arc is bad */
LCL(vars_unused)	= LCL(bogus_func)+4		/* future growth */

			/* flags */
LCL(init)		= LCL(vars_unused)+4*63		/* whether initializations were done */
LCL(active)		= LCL(init)+1			/* whether profiling is active */
LCL(do_profile)		= LCL(active)+1			/* whether to do profiling */
LCL(use_dci)		= LCL(do_profile)+1		/* whether to use DCI */
LCL(use_profil)		= LCL(use_dci)+1		/* whether to use profil */
LCL(recursive_alloc)	= LCL(use_profil)+1		/* alloc called recursively */
LCL(output_uarea)	= LCL(recursive_alloc)+1	/* output uarea */
LCL(output_stats)	= LCL(output_uarea)+1		/* output stats info */
LCL(output_clock)	= LCL(output_stats)+1		/* output the clock ticks */
LCL(multiple_sections)	= LCL(output_clock)+1		/* multiple sections are ok */
LCL(have_bb)		= LCL(multiple_sections)+1	/* whether we have basic block data */
LCL(init_format)	= LCL(have_bb)+1		/* The output format has been chosen */
LCL(debug)		= LCL(init_format)+1		/* Whether or not we are debugging */
LCL(check_funcs)	= LCL(debug)+1			/* Whether to check functions for validity */
LCL(flag_unused)	= LCL(check_funcs)+1		/* unused flags */
LCL(end_of_vars)	= LCL(flag_unused)+62		/* size of machine independent vars */
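
/*
 * The flag fields above are deliberately byte sized (see the history note
 * "Make flags byte-sized").  A hedged C-style view of just that group; the
 * names are illustrative and only the offsets above are authoritative:
 *
 *	struct profile_flags_sketch {
 *		unsigned char	init;			// initializations were done
 *		unsigned char	active;			// profiling is active
 *		unsigned char	do_profile;		// whether to do profiling
 *		unsigned char	use_dci;		// whether to use DCI
 *		unsigned char	use_profil;		// whether to use profil
 *		unsigned char	recursive_alloc;	// alloc called recursively
 *		unsigned char	output_uarea;		// output uarea
 *		unsigned char	output_stats;		// output stats info
 *		unsigned char	output_clock;		// output the clock ticks
 *		unsigned char	multiple_sections;	// multiple sections are ok
 *		unsigned char	have_bb;		// basic block data is present
 *		unsigned char	init_format;		// output format has been chosen
 *		unsigned char	debug;			// debugging
 *		unsigned char	check_funcs;		// check functions for validity
 *		unsigned char	flag_unused[62];	// room for future flags
 *	};
 */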

/*
 * Data that contains profile statistics that can be dumped out
 * into the {,g}mon.out file.  This is defined in profile-md.h.
 */

LCL(stats_start)	= LCL(end_of_vars)		/* start of stats substructure */
LCL(stats_major_version)= LCL(stats_start)		/* major version number */
LCL(stats_minor_version)= LCL(stats_major_version)+4	/* minor version number */
LCL(stats_size)		= LCL(stats_minor_version)+4	/* size of _profile_stats structure */
LCL(profil_buckets)	= LCL(stats_size)+4		/* # profil buckets */
LCL(my_cpu)		= LCL(profil_buckets)+4		/* identify which cpu/thread this is */
LCL(max_cpu)		= LCL(my_cpu)+4			/* max cpu/thread number seen */
LCL(prof_records)	= LCL(max_cpu)+4		/* # of profiled functions */
LCL(gprof_records)	= LCL(prof_records)+4		/* # of gprof arcs created */
LCL(hash_buckets)	= LCL(gprof_records)+4		/* max gprof hash buckets on a chain */
LCL(bogus_count)	= LCL(hash_buckets)+4		/* # bogus functions found in gprof */

LCL(cnt)		= LCL(bogus_count)+4		/* # of _{prof,gprof}_mcount calls */
LCL(dummy)		= LCL(cnt)+8			/* # of _dummy_mcount calls */
LCL(old_mcount)		= LCL(dummy)+8			/* # of old mcount calls */
LCL(hash_search)	= LCL(old_mcount)+8		/* # gprof hash buckets searched */
LCL(hash_num)		= LCL(hash_search)+8		/* # times hash table searched */
LCL(user_ticks)		= LCL(hash_num)+8		/* # ticks within user space */
LCL(kernel_ticks)	= LCL(user_ticks)+8		/* # ticks within kernel space */
LCL(idle_ticks)		= LCL(kernel_ticks)+8		/* # ticks cpu was idle */
LCL(overflow_ticks)	= LCL(idle_ticks)+8		/* # ticks where histcounter overflowed */
LCL(acontext_locked)	= LCL(overflow_ticks)+8		/* # times an acontext was locked */
LCL(too_low)		= LCL(acontext_locked)+8	/* # times histogram tick too low */
LCL(too_high)		= LCL(too_low)+8		/* # times histogram tick too high */
LCL(prof_overflow)	= LCL(too_high)+8		/* # times the prof count field overflowed */
LCL(gprof_overflow)	= LCL(prof_overflow)+8		/* # times the gprof count field overflowed */
LCL(num_alloc)		= LCL(gprof_overflow)+8		/* # allocations in each context */
LCL(bytes_alloc)	= LCL(num_alloc)+4*C_max	/* bytes allocated in each context */
LCL(num_context)	= LCL(bytes_alloc)+4*C_max	/* # allocation context blocks */
LCL(wasted)		= LCL(num_context)+4*C_max	/* # bytes wasted */
LCL(overhead)		= LCL(wasted)+4*C_max		/* # bytes of overhead */
LCL(buckets)		= LCL(overhead)+4*C_max		/* # hash indexes that have n buckets */
LCL(cache_hits1)	= LCL(buckets)+4*10		/* # gprof cache hits in bucket #1 */
LCL(cache_hits2)	= LCL(cache_hits1)+8		/* # gprof cache hits in bucket #2 */
LCL(cache_hits3)	= LCL(cache_hits2)+8		/* # gprof cache hits in bucket #3 */
LCL(stats_unused)	= LCL(cache_hits3)+8		/* reserved for future use */
LCL(stats_end)		= LCL(stats_unused)+8*64	/* end of stats structure */

/*
 * Machine dependent variables that no C file should access (except for
 * profile-md.c).
 */

LCL(md_start)		= LCL(stats_end)		/* start of md structure */
LCL(md_major_version)	= LCL(md_start)			/* major version number */
LCL(md_minor_version)	= LCL(md_major_version)+4	/* minor version number */
LCL(md_size)		= LCL(md_minor_version)+4	/* size of _profile_md structure */
LCL(hash_ptr)		= LCL(md_size)+4		/* gprof hash pointer */
LCL(hash_size)		= LCL(hash_ptr)+4		/* gprof hash size */
LCL(num_cache)		= LCL(hash_size)+4		/* # of cache entries */
LCL(save_mcount_ptr)	= LCL(num_cache)+4		/* save for mcount_ptr when suspending profiling */
LCL(mcount_ptr_ptr)	= LCL(save_mcount_ptr)+4	/* pointer to _mcount_ptr */
LCL(dummy_ptr)		= LCL(mcount_ptr_ptr)+4		/* pointer to gprof_dummy */
LCL(alloc_pages)	= LCL(dummy_ptr)+4		/* allocate more memory */
LCL(num_buffer)		= LCL(alloc_pages)+4		/* buffer to convert 64 bit ints in */
LCL(md_unused)		= LCL(num_buffer)+N_digit	/* unused fields */
LCL(md_end)		= LCL(md_unused)+4*58		/* end of md structure */
LCL(total_size)		= LCL(md_end)			/* size of entire structure */

/*
 * Size of the entire _profile_vars structure.
 */

DATA(_profile_size)
	.long	LCL(total_size)
ENDDATA(_profile_size)

/*
 * Size of the statistics substructure.
 */

DATA(_profile_stats_size)
	.long	LCL(stats_end)-LCL(stats_start)
ENDDATA(_profile_stats_size)

/*
 * Size of the profil info substructure.
 */

DATA(_profile_profil_size)
	.long	LCL(profil_end)-LCL(profil_start)
ENDDATA(_profile_profil_size)

/*
 * Size of the machine dependent substructure.
 */

DATA(_profile_md_size)
	.long	LCL(md_end)-LCL(md_start)
ENDDATA(_profile_md_size)
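
/*
 * A hedged sketch of how the C side can use the size words exported above to
 * sanity check its view of the layout.  The assertion form and the structure
 * names are assumptions here; the real declarations live in
 * profile-internal.h and profile-md.h:
 *
 *	extern int _profile_size;		// whole record, in bytes
 *	extern int _profile_stats_size;		// statistics substructure
 *	extern int _profile_profil_size;	// profil info substructure
 *	extern int _profile_md_size;		// machine dependent substructure
 *
 *	assert(_profile_stats_size == sizeof(struct profile_stats));
 *	assert(_profile_md_size == sizeof(struct profile_md));
 */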

/*
 * Whether statistics are supported.
 */

DATA(_profile_do_stats)
	.long	DO_STATS
ENDDATA(_profile_do_stats)

	.text

/*
 * Map LCL(xxx) names into simpler names.
 */

#define V_acontext		LCL(acontext)
#define V_acontext_locked	LCL(acontext_locked)
#define V_alloc_pages		LCL(alloc_pages)
#define V_bogus_func		LCL(bogus_func)
#define V_bytes_alloc		LCL(bytes_alloc)
#define V_cache_hits1		LCL(cache_hits1)
#define V_cache_hits2		LCL(cache_hits2)
#define V_cache_hits3		LCL(cache_hits3)
#define V_cnt			LCL(cnt)
#define V_cnt_overflow		LCL(cnt_overflow)
#define V_check_funcs		LCL(check_funcs)
#define V_dummy			LCL(dummy)
#define V_dummy_overflow	LCL(dummy_overflow)
#define V_dummy_ptr		LCL(dummy_ptr)
#define V_gprof_records	LCL(gprof_records)
#define V_hash_num		LCL(hash_num)
#define V_hash_ptr		LCL(hash_ptr)
#define V_hash_search		LCL(hash_search)
#define V_mcount_ptr_ptr	LCL(mcount_ptr_ptr)
#define V_num_alloc		LCL(num_alloc)
#define V_num_buffer		LCL(num_buffer)
#define V_num_context		LCL(num_context)
#define V_old_mcount		LCL(old_mcount)
#define V_old_mcount_overflow	LCL(old_mcount_overflow)
#define V_overhead		LCL(overhead)
#define V_page_size		LCL(page_size)
#define V_prof_records		LCL(prof_records)
#define V_recursive_alloc	LCL(recursive_alloc)
#define V_wasted		LCL(wasted)

/*
 * Load %ebx with the address of _profile_vars.  On a multiprocessor, this
 * will load the appropriate machine's _profile_vars structure.
 * For ELF shared libraries, rely on the fact that we won't need a GOT,
 * except to load this pointer.
 */

#if defined (MACH_KERNEL)
#define ASSEMBLER
#include <i386/mp.h>

#if SQT
#include <i386/SQT/asm_macros.h>
#endif

#ifndef CPU_NUMBER
#error "Cannot determine how to get CPU number"
#endif

#define Vload	CPU_NUMBER(%ebx); movl EXT(_profile_vars_cpus)(,%ebx,4),%ebx

#else	/* not kernel */
#define Vload	Gload; Egaddr(%ebx,_profile_vars)
#endif
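
/*
 * In rough C terms, Vload behaves like the sketch below.  This is
 * illustrative only: _profile_vars_cpus and _profile_vars are the symbols
 * actually referenced above, while the function form and cpu_number() are
 * stand-ins for the CPU_NUMBER macro:
 *
 *	struct profile_vars *
 *	vload(void)
 *	{
 *	#if defined (MACH_KERNEL)
 *		return _profile_vars_cpus[cpu_number()];	// per-cpu instance
 *	#else
 *		return &_profile_vars;				// single instance
 *	#endif
 *	}
 */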


/*
 * Allocate some memory for profiling.  This memory is guaranteed to
 * be zero.
 * %eax contains the memory size requested and will contain ptr on exit.
 * %ebx contains the address of the appropriate profile_vars structure.
 * %ecx is the number of the memory pool to allocate from (trashed on exit).
 * %edx is trashed.
 * %esi is preserved.
 * %edi is preserved.
 * %ebp is preserved.
 */

Entry(_profile_alloc_asm)
	ENTER
	pushl	%esi
	pushl	%edi

	movl	%ecx,%edi			/* move context number to saved reg */

#if NO_RECURSIVE_ALLOC
	movb	$-1,%cl
	xchgb	%cl,V_recursive_alloc(%ebx)
	cmpb	$0,%cl
	je	LCL(no_recurse)

	int	$3

	.align	ALIGN
LCL(no_recurse):
#endif

	leal	V_acontext(%ebx,%edi,4),%ecx

	/* Loop looking for a free allocation context. */
	/* %eax = size, %ebx = vars addr, %ecx = ptr to allocation context to try */
	/* %edi = context number */

	.align	ALIGN
LCL(alloc_loop):
	movl	%ecx,%esi			/* save ptr in case no more contexts */
	movl	A_next(%ecx),%ecx		/* next context block */
	cmpl	$0,%ecx
	je	LCL(alloc_context)		/* need to allocate a new context block */

	movl	$-1,%edx
	xchgl	%edx,A_lock(%ecx)		/* %edx == 0 if context available */

#if DO_STATS
	SDADDNEG(%edx,V_acontext_locked(%ebx))	/* increment counter if lock was held */
#endif

	cmpl	$0,%edx
	jne	LCL(alloc_loop)			/* go back if this context block is not available */

	/* Allocation context found (%ecx), now allocate. */
	movl	A_plist(%ecx),%edx		/* pointer to current block */
	cmpl	$0,%edx				/* first allocation? */
	je	LCL(alloc_new)

	cmpl	%eax,M_nfree(%edx)		/* see if we have enough space */
	jl	LCL(alloc_new)			/* jump if not enough space */

	/* Allocate from local block (and common exit) */
	/* %eax = bytes to allocate, %ebx = GOT, %ecx = context, %edx = memory block */
	/* %edi = context number */

	.align	ALIGN
LCL(alloc_ret):

#if DO_STATS
	SLOCK incl V_num_alloc(%ebx,%edi,4)	/* update global counters */
	SLOCK addl %eax,V_bytes_alloc(%ebx,%edi,4)
	SLOCK subl %eax,V_wasted(%ebx,%edi,4)
#endif

	movl	M_ptr(%edx),%esi		/* pointer return value */
	subl	%eax,M_nfree(%edx)		/* decrement bytes remaining */
	addl	%eax,M_nalloc(%edx)		/* increment bytes allocated */
	incl	M_num(%edx)			/* increment # allocations */
	addl	%eax,M_ptr(%edx)		/* advance pointer */
	movl	$0,A_lock(%ecx)			/* unlock context block */
	movl	%esi,%eax			/* return pointer */

#if NO_RECURSIVE_ALLOC
	movb	$0,V_recursive_alloc(%ebx)
#endif

	popl	%edi
	popl	%esi
	LEAVE0
	ret					/* return to the caller */

	/* Allocate space in whole number of pages */
	/* %eax = bytes to allocate, %ebx = vars address, %ecx = context */
	/* %edi = context number */

	.align	ALIGN
LCL(alloc_new):
	pushl	%eax				/* save regs */
	pushl	%ecx
	movl	V_page_size(%ebx),%edx
	addl	$(M_size-1),%eax		/* add in overhead size & subtract 1 */
	decl	%edx				/* page_size - 1 */
	addl	%edx,%eax			/* round up to whole number of pages */
	notl	%edx
	andl	%edx,%eax
	leal	-M_size(%eax),%esi		/* save allocation size */
	pushl	%eax				/* argument to _profile_alloc_pages */
	call	*V_alloc_pages(%ebx)		/* allocate some memory */
	addl	$4,%esp				/* pop off argument */

#if DO_STATS
	SLOCK addl %esi,V_wasted(%ebx,%edi,4)	/* update global counters */
	SLOCK addl $(M_size),V_overhead(%ebx,%edi,4)
#endif

	popl	%ecx				/* context block */
	movl	%eax,%edx			/* memory block pointer */
	movl	%esi,M_nfree(%edx)		/* # free bytes */
	addl	$(M_size),%eax			/* bump past overhead */
	movl	A_plist(%ecx),%esi		/* previous memory block or 0 */
	movl	%eax,M_first(%edx)		/* first space available */
	movl	%eax,M_ptr(%edx)		/* current address available */
	movl	%esi,M_next(%edx)		/* next memory block allocated */
	movl	%edx,A_plist(%ecx)		/* update current page list */
	popl	%eax				/* user size request */
	jmp	LCL(alloc_ret)			/* goto common return code */

	/* Allocate a context header in addition to memory block header + data */
	/* %eax = bytes to allocate, %ebx = GOT, %esi = ptr to store context ptr */
	/* %edi = context number */

	.align	ALIGN
LCL(alloc_context):
	pushl	%eax				/* save regs */
	pushl	%esi
	movl	V_page_size(%ebx),%edx
	addl	$(A_size+M_size-1),%eax		/* add in overhead size & subtract 1 */
	decl	%edx				/* page_size - 1 */
	addl	%edx,%eax			/* round up to whole number of pages */
	notl	%edx
	andl	%edx,%eax
	leal	-A_size-M_size(%eax),%esi	/* save allocation size */
	pushl	%eax				/* argument to _profile_alloc_pages */
	call	*V_alloc_pages(%ebx)		/* allocate some memory */
	addl	$4,%esp				/* pop off argument */

#if DO_STATS
	SLOCK incl V_num_context(%ebx,%edi,4)	/* bump # context blocks */
	SLOCK addl %esi,V_wasted(%ebx,%edi,4)	/* update global counters */
	SLOCK addl $(A_size+M_size),V_overhead(%ebx,%edi,4)
#endif

	movl	%eax,%ecx			/* context pointer */
	leal	A_size(%eax),%edx		/* memory block pointer */
	movl	%esi,M_nfree(%edx)		/* # free bytes */
	addl	$(A_size+M_size),%eax		/* bump past overhead */
	movl	%eax,M_first(%edx)		/* first space available */
	movl	%eax,M_ptr(%edx)		/* current address available */
	movl	$0,M_next(%edx)			/* next memory block allocated */
	movl	%edx,A_plist(%ecx)		/* head of memory block list */
	movl	$1,A_lock(%ecx)			/* set lock */
	popl	%esi				/* ptr to store context block link */
	movl	%ecx,%eax			/* context pointer temp */
	xchgl	%eax,A_next(%esi)		/* link into chain */
	movl	%eax,A_next(%ecx)		/* add links in case of threading */
	popl	%eax				/* user size request */
	jmp	LCL(alloc_ret)			/* goto common return code */

END(_profile_alloc_asm)

/*
 * C callable version of the profile memory allocator.
 * extern void *_profile_alloc(struct profile_vars *, size_t, acontext_type_t);
 */

Entry(_profile_alloc)
	ENTER
	pushl	%ebx
	movl	12+Estack(%esp),%eax		/* memory size */
	movl	8+Estack(%esp),%ebx		/* profile_vars address */
	addl	$3,%eax				/* round up to word boundary */
	movl	16+Estack(%esp),%ecx		/* which memory pool to allocate from */
	andl	$0xfffffffc,%eax
	call	EXT(_profile_alloc_asm)
	popl	%ebx
	LEAVE0
	ret
END(_profile_alloc)


/*
 * Dummy mcount routine that just returns.
 *
 *		+-------------------------------+
 *		|				|
 *		|				|
 *		| caller's caller stack,	|
 *		| saved registers, params.	|
 *		|				|
 *		|				|
 *		+-------------------------------+
 *		| caller's caller return addr.	|
 *		+-------------------------------+
 *	esp -->	| caller's return address	|
 *		+-------------------------------+
 *
 *	edx --> function unique label
 */

Entry(_dummy_mcount)
	ENTER

#if DO_STATS
	pushl	%ebx
	MP_DISABLE_PREEMPTION(%ebx)
	Vload
	SDINC(V_dummy(%ebx))
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
#endif

	LEAVE0
	ret
END(_dummy_mcount)


/*
 * Entry point for System V based profiling, count how many times each function
 * is called.  The function label is passed in %edx, and the top two words on
 * the stack are the caller's address, and the caller's return address.
 *
 *		+-------------------------------+
 *		|				|
 *		|				|
 *		| caller's caller stack,	|
 *		| saved registers, params.	|
 *		|				|
 *		|				|
 *		+-------------------------------+
 *		| caller's caller return addr.	|
 *		+-------------------------------+
 *	esp -->	| caller's return address	|
 *		+-------------------------------+
 *
 *	edx --> function unique label
 *
 * We don't worry about the possibility of two threads calling
 * the same function for the first time simultaneously.  If that
 * happens, two records will be created, and one of the records'
 * address will be stored in the function unique label (which
 * is aligned by the compiler, so we don't have to watch out for
 * crossing page/cache boundaries).
 */
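
/*
 * A hedged C rendering of the fast path below.  Illustrative only: the real
 * routine takes its arguments in registers, and alloc_from_pool() and
 * caller_address are stand-ins for _profile_alloc_asm and the return address
 * found on the stack:
 *
 *	void
 *	prof_mcount(struct prof_ext **unique, unsigned long caller_address)
 *	{
 *		struct prof_ext *p = *unique;
 *
 *		if (p != 0) {
 *			p->count++;		// 64-bit bump via addl/adcl (DINC2)
 *		} else {
 *			p = alloc_from_pool(P_size, C_prof);
 *			p->addr = caller_address;
 *			p->count = 1;
 *			*unique = p;		// published with a single store (xchgl)
 *		}
 *	}
 */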

Entry(_prof_mcount)
	ENTER

#if DO_STATS
	pushl	%ebx
	MP_DISABLE_PREEMPTION(%ebx)
	Vload
	SDINC(V_cnt(%ebx))
#endif

	movl	(%edx),%eax			/* initialized? */
	cmpl	$0,%eax
	je	LCL(pnew)

	DINC2(P_count(%eax),P_overflow(%eax))	/* bump function count (double precision) */

#if DO_STATS
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
#endif

	LEAVE0
	ret

	.align	ALIGN
LCL(pnew):

#if !DO_STATS
	pushl	%ebx
	MP_DISABLE_PREEMPTION(%ebx)
	Vload
#endif

	SLOCK incl V_prof_records(%ebx)
	pushl	%edx
	movl	$(P_size),%eax			/* allocation size */
	movl	$(C_prof),%ecx			/* allocation pool */
	call	EXT(_profile_alloc_asm)		/* allocate a new record */
	popl	%edx

	movl	Estack+4(%esp),%ecx		/* caller's address */
	movl	%ecx,P_addr(%eax)
	movl	$1,P_count(%eax)		/* call count */
	xchgl	%eax,(%edx)			/* update function header */
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
	LEAVE0
	ret

END(_prof_mcount)


/*
 * Entry point for BSD based graph profiling, count how many times each unique
 * call graph (caller + callee) is called.  The function label is passed in
 * %edx, and the top two words on the stack are the caller's address, and the
 * caller's return address.
 *
 *		+-------------------------------+
 *		|				|
 *		|				|
 *		| caller's caller stack,	|
 *		| saved registers, params.	|
 *		|				|
 *		|				|
 *		+-------------------------------+
 *		| caller's caller return addr.	|
 *		+-------------------------------+
 *	esp -->	| caller's return address	|
 *		+-------------------------------+
 *
 *	edx --> function unique label
 *
 * We don't worry about the possibility of two threads calling the same
 * function simultaneously.  If that happens, two records will be created, and
 * one of the records' address will be stored in the function unique label
 * (which is aligned by the compiler).
 *
 * By design, the gprof header is not locked.  Each of the cache pointers is
 * always a valid pointer (possibly to a null record), and if another thread
 * comes in and modifies the pointer, it does so atomically with a simple store.
 * Since all arcs are in the hash table, the caches are just to avoid doing
 * a multiplication in the common case, and if they don't match, the arcs will
 * still be found.
 */
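
/*
 * A hedged C outline of the arc lookup performed below (the structure names
 * come from the sketches earlier in this file and are illustrative only):
 *
 *	struct gprof_arc *
 *	gprof_lookup(struct gprof_hdr *h, unsigned long frompc, unsigned long selfpc)
 *	{
 *		int i;
 *		struct gprof_arc *arc;
 *
 *		for (i = 0; i < 3; i++)			// per-function cache first
 *			if (h->cache[i]->frompc == frompc)
 *				return h->cache[i];
 *
 *		arc = h->hash_ptr[gprof_hash(frompc, selfpc)];	// then the hash chain
 *		for (; arc != 0; arc = arc->next)
 *			if (arc->selfpc == selfpc && arc->frompc == frompc)
 *				return arc;
 *
 *		return 0;				// caller allocates a new arc
 *	}
 */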

Entry(_gprof_mcount)

	ENTER
	movl	Estack+4(%esp),%ecx		/* caller's caller address */

#if DO_STATS
	pushl	%ebx
	MP_DISABLE_PREEMPTION(%ebx)
	Vload
	SDINC(V_cnt(%ebx))			/* bump profile call counter (double int) */
#endif

	movl	(%edx),%eax			/* Gprof header allocated? */
	cmpl	$0,%eax
	je	LCL(gnew)			/* skip if first call */

	DINC2(H_prof+P_count(%eax),H_prof+P_overflow(%eax))	/* bump function count */

	/* See if this call arc is the same as the last time */
MARK(_gprof_mcount_cache1)
	movl	H_cache_ptr(%eax),%edx		/* last arc searched */
	cmpl	%ecx,G_frompc(%edx)		/* skip if not equal */
	jne	LCL(gcache2)

	/* Same as last time, increment and return */

	DINC2(G_count(%edx),G_overflow(%edx))	/* bump arc count */

#if DO_STATS
	SDINC(V_cache_hits1(%ebx))		/* update counter */
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
#endif

	LEAVE0
	ret

	/* Search second cache entry */
	/* %eax = gprof func header, %ebx = vars address if DO_STATS, %ecx = caller's caller */
	/* %edx = first arc searched */
	/* %ebx if DO_STATS pushed on stack */

	.align	ALIGN
MARK(_gprof_mcount_cache2)
LCL(gcache2):
	pushl	%esi				/* get a saved register */
	movl	H_cache_ptr+4(%eax),%esi	/* 2nd arc to be searched */
	cmpl	%ecx,G_frompc(%esi)		/* skip if not equal */
	jne	LCL(gcache3)

	/* Element found, increment, reset last arc searched and return */

	DINC2(G_count(%esi),G_overflow(%esi))	/* bump arc count */

	movl	%esi,H_cache_ptr+0(%eax)	/* swap 1st and 2nd cached arcs */
	popl	%esi
	movl	%edx,H_cache_ptr+4(%eax)

#if DO_STATS
	SDINC(V_cache_hits2(%ebx))		/* update counter */
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
#endif

	LEAVE0
	ret

	/* Search third cache entry */
	/* %eax = gprof func header, %ebx = vars address if DO_STATS, %ecx = caller's caller */
	/* %edx = first arc searched, %esi = second arc searched */
	/* %esi, %ebx if DO_STATS pushed on stack */

	.align	ALIGN
MARK(_gprof_mcount_cache3)
LCL(gcache3):
	pushl	%edi
	movl	H_cache_ptr+8(%eax),%edi	/* 3rd arc to be searched */
	cmpl	%ecx,G_frompc(%edi)		/* skip if not equal */
	jne	LCL(gnocache)

	/* Element found, increment, reset last arc searched and return */

	DINC2(G_count(%edi),G_overflow(%edi))	/* bump arc count */

	movl	%edi,H_cache_ptr+0(%eax)	/* make this 1st cached arc */
	movl	%esi,H_cache_ptr+8(%eax)
	movl	%edx,H_cache_ptr+4(%eax)
	popl	%edi
	popl	%esi

#if DO_STATS
	SDINC(V_cache_hits3(%ebx))		/* update counter */
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
#endif

	LEAVE0
	ret

	/* No function context, allocate a new context */
	/* %ebx is the variables address if DO_STATS */
	/* %ecx is the caller's caller's address */
	/* %edx is the unique function pointer */
	/* %ebx if DO_STATS pushed on stack */

	.align	ALIGN
MARK(_gprof_mcount_new)
LCL(gnew):
	pushl	%esi
	pushl	%edi

#if !DO_STATS
	pushl	%ebx				/* Address of vars needed for alloc */
	MP_DISABLE_PREEMPTION(%ebx)
	Vload					/* if DO_STATS, address is already loaded */
#endif

	SLOCK incl V_prof_records(%ebx)
	movl	%edx,%esi			/* save unique function ptr */
	movl	%ecx,%edi			/* and caller's caller address */
	movl	$(H_size),%eax			/* memory block size */
	movl	$(C_gfunc),%ecx			/* gprof function header memory pool */
	call	EXT(_profile_alloc_asm)

	movl	V_hash_ptr(%ebx),%ecx		/* copy hash_ptr to func header */
	movl	V_dummy_ptr(%ebx),%edx		/* dummy cache entry */
	movl	%ecx,H_hash_ptr(%eax)
	movl	%edx,H_cache_ptr+0(%eax)	/* store dummy cache ptrs */
	movl	%edx,H_cache_ptr+4(%eax)
	movl	%edx,H_cache_ptr+8(%eax)
	movl	%esi,H_unique_ptr(%eax)		/* remember function unique ptr */
	movl	Estack+12(%esp),%ecx		/* caller's address */
	movl	$1,H_prof+P_count(%eax)		/* function called once so far */
	movl	%ecx,H_prof+P_addr(%eax)	/* set up prof information */
	movl	%eax,(%esi)			/* update context block address */
	movl	%edi,%ecx			/* caller's caller address */
	movl	%edx,%esi			/* 2nd cached arc */

#if !DO_STATS
	popl	%ebx
#endif

	/* Fall through to add element to the hash table.  This may involve */
	/* searching a few hash table elements that don't need to be searched */
	/* since we have a new element, but it allows the hash table function */
	/* to be specified in only one place */

	/* Didn't find entry in cache, search the global hash table */
	/* %eax = gprof func header, %ebx = vars address if DO_STATS */
	/* %ecx = caller's caller */
	/* %edx, %esi = cached arcs that were searched */
	/* %edi, %esi, %ebx if DO_STATS pushed on stack */

	.align	ALIGN
MARK(_gprof_mcount_hash)
LCL(gnocache):

	pushl	%esi				/* save 2nd arc searched */
	pushl	%edx				/* save 1st arc searched */
	movl	%eax,%esi			/* save gprof func header */

#if DO_STATS
	SDINC(V_hash_num(%ebx))
	movl	Estack+20(%esp),%edi		/* caller's address */
#else
	movl	Estack+16(%esp),%edi		/* caller's address */
#endif
	movl	%ecx,%eax			/* caller's caller address */
	imull	%edi,%eax			/* multiply to get hash */
	movl	H_hash_ptr(%esi),%edx		/* hash pointer */
	shrl	$(GPROF_HASH_SHIFT),%eax	/* eliminate low order bits */
	andl	$(GPROF_HASH_MASK),%eax		/* mask to get hash value */
	leal	0(%edx,%eax,4),%eax		/* pointer to hash bucket */
	movl	%eax,%edx			/* save hash bucket address */

	/* %eax = old arc, %ebx = vars address if DO_STATS, %ecx = caller's caller */
	/* %edx = hash bucket address, %esi = gfunc ptr, %edi = caller's addr */
	/* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */

	.align	ALIGN
LCL(ghash):
	movl	G_next(%eax),%eax		/* get next hash element */
	cmpl	$0,%eax				/* end of line? */
	je	LCL(ghashnew)			/* skip if allocate new hash */

#if DO_STATS
	SDINC(V_hash_search(%ebx))
#endif

	cmpl	G_selfpc(%eax),%edi		/* loop back if not one we want */
	jne	LCL(ghash)

	cmpl	G_frompc(%eax),%ecx		/* loop back if not one we want */
	jne	LCL(ghash)

	/* Found an entry, increment count, set up for caching, and return */
	/* %eax = arc, %ebx = vars address if DO_STATS, %esi = func header */
	/* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */

	DINC2(G_count(%eax),G_overflow(%eax))	/* bump arc count */

	popl	%ecx				/* previous 1st arc searched */
	movl	%eax,H_cache_ptr+0(%esi)	/* this element is now 1st arc */
	popl	%edi				/* previous 2nd arc searched */
	movl	%ecx,H_cache_ptr+4(%esi)	/* new 2nd arc to be searched */
	movl	%edi,H_cache_ptr+8(%esi)	/* new 3rd arc to be searched */
	popl	%edi
	popl	%esi

#if DO_STATS
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
#endif

	LEAVE0
	ret					/* return to user */

	/* Allocate new arc */
	/* %eax = old arc, %ebx = vars address if DO_STATS, %ecx = caller's caller */
	/* %edx = hash bucket address, %esi = gfunc ptr, %edi = caller's addr */
	/* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */

	.align	ALIGN
MARK(_gprof_mcount_hashnew)
LCL(ghashnew):

#if !DO_STATS
	pushl	%ebx				/* load address of vars if we haven't */
	MP_DISABLE_PREEMPTION(%ebx)
	Vload					/* already done so */
#endif

	SLOCK incl V_gprof_records(%ebx)
	pushl	%edx
	movl	%ecx,%edi			/* save caller's caller */
	movl	$(G_size),%eax			/* arc size */
	movl	$(C_gprof),%ecx			/* gprof memory pool */
	call	EXT(_profile_alloc_asm)
	popl	%edx

	movl	$1,G_count(%eax)		/* set call count */
	movl	Estack+20(%esp),%ecx		/* caller's address */
	movl	%edi,G_frompc(%eax)		/* caller's caller */
	movl	%ecx,G_selfpc(%eax)

#if !DO_STATS
	popl	%ebx				/* release %ebx if no stats */
#endif

	movl	(%edx),%ecx			/* first hash bucket */
	movl	%ecx,G_next(%eax)		/* update link */
	movl	%eax,%ecx			/* copy for xchgl */
	xchgl	%ecx,(%edx)			/* add to hash linked list */
	movl	%ecx,G_next(%eax)		/* update in case list changed */

	popl	%ecx				/* previous 1st arc searched */
	popl	%edi				/* previous 2nd arc searched */
	movl	%eax,H_cache_ptr+0(%esi)	/* this element is now 1st arc */
	movl	%ecx,H_cache_ptr+4(%esi)	/* new 2nd arc to be searched */
	movl	%edi,H_cache_ptr+8(%esi)	/* new 3rd arc to be searched */

	popl	%edi
	popl	%esi

#if DO_STATS
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
#endif

	LEAVE0
	ret					/* return to user */

END(_gprof_mcount)


/*
 * This function assumes that neither the caller nor its caller
 * has omitted the frame pointer, in order to get the caller's
 * caller.  The stack looks like the following at the time of the call:
 *
 *		+-------------------------------+
 *		|				|
 *		|				|
 *		| caller's caller stack,	|
 *		| saved registers, params.	|
 *		|				|
 *		|				|
 *		+-------------------------------+
 *		| caller's caller return addr.	|
 *		+-------------------------------+
 *	fp -->	| previous frame pointer	|
 *		+-------------------------------+
 *		|				|
 *		| caller's stack, saved regs,	|
 *		| params.			|
 *		|				|
 *		+-------------------------------+
 *	sp -->	| caller's return address	|
 *		+-------------------------------+
 *
 * Recent versions of the compiler put the address of the pointer
 * sized word in %edx.  Previous versions did not, but this code
 * does not support them.
 */

/*
 * Note that OSF/rose blew defining _mcount, since it prepends leading
 * underscores, and _mcount didn't have a second leading underscore.  However,
 * some of the kernel/server functions 'know' that mcount has a leading
 * underscore, so we satisfy both camps.
 */

#if OLD_MCOUNT
	.globl	mcount
	.globl	_mcount
	ELF_FUNC(mcount)
	ELF_FUNC(_mcount)
	.align	FALIGN
_mcount:
mcount:

	pushl	%ebx
	MP_DISABLE_PREEMPTION(%ebx)
	Vload

#if DO_STATS
	SDINC(V_old_mcount(%ebx))
#endif

	/* In calling the functions, we will actually leave 1 extra word on the */
	/* top of the stack, but generated code will not notice, since the function */
	/* uses a frame pointer */

	movl	V_mcount_ptr_ptr(%ebx),%ecx	/* address of mcount_ptr */
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
	movl	4(%ebp),%eax			/* caller's caller return address */
	xchgl	%eax,(%esp)			/* push & get return address */
	pushl	%eax				/* push return address */
	jmp	*(%ecx)				/* go to profile the function */

End(mcount)
End(_mcount)
#endif


#if !defined(KERNEL) && !defined(MACH_KERNEL)

/*
 * Convert a 64-bit integer to a string.
 * Arg #1 is a pointer to a string (at least 24 bytes) or NULL
 * Arg #2 is the low part of the 64-bit integer.
 * Arg #3 is the high part of the 64-bit integer.
 */
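
/*
 * In C terms the routine below behaves roughly as follows (a sketch of the
 * interface only; the declaration callers actually use lives in the
 * profiling headers):
 *
 *	char *_profile_cnt_to_decimal(char *buffer, unsigned long low, unsigned long high);
 *
 * It writes the decimal form of ((high << 32) | low), NUL terminated, at the
 * tail of buffer (N_digit bytes, or the per-instance num_buffer when buffer
 * is NULL) and returns a pointer to the first digit.
 */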

Entry(_profile_cnt_to_decimal)
	ENTER
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	Estack+16(%esp),%ebx		/* pointer or null */
	movl	Estack+20(%esp),%edi		/* low part of number */
	movl	$10,%ecx			/* divisor */
	cmpl	$0,%ebx				/* skip if pointer ok */
	jne	LCL(cvt_nonnull)

	MP_DISABLE_PREEMPTION(%ebx)
	Vload					/* get _profile_vars address */
	leal	V_num_buffer(%ebx),%ebx		/* temp buffer to use */

	.align	ALIGN
LCL(cvt_nonnull):
	addl	$(N_digit-1),%ebx		/* point string at end */
	movb	$0,0(%ebx)			/* null terminate string */

#if OVERFLOW
	movl	Estack+24(%esp),%esi		/* high part of number */
	cmpl	$0,%esi				/* any thing left in high part? */
	je	LCL(cvt_low)

	.align	ALIGN
LCL(cvt_high):
	movl	%esi,%eax			/* calculate high/10 & high%10 */
	xorl	%edx,%edx
	divl	%ecx
	movl	%eax,%esi

	movl	%edi,%eax			/* calculate (low + (high%10)*2^32) / 10 */
	divl	%ecx
	movl	%eax,%edi

	decl	%ebx				/* decrement string pointer */
	addl	$48,%edx			/* convert from 0..9 -> '0'..'9' */
	movb	%dl,0(%ebx)			/* store digit in string */
	cmpl	$0,%esi				/* any thing left in high part? */
	jne	LCL(cvt_high)

#endif	/* OVERFLOW */

	.align	ALIGN
LCL(cvt_low):
	movl	%edi,%eax			/* get low part into %eax */

	.align	ALIGN
LCL(cvt_low2):
	xorl	%edx,%edx			/* 0 */
	divl	%ecx				/* calculate next digit */
	decl	%ebx				/* decrement string pointer */
	addl	$48,%edx			/* convert from 0..9 -> '0'..'9' */
	movb	%dl,0(%ebx)			/* store digit in string */
	cmpl	$0,%eax				/* any more digits to convert? */
	jne	LCL(cvt_low2)

	movl	%ebx,%eax			/* return value */
	popl	%edi
	popl	%esi
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
	LEAVE0
	ret

END(_profile_cnt_to_decimal)

#endif