/* prof_machdep.c revision 219461 */
1/*- 2 * Copyright (c) 1996 Bruce D. Evans. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/amd64/amd64/prof_machdep.c 219461 2011-03-10 20:02:58Z jkim $"); 29 30#ifdef GUPROF 31#if 0 32#include "opt_i586_guprof.h" 33#include "opt_perfmon.h" 34#endif 35 36#include <sys/param.h> 37#include <sys/systm.h> 38#include <sys/bus.h> 39#include <sys/cpu.h> 40#include <sys/eventhandler.h> 41#include <sys/gmon.h> 42#include <sys/kernel.h> 43#include <sys/smp.h> 44#include <sys/sysctl.h> 45 46#include <machine/clock.h> 47#if 0 48#include <machine/perfmon.h> 49#endif 50#include <machine/timerreg.h> 51 52#define CPUTIME_CLOCK_UNINITIALIZED 0 53#define CPUTIME_CLOCK_I8254 1 54#define CPUTIME_CLOCK_TSC 2 55#define CPUTIME_CLOCK_I586_PMC 3 56#define CPUTIME_CLOCK_I8254_SHIFT 7 57 58int cputime_bias = 1; /* initialize for locality of reference */ 59 60static int cputime_clock = CPUTIME_CLOCK_UNINITIALIZED; 61#if defined(PERFMON) && defined(I586_PMC_GUPROF) 62static u_int cputime_clock_pmc_conf = I586_PMC_GUPROF; 63static int cputime_clock_pmc_init; 64static struct gmonparam saved_gmp; 65#endif 66#if defined(I586_CPU) || defined(I686_CPU) 67static int cputime_prof_active; 68#endif 69#endif /* GUPROF */ 70 71#ifdef __GNUCLIKE_ASM 72__asm(" \n\ 73GM_STATE = 0 \n\ 74GMON_PROF_OFF = 3 \n\ 75 \n\ 76 .text \n\ 77 .p2align 4,0x90 \n\ 78 .globl __mcount \n\ 79 .type __mcount,@function \n\ 80__mcount: \n\ 81 # \n\ 82 # Check that we are profiling. Do it early for speed. \n\ 83 # \n\ 84 cmpl $GMON_PROF_OFF,_gmonparam+GM_STATE \n\ 85 je .mcount_exit \n\ 86 # \n\ 87 # __mcount is the same as [.]mcount except the caller \n\ 88 # hasn't changed the stack except to call here, so the \n\ 89 # caller's raddr is above our raddr. 
\n\ 90 # \n\ 91 pushq %rax \n\ 92 pushq %rdx \n\ 93 pushq %rcx \n\ 94 pushq %rsi \n\ 95 pushq %rdi \n\ 96 pushq %r8 \n\ 97 pushq %r9 \n\ 98 movq 7*8+8(%rsp),%rdi \n\ 99 jmp .got_frompc \n\ 100 \n\ 101 .p2align 4,0x90 \n\ 102 .globl .mcount \n\ 103.mcount: \n\ 104 cmpl $GMON_PROF_OFF,_gmonparam+GM_STATE \n\ 105 je .mcount_exit \n\ 106 # \n\ 107 # The caller's stack frame has already been built, so \n\ 108 # %rbp is the caller's frame pointer. The caller's \n\ 109 # raddr is in the caller's frame following the caller's \n\ 110 # caller's frame pointer. \n\ 111 # \n\ 112 pushq %rax \n\ 113 pushq %rdx \n\ 114 pushq %rcx \n\ 115 pushq %rsi \n\ 116 pushq %rdi \n\ 117 pushq %r8 \n\ 118 pushq %r9 \n\ 119 movq 8(%rbp),%rdi \n\ 120.got_frompc: \n\ 121 # \n\ 122 # Our raddr is the caller's pc. \n\ 123 # \n\ 124 movq 7*8(%rsp),%rsi \n\ 125 \n\ 126 pushfq \n\ 127 cli \n\ 128 call mcount \n\ 129 popfq \n\ 130 popq %r9 \n\ 131 popq %r8 \n\ 132 popq %rdi \n\ 133 popq %rsi \n\ 134 popq %rcx \n\ 135 popq %rdx \n\ 136 popq %rax \n\ 137.mcount_exit: \n\ 138 ret $0 \n\ 139"); 140#else /* !__GNUCLIKE_ASM */ 141#error "this file needs to be ported to your compiler" 142#endif /* __GNUCLIKE_ASM */ 143 144#ifdef GUPROF 145/* 146 * [.]mexitcount saves the return register(s), loads selfpc and calls 147 * mexitcount(selfpc) to do the work. Someday it should be in a machine 148 * dependent file together with cputime(), __mcount and [.]mcount. cputime() 149 * can't just be put in machdep.c because it has to be compiled without -pg. 150 */ 151#ifdef __GNUCLIKE_ASM 152__asm(" \n\ 153 .text \n\ 154# \n\ 155# Dummy label to be seen when gprof -u hides [.]mexitcount. 
\n\ 156# \n\ 157 .p2align 4,0x90 \n\ 158 .globl __mexitcount \n\ 159 .type __mexitcount,@function \n\ 160__mexitcount: \n\ 161 nop \n\ 162 \n\ 163GMON_PROF_HIRES = 4 \n\ 164 \n\ 165 .p2align 4,0x90 \n\ 166 .globl .mexitcount \n\ 167.mexitcount: \n\ 168 cmpl $GMON_PROF_HIRES,_gmonparam+GM_STATE \n\ 169 jne .mexitcount_exit \n\ 170 pushq %rax \n\ 171 pushq %rdx \n\ 172 pushq %rcx \n\ 173 pushq %rsi \n\ 174 pushq %rdi \n\ 175 pushq %r8 \n\ 176 pushq %r9 \n\ 177 movq 7*8(%rsp),%rdi \n\ 178 pushfq \n\ 179 cli \n\ 180 call mexitcount \n\ 181 popfq \n\ 182 popq %r9 \n\ 183 popq %r8 \n\ 184 popq %rdi \n\ 185 popq %rsi \n\ 186 popq %rcx \n\ 187 popq %rdx \n\ 188 popq %rax \n\ 189.mexitcount_exit: \n\ 190 ret $0 \n\ 191"); 192#endif /* __GNUCLIKE_ASM */ 193 194/* 195 * Return the time elapsed since the last call. The units are machine- 196 * dependent. 197 */ 198int 199cputime() 200{ 201 u_int count; 202 int delta; 203#if (defined(I586_CPU) || defined(I686_CPU)) && !defined(SMP) && \ 204 defined(PERFMON) && defined(I586_PMC_GUPROF) 205 u_quad_t event_count; 206#endif 207 u_char high, low; 208 static u_int prev_count; 209 210#if defined(I586_CPU) || defined(I686_CPU) 211 if (cputime_clock == CPUTIME_CLOCK_TSC) { 212 /* 213 * Scale the TSC a little to make cputime()'s frequency 214 * fit in an int, assuming that the TSC frequency fits 215 * in a u_int. Use a fixed scale since dynamic scaling 216 * would be slower and we can't really use the low bit 217 * of precision. 218 */ 219 count = (u_int)rdtsc() & ~1u; 220 delta = (int)(count - prev_count) >> 1; 221 prev_count = count; 222 return (delta); 223 } 224#if defined(PERFMON) && defined(I586_PMC_GUPROF) && !defined(SMP) 225 if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { 226 /* 227 * XXX permon_read() should be inlined so that the 228 * perfmon module doesn't need to be compiled with 229 * profiling disabled and so that it is fast. 
230 */ 231 perfmon_read(0, &event_count); 232 233 count = (u_int)event_count; 234 delta = (int)(count - prev_count); 235 prev_count = count; 236 return (delta); 237 } 238#endif /* PERFMON && I586_PMC_GUPROF && !SMP */ 239#endif /* I586_CPU || I686_CPU */ 240 241 /* 242 * Read the current value of the 8254 timer counter 0. 243 */ 244 outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); 245 low = inb(TIMER_CNTR0); 246 high = inb(TIMER_CNTR0); 247 count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT; 248 249 /* 250 * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets. 251 * While profiling is enabled, this routine is called at least twice 252 * per timer reset (for mcounting and mexitcounting hardclock()), 253 * so at most one reset has occurred since the last call, and one 254 * has occurred iff the current count is larger than the previous 255 * count. This allows counter underflow to be detected faster 256 * than in microtime(). 257 */ 258 delta = prev_count - count; 259 prev_count = count; 260 if ((int) delta <= 0) 261 return (delta + (i8254_max_count << CPUTIME_CLOCK_I8254_SHIFT)); 262 return (delta); 263} 264 265static int 266sysctl_machdep_cputime_clock(SYSCTL_HANDLER_ARGS) 267{ 268 int clock; 269 int error; 270#if defined(PERFMON) && defined(I586_PMC_GUPROF) 271 int event; 272 struct pmc pmc; 273#endif 274 275 clock = cputime_clock; 276#if defined(PERFMON) && defined(I586_PMC_GUPROF) 277 if (clock == CPUTIME_CLOCK_I586_PMC) { 278 pmc.pmc_val = cputime_clock_pmc_conf; 279 clock += pmc.pmc_event; 280 } 281#endif 282 error = sysctl_handle_opaque(oidp, &clock, sizeof clock, req); 283 if (error == 0 && req->newptr != NULL) { 284#if defined(PERFMON) && defined(I586_PMC_GUPROF) 285 if (clock >= CPUTIME_CLOCK_I586_PMC) { 286 event = clock - CPUTIME_CLOCK_I586_PMC; 287 if (event >= 256) 288 return (EINVAL); 289 pmc.pmc_num = 0; 290 pmc.pmc_event = event; 291 pmc.pmc_unit = 0; 292 pmc.pmc_flags = PMCF_E | PMCF_OS | PMCF_USR; 293 pmc.pmc_mask = 0; 294 
cputime_clock_pmc_conf = pmc.pmc_val; 295 cputime_clock = CPUTIME_CLOCK_I586_PMC; 296 } else 297#endif 298 { 299 if (clock < 0 || clock >= CPUTIME_CLOCK_I586_PMC) 300 return (EINVAL); 301 cputime_clock = clock; 302 } 303 } 304 return (error); 305} 306 307SYSCTL_PROC(_machdep, OID_AUTO, cputime_clock, CTLTYPE_INT | CTLFLAG_RW, 308 0, sizeof(u_int), sysctl_machdep_cputime_clock, "I", ""); 309 310/* 311 * The start and stop routines need not be here since we turn off profiling 312 * before calling them. They are here for convenience. 313 */ 314 315void 316startguprof(gp) 317 struct gmonparam *gp; 318{ 319 if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) { 320 cputime_clock = CPUTIME_CLOCK_I8254; 321#if defined(I586_CPU) || defined(I686_CPU) 322 if (tsc_freq != 0 && mp_ncpus == 1) 323 cputime_clock = CPUTIME_CLOCK_TSC; 324#endif 325 } 326 gp->profrate = i8254_freq << CPUTIME_CLOCK_I8254_SHIFT; 327#if defined(I586_CPU) || defined(I686_CPU) 328 if (cputime_clock == CPUTIME_CLOCK_TSC) { 329 gp->profrate = tsc_freq >> 1; 330 cputime_prof_active = 1; 331 } 332#if defined(PERFMON) && defined(I586_PMC_GUPROF) 333 else if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { 334 if (perfmon_avail() && 335 perfmon_setup(0, cputime_clock_pmc_conf) == 0) { 336 if (perfmon_start(0) != 0) 337 perfmon_fini(0); 338 else { 339 /* XXX 1 event == 1 us. */ 340 gp->profrate = 1000000; 341 342 saved_gmp = *gp; 343 344 /* Zap overheads. They are invalid. 
*/ 345 gp->cputime_overhead = 0; 346 gp->mcount_overhead = 0; 347 gp->mcount_post_overhead = 0; 348 gp->mcount_pre_overhead = 0; 349 gp->mexitcount_overhead = 0; 350 gp->mexitcount_post_overhead = 0; 351 gp->mexitcount_pre_overhead = 0; 352 353 cputime_clock_pmc_init = TRUE; 354 } 355 } 356 } 357#endif /* PERFMON && I586_PMC_GUPROF */ 358#endif /* I586_CPU || I686_CPU */ 359 cputime_bias = 0; 360 cputime(); 361} 362 363void 364stopguprof(gp) 365 struct gmonparam *gp; 366{ 367#if defined(PERFMON) && defined(I586_PMC_GUPROF) 368 if (cputime_clock_pmc_init) { 369 *gp = saved_gmp; 370 perfmon_fini(0); 371 cputime_clock_pmc_init = FALSE; 372 } 373#endif 374#if defined(I586_CPU) || defined(I686_CPU) 375 if (cputime_clock == CPUTIME_CLOCK_TSC) 376 cputime_prof_active = 0; 377#endif 378} 379 380#if defined(I586_CPU) || defined(I686_CPU) 381/* If the cpu frequency changed while profiling, report a warning. */ 382static void 383tsc_freq_changed(void *arg, const struct cf_level *level, int status) 384{ 385 386 /* 387 * If there was an error during the transition or 388 * TSC is P-state invariant, don't do anything. 389 */ 390 if (status != 0 || tsc_is_invariant) 391 return; 392 if (cputime_prof_active && cputime_clock == CPUTIME_CLOCK_TSC) 393 printf("warning: cpu freq changed while profiling active\n"); 394} 395 396EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, 397 EVENTHANDLER_PRI_ANY); 398#endif /* I586_CPU || I686_CPU */ 399 400#endif /* GUPROF */ 401