/* perfmon.c revision 92651 */
180708Sjake/* 280708Sjake * Copyright 1996 Massachusetts Institute of Technology 385586Sjake * 485586Sjake * Permission to use, copy, modify, and distribute this software and 580708Sjake * its documentation for any purpose and without fee is hereby 680708Sjake * granted, provided that both the above copyright notice and this 785586Sjake * permission notice appear in all copies, that both the above 885586Sjake * copyright notice and this permission notice appear in all 985586Sjake * supporting documentation, and that the name of M.I.T. not be used 1080708Sjake * in advertising or publicity pertaining to distribution of the 1180708Sjake * software without specific, written prior permission. M.I.T. makes 1280708Sjake * no representations about the suitability of this software for any 1380708Sjake * purpose. It is provided "as is" without express or implied 1480708Sjake * warranty. 1580708Sjake * 1680708Sjake * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 1780708Sjake * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 1885586Sjake * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 1985586Sjake * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 2085586Sjake * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 2185586Sjake * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 2285586Sjake * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 2385586Sjake * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 2485586Sjake * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 2580708Sjake * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 2685586Sjake * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2785586Sjake * SUCH DAMAGE. 
2885586Sjake * 2985586Sjake * $FreeBSD: head/sys/i386/i386/perfmon.c 92651 2002-03-19 06:45:25Z alc $ 3085586Sjake */ 3185586Sjake 3285586Sjake#include <sys/param.h> 3385586Sjake#include <sys/systm.h> 3485586Sjake#include <sys/conf.h> 3585586Sjake#include <sys/fcntl.h> 3685586Sjake#include <sys/kernel.h> 3780708Sjake 3885586Sjake#ifndef SMP 3980708Sjake#include <machine/cputypes.h> 4080708Sjake#endif 4180708Sjake#include <machine/clock.h> 4280708Sjake#include <machine/perfmon.h> 43100384Speter 4480708Sjakestatic int perfmon_inuse; 45102808Sjakestatic int perfmon_cpuok; 46102808Sjake#ifndef SMP 4780708Sjakestatic int msr_ctl[NPMC]; 48100384Speter#endif 49100384Speterstatic int msr_pmc[NPMC]; 50100384Speterstatic unsigned int ctl_shadow[NPMC]; 51100384Speterstatic quad_t pmc_shadow[NPMC]; /* used when ctr is stopped on P5 */ 52100384Speterstatic int (*writectl)(int); 53102808Sjake#ifndef SMP 54102808Sjakestatic int writectl5(int); 55102808Sjakestatic int writectl6(int); 56102808Sjake#endif 5780708Sjake 5880708Sjakestatic d_close_t perfmon_close; 5985586Sjakestatic d_open_t perfmon_open; 6085586Sjakestatic d_ioctl_t perfmon_ioctl; 61100384Speter 62100384Speter/* 63100384Speter * XXX perfmon_init_dev(void *) is a split from the perfmon_init() funtion. 64100384Speter * This solves a problem for DEVFS users. It loads the "perfmon" driver after 65100384Speter * the DEVFS subsystem has been kicked into action. The SI_ORDER_ANY is to 66102808Sjake * assure that it is the most lowest priority task which, guarantees the 67100384Speter * above. 
68102808Sjake */ 69102808Sjakestatic void perfmon_init_dev __P((void *)); 70102808SjakeSYSINIT(cpu, SI_SUB_DRIVERS, SI_ORDER_ANY, perfmon_init_dev, NULL); 71100384Speter 72102555Sjake#define CDEV_MAJOR 2 /* We're really a minor of mem.c */ 73102555Sjakestatic struct cdevsw perfmon_cdevsw = { 74102808Sjake /* open */ perfmon_open, 75100384Speter /* close */ perfmon_close, 76100384Speter /* read */ noread, 77100384Speter /* write */ nowrite, 78102808Sjake /* ioctl */ perfmon_ioctl, 79102808Sjake /* poll */ nopoll, 80102808Sjake /* mmap */ nommap, 81102808Sjake /* strategy */ nostrategy, 82102808Sjake /* name */ "perfmon", 83102808Sjake /* maj */ CDEV_MAJOR, 84102808Sjake /* dump */ nodump, 85102808Sjake /* psize */ nopsize, 86120422Speter /* flags */ 0, 87120422Speter}; 88100384Speter 89100384Speter/* 90100384Speter * Must be called after cpu_class is set up. 91100384Speter */ 92100384Spetervoid 93100384Speterperfmon_init(void) 94123742Speter{ 95119015Sgordon#ifndef SMP 96123742Speter switch(cpu_class) { 97123742Speter case CPUCLASS_586: 98100384Speter perfmon_cpuok = 1; 99100384Speter msr_ctl[0] = 0x11; 100100384Speter msr_ctl[1] = 0x11; 101100384Speter msr_pmc[0] = 0x12; 102100384Speter msr_pmc[1] = 0x13; 103100384Speter writectl = writectl5; 104123742Speter break; 105123742Speter case CPUCLASS_686: 106123742Speter perfmon_cpuok = 1; 107123742Speter msr_ctl[0] = 0x186; 108123742Speter msr_ctl[1] = 0x187; 109123742Speter msr_pmc[0] = 0xc1; 110123742Speter msr_pmc[1] = 0xc2; 111123742Speter writectl = writectl6; 112123742Speter break; 113123742Speter 114123742Speter default: 115123742Speter perfmon_cpuok = 0; 116123742Speter break; 117123742Speter } 118133464Smarcel#endif /* SMP */ 119133464Smarcel} 120133464Smarcel 121133464Smarcelstatic void 122133464Smarcelperfmon_init_dev(dummy) 123133464Smarcel void *dummy; 124133464Smarcel{ 125133464Smarcel make_dev(&perfmon_cdevsw, 32, UID_ROOT, GID_KMEM, 0640, "perfmon"); 12685586Sjake} 12785586Sjake 12885586Sjakeint 
12985586Sjakeperfmon_avail(void) 13085586Sjake{ 13185586Sjake return perfmon_cpuok; 13285586Sjake} 13385586Sjake 13485586Sjakeint 13585586Sjakeperfmon_setup(int pmc, unsigned int control) 13685586Sjake{ 13785586Sjake critical_t savecrit; 13885586Sjake 13985586Sjake if (pmc < 0 || pmc >= NPMC) 14085586Sjake return EINVAL; 14185586Sjake 14285586Sjake perfmon_inuse |= (1 << pmc); 14385586Sjake control &= ~(PMCF_SYS_FLAGS << 16); 14485586Sjake savecrit = cpu_critical_enter(); 14585586Sjake ctl_shadow[pmc] = control; 14685586Sjake writectl(pmc); 14785586Sjake wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0); 14885586Sjake cpu_critical_exit(savecrit); 14985586Sjake return 0; 15085586Sjake} 15185586Sjake 15285586Sjakeint 15385586Sjakeperfmon_get(int pmc, unsigned int *control) 15485586Sjake{ 15585586Sjake if (pmc < 0 || pmc >= NPMC) 15685586Sjake return EINVAL; 15785586Sjake 158104072Sjake if (perfmon_inuse & (1 << pmc)) { 15985586Sjake *control = ctl_shadow[pmc]; 16085586Sjake return 0; 161104072Sjake } 16285586Sjake return EBUSY; /* XXX reversed sense */ 16385586Sjake} 16485586Sjake 16585586Sjakeint 16685586Sjakeperfmon_fini(int pmc) 16785586Sjake{ 16885586Sjake if (pmc < 0 || pmc >= NPMC) 16985586Sjake return EINVAL; 17085586Sjake 171104072Sjake if (perfmon_inuse & (1 << pmc)) { 172104072Sjake perfmon_stop(pmc); 17385586Sjake ctl_shadow[pmc] = 0; 17485586Sjake perfmon_inuse &= ~(1 << pmc); 17585586Sjake return 0; 17685586Sjake } 17785586Sjake return EBUSY; /* XXX reversed sense */ 17885586Sjake} 17985586Sjake 18085586Sjakeint 18185586Sjakeperfmon_start(int pmc) 18285586Sjake{ 18385586Sjake critical_t savecrit; 18485586Sjake 18585586Sjake if (pmc < 0 || pmc >= NPMC) 18685586Sjake return EINVAL; 18785586Sjake 18885586Sjake if (perfmon_inuse & (1 << pmc)) { 18985586Sjake savecrit = cpu_critical_enter(); 19085586Sjake ctl_shadow[pmc] |= (PMCF_EN << 16); 19185586Sjake wrmsr(msr_pmc[pmc], pmc_shadow[pmc]); 19285586Sjake writectl(pmc); 19385586Sjake cpu_critical_exit(savecrit); 
19485586Sjake return 0; 19585586Sjake } 19685586Sjake return EBUSY; 19785586Sjake} 19885586Sjake 19985586Sjakeint 20085586Sjakeperfmon_stop(int pmc) 20185586Sjake{ 20285586Sjake critical_t savecrit; 20385586Sjake 20485586Sjake if (pmc < 0 || pmc >= NPMC) 20585586Sjake return EINVAL; 20685586Sjake 20785586Sjake if (perfmon_inuse & (1 << pmc)) { 208104072Sjake savecrit = cpu_critical_enter(); 209104072Sjake pmc_shadow[pmc] = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL; 210104072Sjake ctl_shadow[pmc] &= ~(PMCF_EN << 16); 211104072Sjake writectl(pmc); 212104072Sjake cpu_critical_exit(savecrit); 213104072Sjake return 0; 214104072Sjake } 215104072Sjake return EBUSY; 216104072Sjake} 217104072Sjake 218104072Sjakeint 219104072Sjakeperfmon_read(int pmc, quad_t *val) 220104072Sjake{ 221104072Sjake if (pmc < 0 || pmc >= NPMC) 222104072Sjake return EINVAL; 223104072Sjake 22485586Sjake if (perfmon_inuse & (1 << pmc)) { 22585586Sjake if (ctl_shadow[pmc] & (PMCF_EN << 16)) 22685586Sjake *val = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL; 22785586Sjake else 22885586Sjake *val = pmc_shadow[pmc]; 22985586Sjake return 0; 23085586Sjake } 23185586Sjake 23285586Sjake return EBUSY; 23385586Sjake} 23485586Sjake 23585586Sjakeint 23685586Sjakeperfmon_reset(int pmc) 23785586Sjake{ 23885586Sjake if (pmc < 0 || pmc >= NPMC) 23985586Sjake return EINVAL; 24085586Sjake 24185586Sjake if (perfmon_inuse & (1 << pmc)) { 24285586Sjake wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0); 24385586Sjake return 0; 24485586Sjake } 24585586Sjake return EBUSY; 24685586Sjake} 24785586Sjake 24885586Sjake#ifndef SMP 24985586Sjake/* 25085586Sjake * Unfortunately, the performance-monitoring registers are laid out 25185586Sjake * differently in the P5 and P6. We keep everything in P6 format 25285586Sjake * internally (except for the event code), and convert to P5 25385586Sjake * format as needed on those CPUs. The writectl function pointer 25485586Sjake * is set up to point to one of these functions by perfmon_init(). 
25585586Sjake */ 25685586Sjakeint 25785586Sjakewritectl6(int pmc) 25885586Sjake{ 25985586Sjake if (pmc > 0 && !(ctl_shadow[pmc] & (PMCF_EN << 16))) { 26085586Sjake wrmsr(msr_ctl[pmc], 0); 26185586Sjake } else { 262109605Sjake wrmsr(msr_ctl[pmc], ctl_shadow[pmc]); 263129282Speter } 264129282Speter return 0; 265109605Sjake} 266109605Sjake 267109605Sjake#define P5FLAG_P 0x200 268109605Sjake#define P5FLAG_E 0x100 269109605Sjake#define P5FLAG_USR 0x80 270109605Sjake#define P5FLAG_OS 0x40 271109605Sjake 272109605Sjakeint 273109605Sjakewritectl5(int pmc) 274109605Sjake{ 275109605Sjake quad_t newval = 0; 276109605Sjake 277109605Sjake if (ctl_shadow[1] & (PMCF_EN << 16)) { 278109605Sjake if (ctl_shadow[1] & (PMCF_USR << 16)) 279109605Sjake newval |= P5FLAG_USR << 16; 280109605Sjake if (ctl_shadow[1] & (PMCF_OS << 16)) 281109605Sjake newval |= P5FLAG_OS << 16; 282109605Sjake if (!(ctl_shadow[1] & (PMCF_E << 16))) 283109605Sjake newval |= P5FLAG_E << 16; 284109605Sjake newval |= (ctl_shadow[1] & 0x3f) << 16; 28585586Sjake } 28680708Sjake if (ctl_shadow[0] & (PMCF_EN << 16)) { 287129282Speter if (ctl_shadow[0] & (PMCF_USR << 16)) 288129282Speter newval |= P5FLAG_USR; 28980708Sjake if (ctl_shadow[0] & (PMCF_OS << 16)) 29085586Sjake newval |= P5FLAG_OS; 291153504Smarcel if (!(ctl_shadow[0] & (PMCF_E << 16))) 29285586Sjake newval |= P5FLAG_E; 293153504Smarcel newval |= ctl_shadow[0] & 0x3f; 29485586Sjake } 29585586Sjake 29698635Smux wrmsr(msr_ctl[0], newval); 29785586Sjake return 0; /* XXX should check for unimplemented bits */ 29885586Sjake} 29985586Sjake#endif /* !SMP */ 30085586Sjake 30185586Sjake/* 30285586Sjake * Now the user-mode interface, called from a subdevice of mem.c. 
303153504Smarcel */ 30485586Sjakestatic int writer; 30595410Smarcelstatic int writerpmc; 30685586Sjake 307109605Sjakestatic int 30885586Sjakeperfmon_open(dev_t dev, int flags, int fmt, struct thread *td) 30985586Sjake{ 31085586Sjake if (!perfmon_cpuok) 31185586Sjake return ENXIO; 31285586Sjake 31385586Sjake if (flags & FWRITE) { 31485586Sjake if (writer) { 31585586Sjake return EBUSY; 31685586Sjake } else { 31785586Sjake writer = 1; 31885586Sjake writerpmc = 0; 31985586Sjake } 320129282Speter } 321107517Stmm return 0; 322107517Stmm} 323107517Stmm 32485586Sjakestatic int 32585586Sjakeperfmon_close(dev_t dev, int flags, int fmt, struct thread *td) 32685586Sjake{ 32785586Sjake if (flags & FWRITE) { 32885586Sjake int i; 32985586Sjake 33098635Smux for (i = 0; i < NPMC; i++) { 33185586Sjake if (writerpmc & (1 << i)) 33285586Sjake perfmon_fini(i); 33385586Sjake } 33485586Sjake writer = 0; 33585586Sjake } 33685586Sjake return 0; 33785586Sjake} 33885586Sjake 33985586Sjakestatic int 34085586Sjakeperfmon_ioctl(dev_t dev, u_long cmd, caddr_t param, int flags, struct thread *td) 34185586Sjake{ 34285586Sjake struct pmc *pmc; 34385586Sjake struct pmc_data *pmcd; 34480708Sjake struct pmc_tstamp *pmct; 34580708Sjake int *ip; 346105469Smarcel int rv; 347105469Smarcel 348105469Smarcel switch(cmd) { 349105469Smarcel case PMIOSETUP: 350105469Smarcel if (!(flags & FWRITE)) 351105469Smarcel return EPERM; 352105469Smarcel pmc = (struct pmc *)param; 353105469Smarcel 354105469Smarcel rv = perfmon_setup(pmc->pmc_num, pmc->pmc_val); 355105469Smarcel if (!rv) { 356105469Smarcel writerpmc |= (1 << pmc->pmc_num); 357105469Smarcel } 358105469Smarcel break; 359105469Smarcel 360 case PMIOGET: 361 pmc = (struct pmc *)param; 362 rv = perfmon_get(pmc->pmc_num, &pmc->pmc_val); 363 break; 364 365 case PMIOSTART: 366 if (!(flags & FWRITE)) 367 return EPERM; 368 369 ip = (int *)param; 370 rv = perfmon_start(*ip); 371 break; 372 373 case PMIOSTOP: 374 if (!(flags & FWRITE)) 375 return EPERM; 376 377 ip = 
(int *)param; 378 rv = perfmon_stop(*ip); 379 break; 380 381 case PMIORESET: 382 if (!(flags & FWRITE)) 383 return EPERM; 384 385 ip = (int *)param; 386 rv = perfmon_reset(*ip); 387 break; 388 389 case PMIOREAD: 390 pmcd = (struct pmc_data *)param; 391 rv = perfmon_read(pmcd->pmcd_num, &pmcd->pmcd_value); 392 break; 393 394 case PMIOTSTAMP: 395 if (!tsc_freq) { 396 rv = ENOTTY; 397 break; 398 } 399 pmct = (struct pmc_tstamp *)param; 400 /* XXX interface loses precision. */ 401 pmct->pmct_rate = tsc_freq / 1000000; 402 pmct->pmct_value = rdtsc(); 403 rv = 0; 404 break; 405 default: 406 rv = ENOTTY; 407 } 408 409 return rv; 410} 411