1/*- 2 * Copyright 1996 Massachusetts Institute of Technology 3 * 4 * Permission to use, copy, modify, and distribute this software and 5 * its documentation for any purpose and without fee is hereby 6 * granted, provided that both the above copyright notice and this 7 * permission notice appear in all copies, that both the above 8 * copyright notice and this permission notice appear in all 9 * supporting documentation, and that the name of M.I.T. not be used 10 * in advertising or publicity pertaining to distribution of the 11 * software without specific, written prior permission. M.I.T. makes 12 * no representations about the suitability of this software for any 13 * purpose. It is provided "as is" without express or implied 14 * warranty. 15 * 16 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS 17 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, 18 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT 20 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 23 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 26 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD$"); 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/conf.h> 36#include <sys/fcntl.h> 37#include <sys/kernel.h> 38 39#ifndef SMP 40#include <machine/cputypes.h> 41#endif 42#include <machine/clock.h> 43#include <machine/perfmon.h> 44#include <machine/specialreg.h> 45 46static int perfmon_inuse; 47static int perfmon_cpuok; 48#ifndef SMP 49static int msr_ctl[NPMC]; 50#endif 51static int msr_pmc[NPMC]; 52static unsigned int ctl_shadow[NPMC]; 53static quad_t pmc_shadow[NPMC]; /* used when ctr is stopped on P5 */ 54static int (*writectl)(int); 55#ifndef SMP 56static int writectl5(int); 57static int writectl6(int); 58#endif 59 60static d_close_t perfmon_close; 61static d_open_t perfmon_open; 62static d_ioctl_t perfmon_ioctl; 63 64/* 65 * XXX perfmon_init_dev(void *) is a split from the perfmon_init() funtion. 66 * This solves a problem for DEVFS users. It loads the "perfmon" driver after 67 * the DEVFS subsystem has been kicked into action. The SI_ORDER_ANY is to 68 * assure that it is the most lowest priority task which, guarantees the 69 * above. 70 */ 71static void perfmon_init_dev(void *); 72SYSINIT(cpu, SI_SUB_DRIVERS, SI_ORDER_ANY, perfmon_init_dev, NULL); 73 74static struct cdevsw perfmon_cdevsw = { 75 .d_version = D_VERSION, 76 .d_flags = D_NEEDGIANT, 77 .d_open = perfmon_open, 78 .d_close = perfmon_close, 79 .d_ioctl = perfmon_ioctl, 80 .d_name = "perfmon", 81}; 82 83/* 84 * Must be called after cpu_class is set up. 85 */ 86void 87perfmon_init(void) 88{ 89#ifndef SMP 90 switch(cpu_class) { 91 case CPUCLASS_586: 92 perfmon_cpuok = 1; 93 msr_ctl[0] = MSR_P5_CESR; 94 msr_ctl[1] = MSR_P5_CESR; 95 msr_pmc[0] = MSR_P5_CTR0; 96 msr_pmc[1] = MSR_P5_CTR1; 97 writectl = writectl5; 98 break; 99 case CPUCLASS_686: 100 perfmon_cpuok = 1; 101 msr_ctl[0] = MSR_EVNTSEL0; 102 msr_ctl[1] = MSR_EVNTSEL1; 103 msr_pmc[0] = MSR_PERFCTR0; 104 msr_pmc[1] = MSR_PERFCTR1; 105 writectl = writectl6; 106 break; 107 108 default: 109 perfmon_cpuok = 0; 110 break; 111 } 112#endif /* SMP */ 113} 114 115static void 116perfmon_init_dev(dummy) 117 void *dummy; 118{ 119 make_dev(&perfmon_cdevsw, 32, UID_ROOT, GID_KMEM, 0640, "perfmon"); 120} 121 122int 123perfmon_avail(void) 124{ 125 return perfmon_cpuok; 126} 127 128int 129perfmon_setup(int pmc, unsigned int control) 130{ 131 register_t saveintr; 132 133 if (pmc < 0 || pmc >= NPMC) 134 return EINVAL; 135 136 perfmon_inuse |= (1 << pmc); 137 control &= ~(PMCF_SYS_FLAGS << 16); 138 saveintr = intr_disable(); 139 ctl_shadow[pmc] = control; 140 writectl(pmc); 141 wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0); 142 intr_restore(saveintr); 143 return 0; 144} 145 146int 147perfmon_get(int pmc, unsigned int *control) 148{ 149 if (pmc < 0 || pmc >= NPMC) 150 return EINVAL; 151 152 if (perfmon_inuse & (1 << pmc)) { 153 *control = ctl_shadow[pmc]; 154 return 0; 155 } 156 return EBUSY; /* XXX reversed sense */ 157} 158 159int 160perfmon_fini(int pmc) 161{ 162 if (pmc < 0 || pmc >= NPMC) 163 return EINVAL; 164 165 if (perfmon_inuse & (1 << pmc)) { 166 perfmon_stop(pmc); 167 ctl_shadow[pmc] = 0; 168 perfmon_inuse &= ~(1 << pmc); 169 return 0; 170 } 171 return EBUSY; /* XXX reversed sense */ 172} 173 174int 175perfmon_start(int pmc) 176{ 177 register_t saveintr; 178 179 if (pmc < 0 || pmc >= NPMC) 180 return EINVAL; 181 182 if (perfmon_inuse & (1 << pmc)) { 183 saveintr = intr_disable(); 184 ctl_shadow[pmc] |= (PMCF_EN << 16); 185 wrmsr(msr_pmc[pmc], pmc_shadow[pmc]); 186 writectl(pmc); 187 intr_restore(saveintr); 188 return 0; 189 } 190 return EBUSY; 191} 192 193int 194perfmon_stop(int pmc) 195{ 196 register_t saveintr; 197 198 if (pmc < 0 || pmc >= NPMC) 199 return EINVAL; 200 201 if (perfmon_inuse & (1 << pmc)) { 202 saveintr = intr_disable(); 203 pmc_shadow[pmc] = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL; 204 ctl_shadow[pmc] &= ~(PMCF_EN << 16); 205 writectl(pmc); 206 intr_restore(saveintr); 207 return 0; 208 } 209 return EBUSY; 210} 211 212int 213perfmon_read(int pmc, quad_t *val) 214{ 215 if (pmc < 0 || pmc >= NPMC) 216 return EINVAL; 217 218 if (perfmon_inuse & (1 << pmc)) { 219 if (ctl_shadow[pmc] & (PMCF_EN << 16)) 220 *val = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL; 221 else 222 *val = pmc_shadow[pmc]; 223 return 0; 224 } 225 226 return EBUSY; 227} 228 229int 230perfmon_reset(int pmc) 231{ 232 if (pmc < 0 || pmc >= NPMC) 233 return EINVAL; 234 235 if (perfmon_inuse & (1 << pmc)) { 236 wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0); 237 return 0; 238 } 239 return EBUSY; 240} 241 242#ifndef SMP 243/* 244 * Unfortunately, the performance-monitoring registers are laid out 245 * differently in the P5 and P6. We keep everything in P6 format 246 * internally (except for the event code), and convert to P5 247 * format as needed on those CPUs. The writectl function pointer 248 * is set up to point to one of these functions by perfmon_init(). 249 */ 250int 251writectl6(int pmc) 252{ 253 if (pmc > 0 && !(ctl_shadow[pmc] & (PMCF_EN << 16))) { 254 wrmsr(msr_ctl[pmc], 0); 255 } else { 256 wrmsr(msr_ctl[pmc], ctl_shadow[pmc]); 257 } 258 return 0; 259} 260 261#define P5FLAG_P 0x200 262#define P5FLAG_E 0x100 263#define P5FLAG_USR 0x80 264#define P5FLAG_OS 0x40 265 266int 267writectl5(int pmc) 268{ 269 quad_t newval = 0; 270 271 if (ctl_shadow[1] & (PMCF_EN << 16)) { 272 if (ctl_shadow[1] & (PMCF_USR << 16)) 273 newval |= P5FLAG_USR << 16; 274 if (ctl_shadow[1] & (PMCF_OS << 16)) 275 newval |= P5FLAG_OS << 16; 276 if (!(ctl_shadow[1] & (PMCF_E << 16))) 277 newval |= P5FLAG_E << 16; 278 newval |= (ctl_shadow[1] & 0x3f) << 16; 279 } 280 if (ctl_shadow[0] & (PMCF_EN << 16)) { 281 if (ctl_shadow[0] & (PMCF_USR << 16)) 282 newval |= P5FLAG_USR; 283 if (ctl_shadow[0] & (PMCF_OS << 16)) 284 newval |= P5FLAG_OS; 285 if (!(ctl_shadow[0] & (PMCF_E << 16))) 286 newval |= P5FLAG_E; 287 newval |= ctl_shadow[0] & 0x3f; 288 } 289 290 wrmsr(msr_ctl[0], newval); 291 return 0; /* XXX should check for unimplemented bits */ 292} 293#endif /* !SMP */ 294 295/* 296 * Now the user-mode interface, called from a subdevice of mem.c. 297 */ 298static int writer; 299static int writerpmc; 300 301static int 302perfmon_open(struct cdev *dev, int flags, int fmt, struct thread *td) 303{ 304 if (!perfmon_cpuok) 305 return ENXIO; 306 307 if (flags & FWRITE) { 308 if (writer) { 309 return EBUSY; 310 } else { 311 writer = 1; 312 writerpmc = 0; 313 } 314 } 315 return 0; 316} 317 318static int 319perfmon_close(struct cdev *dev, int flags, int fmt, struct thread *td) 320{ 321 if (flags & FWRITE) { 322 int i; 323 324 for (i = 0; i < NPMC; i++) { 325 if (writerpmc & (1 << i)) 326 perfmon_fini(i); 327 } 328 writer = 0; 329 } 330 return 0; 331} 332 333static int 334perfmon_ioctl(struct cdev *dev, u_long cmd, caddr_t param, int flags, struct thread *td) 335{ 336 struct pmc *pmc; 337 struct pmc_data *pmcd; 338 struct pmc_tstamp *pmct; 339 uint64_t freq; 340 int *ip; 341 int rv; 342 343 switch(cmd) { 344 case PMIOSETUP: 345 if (!(flags & FWRITE)) 346 return EPERM; 347 pmc = (struct pmc *)param; 348 349 rv = perfmon_setup(pmc->pmc_num, pmc->pmc_val); 350 if (!rv) { 351 writerpmc |= (1 << pmc->pmc_num); 352 } 353 break; 354 355 case PMIOGET: 356 pmc = (struct pmc *)param; 357 rv = perfmon_get(pmc->pmc_num, &pmc->pmc_val); 358 break; 359 360 case PMIOSTART: 361 if (!(flags & FWRITE)) 362 return EPERM; 363 364 ip = (int *)param; 365 rv = perfmon_start(*ip); 366 break; 367 368 case PMIOSTOP: 369 if (!(flags & FWRITE)) 370 return EPERM; 371 372 ip = (int *)param; 373 rv = perfmon_stop(*ip); 374 break; 375 376 case PMIORESET: 377 if (!(flags & FWRITE)) 378 return EPERM; 379 380 ip = (int *)param; 381 rv = perfmon_reset(*ip); 382 break; 383 384 case PMIOREAD: 385 pmcd = (struct pmc_data *)param; 386 rv = perfmon_read(pmcd->pmcd_num, &pmcd->pmcd_value); 387 break; 388 389 case PMIOTSTAMP: 390 freq = atomic_load_acq_64(&tsc_freq); 391 if (freq == 0) { 392 rv = ENOTTY; 393 break; 394 } 395 pmct = (struct pmc_tstamp *)param; 396 /* XXX interface loses precision. */ 397 pmct->pmct_rate = freq / 1000000; 398 pmct->pmct_value = rdtsc(); 399 rv = 0; 400 break; 401 default: 402 rv = ENOTTY; 403 } 404 405 return rv; 406} 407