// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// See the README.md in this directory for documentation.

#include <ddk/binding.h>
#include <ddk/debug.h>
#include <ddk/device.h>
#include <ddk/driver.h>
#include <ddk/io-buffer.h>

#include <lib/zircon-internal/device/cpu-trace/intel-pm.h>
#include <lib/zircon-internal/mtrace.h>
#include <zircon/syscalls.h>
#include <zircon/syscalls/resource.h>
#include <zircon/types.h>

#include <assert.h>
#include <cpuid.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "cpu-trace-private.h"

// TODO(dje): Having trouble getting this working, so just punt for now.
#define TRY_FREEZE_ON_PMI 0

// Individual bits in the fixed counter enable field.
// See Intel Volume 3, Figure 18-2 "Layout of IA32_FIXED_CTR_CTRL MSR".
#define FIXED_CTR_ENABLE_OS 1
#define FIXED_CTR_ENABLE_USR 2

// There's only a few fixed events, so handle them directly.
// X-macro expansion: each DEF_FIXED_EVENT entry in intel-pm-events.inc
// becomes a |symbol|_ID enum constant (the .inc file consumes and
// #undefs the macro, so it can be redefined for later expansions).
typedef enum {
#define DEF_FIXED_EVENT(symbol, id, regnum, flags, name, description) \
    symbol ## _ID = CPUPERF_MAKE_EVENT_ID(CPUPERF_UNIT_FIXED, id),
#include <lib/zircon-internal/device/cpu-trace/intel-pm-events.inc>
} fixed_event_id_t;

// Verify each fixed counter regnum < IPM_MAX_FIXED_COUNTERS.
// The expansion builds one big "1 && (r0) < MAX && (r1) < MAX ..." expression.
#define DEF_FIXED_EVENT(symbol, id, regnum, flags, name, description) \
    && (regnum) < IPM_MAX_FIXED_COUNTERS
static_assert(1
#include <lib/zircon-internal/device/cpu-trace/intel-pm-events.inc>
    , "");

typedef enum {
#define DEF_MISC_SKL_EVENT(symbol, id, offset, size, flags, name, description) \
    symbol ## _ID = CPUPERF_MAKE_EVENT_ID(CPUPERF_UNIT_MISC, id),
#include <lib/zircon-internal/device/cpu-trace/skylake-misc-events.inc>
} misc_event_id_t;

// Misc event ids needn't be consecutive.
// Build a lookup table we can use to track duplicates.
typedef enum {
#define DEF_MISC_SKL_EVENT(symbol, id, offset, size, flags, name, description) \
    symbol ## _NUMBER,
#include <lib/zircon-internal/device/cpu-trace/skylake-misc-events.inc>
    NUM_MISC_EVENTS
} misc_event_number_t;

// This table is sorted at startup (by ipm_init_misc_event_table) so that
// ipm_lookup_misc_event can bsearch it.
static cpuperf_event_id_t misc_event_table_contents[NUM_MISC_EVENTS] = {
#define DEF_MISC_SKL_EVENT(symbol, id, offset, size, flags, name, description) \
    CPUPERF_MAKE_EVENT_ID(CPUPERF_UNIT_MISC, id),
#include <lib/zircon-internal/device/cpu-trace/skylake-misc-events.inc>
};

// Const accessor to give the illusion of the table being const.
static const cpuperf_event_id_t* misc_event_table = &misc_event_table_contents[0];

static void ipm_init_misc_event_table(void);

typedef enum {
#define DEF_ARCH_EVENT(symbol, id, ebx_bit, event, umask, flags, name, description) \
    symbol,
#include <lib/zircon-internal/device/cpu-trace/intel-pm-events.inc>
} arch_event_t;

typedef enum {
#define DEF_SKL_EVENT(symbol, id, event, umask, flags, name, description) \
    symbol,
#include <lib/zircon-internal/device/cpu-trace/skylake-pm-events.inc>
} model_event_t;

// Raw values needed to program one programmable event: the event-select
// and umask fields of IA32_PERFEVTSELx plus driver IPM_REG_FLAG_* bits.
typedef struct {
    uint32_t event;
    uint32_t umask;
    uint32_t flags;
} event_details_t;

static const event_details_t kArchEvents[] = {
#define DEF_ARCH_EVENT(symbol, id, ebx_bit, event, umask, flags, name, description) \
    { event, umask, flags },
#include <lib/zircon-internal/device/cpu-trace/intel-pm-events.inc>
};

static const event_details_t kModelEvents[] = {
#define DEF_SKL_EVENT(symbol, id, event, umask, flags, name, description) \
    { event, umask, flags },
#include <lib/zircon-internal/device/cpu-trace/skylake-pm-events.inc>
};

// Sparse maps from API event id to an index into kArchEvents/kModelEvents.
// Designated initializers leave unlisted ids zero-initialized; index 0 is
// recognized as invalid via its event == 0 && umask == 0 entry.
static const uint16_t kArchEventMap[] = {
#define DEF_ARCH_EVENT(symbol, id, ebx_bit, event, umask, flags, name, description) \
    [id] = symbol,
#include <lib/zircon-internal/device/cpu-trace/intel-pm-events.inc>
};
static_assert(countof(kArchEventMap) <= CPUPERF_MAX_EVENT + 1, "");

static const uint16_t kModelEventMap[] = {
#define DEF_SKL_EVENT(symbol, id, event, umask, flags, name, description) \
    [id] = symbol,
#include <lib/zircon-internal/device/cpu-trace/skylake-pm-events.inc>
};
static_assert(countof(kModelEventMap) <= CPUPERF_MAX_EVENT + 1, "");

// All configuration data is staged here before writing any MSRs, etc.
// Then when ready the "START" ioctl will write all the necessary MSRS,
// and do whatever kernel operations are required for collecting data.

typedef struct ipm_per_trace_state {
    // true if |config| has been set.
    bool configured;

    // The trace configuration as given to us via the ioctl.
    cpuperf_config_t ioctl_config;

    // The internalized form of |config| that we pass to the kernel.
    zx_x86_ipm_config_t config;

    // # of entries in |buffers|.
    // TODO(dje): This is generally the number of cpus, but it could be
    // something else later.
    uint32_t num_buffers;

    // Each buffer is the same size (at least for now, KISS).
    // There is one buffer per cpu.
    // This is a uint32 instead of uint64 as there's no point in supporting
    // that large of a buffer.
    uint32_t buffer_size;

    io_buffer_t* buffers;
} ipm_per_trace_state_t;

typedef struct cpuperf_device {
    // Once tracing has started various things are not allowed until it stops.
    bool active;

    // one entry for each trace
    // TODO(dje): At the moment we only support one trace at a time.
    // "trace" == "data collection run"
    ipm_per_trace_state_t* per_trace_state;
} cpuperf_device_t;

static bool ipm_supported = false;
// This is only valid if |ipm_supported| is true.
162static zx_x86_ipm_properties_t ipm_properties; 163 164// maximum space, in bytes, for trace buffers (per cpu) 165#define MAX_PER_TRACE_SPACE (256 * 1024 * 1024) 166 167void cpuperf_init_once(void) 168{ 169 ipm_init_misc_event_table(); 170 171 zx_x86_ipm_properties_t props; 172 zx_handle_t resource = get_root_resource(); 173 zx_status_t status = 174 zx_mtrace_control(resource, MTRACE_KIND_CPUPERF, MTRACE_CPUPERF_GET_PROPERTIES, 175 0, &props, sizeof(props)); 176 if (status != ZX_OK) { 177 if (status == ZX_ERR_NOT_SUPPORTED) 178 zxlogf(INFO, "%s: No PM support\n", __func__); 179 else 180 zxlogf(INFO, "%s: Error %d fetching ipm properties\n", 181 __func__, status); 182 return; 183 } 184 185 // Skylake supports version 4. KISS and begin with that. 186 // Note: This should agree with the kernel driver's check. 187 if (props.pm_version < 4) { 188 zxlogf(INFO, "%s: PM version 4 or above is required\n", __func__); 189 return; 190 } 191 192 ipm_supported = true; 193 ipm_properties = props; 194 195 zxlogf(TRACE, "Intel Performance Monitor configuration for this chipset:\n"); 196 zxlogf(TRACE, "IPM: version: %u\n", ipm_properties.pm_version); 197 zxlogf(TRACE, "IPM: num_programmable_events: %u\n", 198 ipm_properties.num_programmable_events); 199 zxlogf(TRACE, "IPM: num_fixed_events: %u\n", 200 ipm_properties.num_fixed_events); 201 zxlogf(TRACE, "IPM: num_misc_events: %u\n", 202 ipm_properties.num_misc_events); 203 zxlogf(TRACE, "IPM: programmable_counter_width: %u\n", 204 ipm_properties.programmable_counter_width); 205 zxlogf(TRACE, "IPM: fixed_counter_width: %u\n", 206 ipm_properties.fixed_counter_width); 207 zxlogf(TRACE, "IPM: perf_capabilities: 0x%lx\n", 208 ipm_properties.perf_capabilities); 209} 210 211 212// Helper routines for the ioctls. 213 214static void ipm_free_buffers_for_trace(ipm_per_trace_state_t* per_trace, uint32_t num_allocated) { 215 // Note: This may be called with partially allocated buffers. 
216 assert(per_trace->buffers); 217 assert(num_allocated <= per_trace->num_buffers); 218 for (uint32_t i = 0; i < num_allocated; ++i) 219 io_buffer_release(&per_trace->buffers[i]); 220 free(per_trace->buffers); 221 per_trace->buffers = NULL; 222} 223 224// Map a fixed counter event id to its h/w register number. 225// Returns IPM_MAX_FIXED_COUNTERS if |id| is unknown. 226static unsigned ipm_fixed_counter_number(cpuperf_event_id_t id) { 227 enum { 228#define DEF_FIXED_EVENT(symbol, id, regnum, flags, name, description) \ 229 symbol ## _NUMBER = regnum, 230#include <lib/zircon-internal/device/cpu-trace/intel-pm-events.inc> 231 }; 232 switch (id) { 233 case FIXED_INSTRUCTIONS_RETIRED_ID: 234 return FIXED_INSTRUCTIONS_RETIRED_NUMBER; 235 case FIXED_UNHALTED_CORE_CYCLES_ID: 236 return FIXED_UNHALTED_CORE_CYCLES_NUMBER; 237 case FIXED_UNHALTED_REFERENCE_CYCLES_ID: 238 return FIXED_UNHALTED_REFERENCE_CYCLES_NUMBER; 239 default: 240 return IPM_MAX_FIXED_COUNTERS; 241 } 242} 243 244static int ipm_compare_cpuperf_event_id(const void* ap, const void* bp) { 245 const cpuperf_event_id_t* a = ap; 246 const cpuperf_event_id_t* b = bp; 247 if (*a < *b) 248 return -1; 249 if (*a > *b) 250 return 1; 251 return 0; 252} 253 254static void ipm_init_misc_event_table(void) { 255 qsort(misc_event_table_contents, 256 countof(misc_event_table_contents), 257 sizeof(misc_event_table_contents[0]), 258 ipm_compare_cpuperf_event_id); 259} 260 261// Map a misc event id to its ordinal (unique number in range 262// 0 ... NUM_MISC_EVENTS - 1). 263// Returns -1 if |id| is unknown. 264static int ipm_lookup_misc_event(cpuperf_event_id_t id) { 265 cpuperf_event_id_t* p = bsearch(&id, misc_event_table, 266 countof(misc_event_table_contents), 267 sizeof(id), 268 ipm_compare_cpuperf_event_id); 269 if (!p) 270 return -1; 271 ptrdiff_t result = p - misc_event_table; 272 assert(result < NUM_MISC_EVENTS); 273 return (int) result; 274} 275 276 277// The userspace side of the driver. 
278 279static zx_status_t ipm_get_properties(cpu_trace_device_t* dev, 280 void* reply, size_t replymax, 281 size_t* out_actual) { 282 zxlogf(TRACE, "%s called\n", __func__); 283 284 if (!ipm_supported) 285 return ZX_ERR_NOT_SUPPORTED; 286 287 cpuperf_properties_t props; 288 if (replymax < sizeof(props)) 289 return ZX_ERR_BUFFER_TOO_SMALL; 290 291 memset(&props, 0, sizeof(props)); 292 props.api_version = CPUPERF_API_VERSION; 293 props.pm_version = ipm_properties.pm_version; 294 // To the arch-independent API, the misc events on Intel are currently 295 // all "fixed" in the sense that they don't occupy a limited number of 296 // programmable slots. Ultimately there could still be limitations (e.g., 297 // some combination of events can't be supported) but that's ok. This 298 // data is for informational/debug purposes. 299 // TODO(dje): Something more elaborate can wait for publishing them via 300 // some namespace. 301 props.num_fixed_events = (ipm_properties.num_fixed_events + 302 ipm_properties.num_misc_events); 303 props.num_programmable_events = ipm_properties.num_programmable_events; 304 props.fixed_counter_width = ipm_properties.fixed_counter_width; 305 props.programmable_counter_width = ipm_properties.programmable_counter_width; 306 307 memcpy(reply, &props, sizeof(props)); 308 *out_actual = sizeof(props); 309 return ZX_OK; 310} 311 312static zx_status_t ipm_alloc_trace(cpu_trace_device_t* dev, 313 const void* cmd, size_t cmdlen) { 314 zxlogf(TRACE, "%s called\n", __func__); 315 316 if (!ipm_supported) 317 return ZX_ERR_NOT_SUPPORTED; 318 if (dev->cpuperf) 319 return ZX_ERR_BAD_STATE; 320 321 // Note: The remaining API calls don't have to check |ipm_supported| 322 // because this will never succeed otherwise, and they all require this 323 // to be done first. 
324 325 ioctl_cpuperf_alloc_t alloc; 326 if (cmdlen != sizeof(alloc)) 327 return ZX_ERR_INVALID_ARGS; 328 memcpy(&alloc, cmd, sizeof(alloc)); 329 if (alloc.buffer_size > MAX_PER_TRACE_SPACE) 330 return ZX_ERR_INVALID_ARGS; 331 uint32_t num_cpus = zx_system_get_num_cpus(); 332 if (alloc.num_buffers != num_cpus) // TODO(dje): for now 333 return ZX_ERR_INVALID_ARGS; 334 335 cpuperf_device_t* ipm = calloc(1, sizeof(*dev->cpuperf)); 336 if (!ipm) 337 return ZX_ERR_NO_MEMORY; 338 339 ipm_per_trace_state_t* per_trace = calloc(1, sizeof(ipm->per_trace_state[0])); 340 if (!per_trace) { 341 free(ipm); 342 return ZX_ERR_NO_MEMORY; 343 } 344 345 per_trace->buffers = calloc(num_cpus, sizeof(per_trace->buffers[0])); 346 if (!per_trace->buffers) { 347 free(per_trace); 348 free(ipm); 349 return ZX_ERR_NO_MEMORY; 350 } 351 352 uint32_t i = 0; 353 for ( ; i < num_cpus; ++i) { 354 zx_status_t status = 355 io_buffer_init(&per_trace->buffers[i], dev->bti, alloc.buffer_size, IO_BUFFER_RW); 356 if (status != ZX_OK) 357 break; 358 } 359 if (i != num_cpus) { 360 ipm_free_buffers_for_trace(per_trace, i); 361 free(per_trace); 362 free(ipm); 363 return ZX_ERR_NO_MEMORY; 364 } 365 366 per_trace->num_buffers = alloc.num_buffers; 367 per_trace->buffer_size = alloc.buffer_size; 368 ipm->per_trace_state = per_trace; 369 dev->cpuperf = ipm; 370 return ZX_OK; 371} 372 373static zx_status_t ipm_free_trace(cpu_trace_device_t* dev) { 374 zxlogf(TRACE, "%s called\n", __func__); 375 376 cpuperf_device_t* ipm = dev->cpuperf; 377 if (!ipm) 378 return ZX_ERR_BAD_STATE; 379 if (ipm->active) 380 return ZX_ERR_BAD_STATE; 381 382 ipm_per_trace_state_t* per_trace = ipm->per_trace_state; 383 ipm_free_buffers_for_trace(per_trace, per_trace->num_buffers); 384 free(per_trace); 385 free(ipm); 386 dev->cpuperf = NULL; 387 return ZX_OK; 388} 389 390static zx_status_t ipm_get_alloc(cpu_trace_device_t* dev, 391 void* reply, size_t replymax, 392 size_t* out_actual) { 393 zxlogf(TRACE, "%s called\n", __func__); 394 395 
const cpuperf_device_t* ipm = dev->cpuperf; 396 if (!ipm) 397 return ZX_ERR_BAD_STATE; 398 399 ioctl_cpuperf_alloc_t alloc; 400 if (replymax < sizeof(alloc)) 401 return ZX_ERR_BUFFER_TOO_SMALL; 402 403 alloc.num_buffers = ipm->per_trace_state->num_buffers; 404 alloc.buffer_size = ipm->per_trace_state->buffer_size; 405 memcpy(reply, &alloc, sizeof(alloc)); 406 *out_actual = sizeof(alloc); 407 return ZX_OK; 408} 409 410static zx_status_t ipm_get_buffer_handle(cpu_trace_device_t* dev, 411 const void* cmd, size_t cmdlen, 412 void* reply, size_t replymax, 413 size_t* out_actual) { 414 zxlogf(TRACE, "%s called\n", __func__); 415 416 cpuperf_device_t* ipm = dev->cpuperf; 417 if (!ipm) 418 return ZX_ERR_BAD_STATE; 419 420 ioctl_cpuperf_buffer_handle_req_t req; 421 zx_handle_t h; 422 423 if (cmdlen != sizeof(req)) 424 return ZX_ERR_INVALID_ARGS; 425 if (replymax < sizeof(h)) 426 return ZX_ERR_BUFFER_TOO_SMALL; 427 const ipm_per_trace_state_t* per_trace = ipm->per_trace_state; 428 memcpy(&req, cmd, sizeof(req)); 429 if (req.descriptor >= per_trace->num_buffers) 430 return ZX_ERR_INVALID_ARGS; 431 432 zx_status_t status = zx_handle_duplicate(per_trace->buffers[req.descriptor].vmo_handle, ZX_RIGHT_SAME_RIGHTS, &h); 433 if (status < 0) 434 return status; 435 memcpy(reply, &h, sizeof(h)); 436 *out_actual = sizeof(h); 437 return ZX_OK; 438} 439 440typedef struct { 441 // Maximum number of each event we can handle. 442 unsigned max_num_fixed; 443 unsigned max_num_programmable; 444 unsigned max_num_misc; 445 446 // The number of events in use. 447 unsigned num_fixed; 448 unsigned num_programmable; 449 unsigned num_misc; 450 451 // The maximum value the counter can have before overflowing. 452 uint64_t max_fixed_value; 453 uint64_t max_programmable_value; 454 455 // For catching duplicates of the fixed counters. 456 bool have_fixed[IPM_MAX_FIXED_COUNTERS]; 457 // For catching duplicates of the misc events, 1 bit per event. 
458 uint64_t have_misc[(NUM_MISC_EVENTS + 63) / 64]; 459 460 bool have_timebase0_user; 461} staging_state_t; 462 463static zx_status_t ipm_stage_fixed_config(const cpuperf_config_t* icfg, 464 staging_state_t* ss, 465 unsigned input_index, 466 zx_x86_ipm_config_t* ocfg) { 467 const unsigned ii = input_index; 468 const cpuperf_event_id_t id = icfg->events[ii]; 469 bool uses_timebase0 = !!(icfg->flags[ii] & CPUPERF_CONFIG_FLAG_TIMEBASE0); 470 unsigned counter = ipm_fixed_counter_number(id); 471 472 if (counter == IPM_MAX_FIXED_COUNTERS || 473 counter >= countof(ocfg->fixed_ids) || 474 counter >= ss->max_num_fixed) { 475 zxlogf(ERROR, "%s: Invalid fixed event [%u]\n", __func__, ii); 476 return ZX_ERR_INVALID_ARGS; 477 } 478 if (ss->have_fixed[counter]) { 479 zxlogf(ERROR, "%s: Fixed event [%u] already provided\n", 480 __func__, counter); 481 return ZX_ERR_INVALID_ARGS; 482 } 483 ss->have_fixed[counter] = true; 484 ocfg->fixed_ids[ss->num_fixed] = id; 485 if ((uses_timebase0 && input_index != 0) || icfg->rate[ii] == 0) { 486 ocfg->fixed_initial_value[ss->num_fixed] = 0; 487 } else { 488 if (icfg->rate[ii] > ss->max_fixed_value) { 489 zxlogf(ERROR, "%s: Rate too large, event [%u]\n", __func__, ii); 490 return ZX_ERR_INVALID_ARGS; 491 } 492 ocfg->fixed_initial_value[ss->num_fixed] = 493 ss->max_fixed_value - icfg->rate[ii] + 1; 494 } 495 // KISS: For now don't generate PMI's for counters that use 496 // another as the timebase. 
497 if (!uses_timebase0 || ii == 0) 498 ocfg->fixed_ctrl |= IA32_FIXED_CTR_CTRL_PMI_MASK(counter); 499 unsigned enable = 0; 500 if (icfg->flags[ii] & CPUPERF_CONFIG_FLAG_OS) 501 enable |= FIXED_CTR_ENABLE_OS; 502 if (icfg->flags[ii] & CPUPERF_CONFIG_FLAG_USER) 503 enable |= FIXED_CTR_ENABLE_USR; 504 ocfg->fixed_ctrl |= enable << IA32_FIXED_CTR_CTRL_EN_SHIFT(counter); 505 ocfg->global_ctrl |= IA32_PERF_GLOBAL_CTRL_FIXED_EN_MASK(counter); 506 if (icfg->flags[ii] & CPUPERF_CONFIG_FLAG_TIMEBASE0) 507 ocfg->fixed_flags[ss->num_fixed] |= IPM_CONFIG_FLAG_TIMEBASE; 508 if (icfg->flags[ii] & CPUPERF_CONFIG_FLAG_PC) 509 ocfg->fixed_flags[ss->num_fixed] |= IPM_CONFIG_FLAG_PC; 510 511 ++ss->num_fixed; 512 return ZX_OK; 513} 514 515static zx_status_t ipm_stage_programmable_config(const cpuperf_config_t* icfg, 516 staging_state_t* ss, 517 unsigned input_index, 518 zx_x86_ipm_config_t* ocfg) { 519 const unsigned ii = input_index; 520 cpuperf_event_id_t id = icfg->events[ii]; 521 unsigned unit = CPUPERF_EVENT_ID_UNIT(id); 522 unsigned event = CPUPERF_EVENT_ID_EVENT(id); 523 bool uses_timebase0 = !!(icfg->flags[ii] & CPUPERF_CONFIG_FLAG_TIMEBASE0); 524 525 // TODO(dje): Verify no duplicates. 
526 if (ss->num_programmable == ss->max_num_programmable) { 527 zxlogf(ERROR, "%s: Too many programmable counters provided\n", 528 __func__); 529 return ZX_ERR_INVALID_ARGS; 530 } 531 ocfg->programmable_ids[ss->num_programmable] = id; 532 if ((uses_timebase0 && input_index != 0) || icfg->rate[ii] == 0) { 533 ocfg->programmable_initial_value[ss->num_programmable] = 0; 534 } else { 535 if (icfg->rate[ii] > ss->max_programmable_value) { 536 zxlogf(ERROR, "%s: Rate too large, event [%u]\n", __func__, ii); 537 return ZX_ERR_INVALID_ARGS; 538 } 539 ocfg->programmable_initial_value[ss->num_programmable] = 540 ss->max_programmable_value - icfg->rate[ii] + 1; 541 } 542 const event_details_t* details = NULL; 543 switch (unit) { 544 case CPUPERF_UNIT_ARCH: 545 if (event >= countof(kArchEventMap)) { 546 zxlogf(ERROR, "%s: Invalid event id, event [%u]\n", __func__, ii); 547 return ZX_ERR_INVALID_ARGS; 548 } 549 details = &kArchEvents[kArchEventMap[event]]; 550 break; 551 case CPUPERF_UNIT_MODEL: 552 if (event >= countof(kModelEventMap)) { 553 zxlogf(ERROR, "%s: Invalid event id, event [%u]\n", __func__, ii); 554 return ZX_ERR_INVALID_ARGS; 555 } 556 details = &kModelEvents[kModelEventMap[event]]; 557 break; 558 default: 559 zxlogf(ERROR, "%s: Invalid event id, event [%u]\n", __func__, ii); 560 return ZX_ERR_INVALID_ARGS; 561 } 562 if (details->event == 0 && details->umask == 0) { 563 zxlogf(ERROR, "%s: Invalid event id, event [%u]\n", __func__, ii); 564 return ZX_ERR_INVALID_ARGS; 565 } 566 uint64_t evtsel = 0; 567 evtsel |= details->event << IA32_PERFEVTSEL_EVENT_SELECT_SHIFT; 568 evtsel |= details->umask << IA32_PERFEVTSEL_UMASK_SHIFT; 569 if (icfg->flags[ii] & CPUPERF_CONFIG_FLAG_OS) 570 evtsel |= IA32_PERFEVTSEL_OS_MASK; 571 if (icfg->flags[ii] & CPUPERF_CONFIG_FLAG_USER) 572 evtsel |= IA32_PERFEVTSEL_USR_MASK; 573 if (details->flags & IPM_REG_FLAG_EDG) 574 evtsel |= IA32_PERFEVTSEL_E_MASK; 575 if (details->flags & IPM_REG_FLAG_ANYT) 576 evtsel |= IA32_PERFEVTSEL_ANY_MASK; 
577 if (details->flags & IPM_REG_FLAG_INV) 578 evtsel |= IA32_PERFEVTSEL_INV_MASK; 579 evtsel |= (details->flags & IPM_REG_FLAG_CMSK_MASK) << IA32_PERFEVTSEL_CMASK_SHIFT; 580 // KISS: For now don't generate PMI's for counters that use 581 // another as the timebase. We still generate interrupts in 582 // "counting mode" in case the counter overflows. 583 if (!uses_timebase0 || ii == 0) 584 evtsel |= IA32_PERFEVTSEL_INT_MASK; 585 evtsel |= IA32_PERFEVTSEL_EN_MASK; 586 ocfg->programmable_events[ss->num_programmable] = evtsel; 587 ocfg->global_ctrl |= IA32_PERF_GLOBAL_CTRL_PMC_EN_MASK(ss->num_programmable); 588 if (icfg->flags[ii] & CPUPERF_CONFIG_FLAG_TIMEBASE0) 589 ocfg->programmable_flags[ss->num_programmable] |= IPM_CONFIG_FLAG_TIMEBASE; 590 if (icfg->flags[ii] & CPUPERF_CONFIG_FLAG_PC) 591 ocfg->programmable_flags[ss->num_programmable] |= IPM_CONFIG_FLAG_PC; 592 593 ++ss->num_programmable; 594 return ZX_OK; 595} 596 597static zx_status_t ipm_stage_misc_config(const cpuperf_config_t* icfg, 598 staging_state_t* ss, 599 unsigned input_index, 600 zx_x86_ipm_config_t* ocfg) { 601 const unsigned ii = input_index; 602 cpuperf_event_id_t id = icfg->events[ii]; 603 int event = ipm_lookup_misc_event(id); 604 605 if (event < 0) { 606 zxlogf(ERROR, "%s: Invalid misc event [%u]\n", __func__, ii); 607 return ZX_ERR_INVALID_ARGS; 608 } 609 if (ss->num_misc == ss->max_num_misc) { 610 zxlogf(ERROR, "%s: Too many misc counters provided\n", 611 __func__); 612 return ZX_ERR_INVALID_ARGS; 613 } 614 if (ss->have_misc[event / 64] & (1ul << (event % 64))) { 615 zxlogf(ERROR, "%s: Misc event [%u] already provided\n", 616 __func__, ii); 617 return ZX_ERR_INVALID_ARGS; 618 } 619 ss->have_misc[event / 64] |= 1ul << (event % 64); 620 ocfg->misc_ids[ss->num_misc] = id; 621 if (icfg->flags[ii] & CPUPERF_CONFIG_FLAG_TIMEBASE0) { 622 ocfg->misc_flags[ss->num_misc] |= IPM_CONFIG_FLAG_TIMEBASE; 623 } else { 624 if (icfg->rate[ii] != 0) { 625 zxlogf(ERROR, "%s: Misc event [%u] requires a 
timebase\n", 626 __func__, ii); 627 return ZX_ERR_INVALID_ARGS; 628 } 629 } 630 631 ++ss->num_misc; 632 return ZX_OK; 633} 634 635static zx_status_t ipm_stage_config(cpu_trace_device_t* dev, 636 const void* cmd, size_t cmdlen) { 637 zxlogf(TRACE, "%s called\n", __func__); 638 639 cpuperf_device_t* ipm = dev->cpuperf; 640 if (!ipm) 641 return ZX_ERR_BAD_STATE; 642 if (ipm->active) 643 return ZX_ERR_BAD_STATE; 644 645 // If we subsequently get an error, make sure any previous configuration 646 // can't be used. 647 ipm_per_trace_state_t* per_trace = ipm->per_trace_state; 648 per_trace->configured = false; 649 650 cpuperf_config_t ioctl_config; 651 cpuperf_config_t* icfg = &ioctl_config; 652 if (cmdlen != sizeof(*icfg)) 653 return ZX_ERR_INVALID_ARGS; 654 memcpy(icfg, cmd, sizeof(*icfg)); 655 656 zx_x86_ipm_config_t* ocfg = &per_trace->config; 657 memset(ocfg, 0, sizeof(*ocfg)); 658 659 // Validate the config and convert it to our internal form. 660 // TODO(dje): Multiplexing support. 661 662 staging_state_t staging_state; 663 staging_state_t* ss = &staging_state; 664 ss->max_num_fixed = ipm_properties.num_fixed_events; 665 ss->max_num_programmable = ipm_properties.num_programmable_events; 666 ss->max_num_misc = ipm_properties.num_misc_events; 667 ss->num_fixed = 0; 668 ss->num_programmable = 0; 669 ss->num_misc = 0; 670 ss->max_fixed_value = 671 (ipm_properties.fixed_counter_width < 64 672 ? (1ul << ipm_properties.fixed_counter_width) - 1 673 : ~0ul); 674 ss->max_programmable_value = 675 (ipm_properties.programmable_counter_width < 64 676 ? 
(1ul << ipm_properties.programmable_counter_width) - 1 677 : ~0ul); 678 for (unsigned i = 0; i < countof(ss->have_fixed); ++i) 679 ss->have_fixed[i] = false; 680 for (unsigned i = 0; i < countof(ss->have_misc); ++i) 681 ss->have_misc[i] = false; 682 ss->have_timebase0_user = false; 683 684 zx_status_t status; 685 unsigned ii; // ii: input index 686 for (ii = 0; ii < countof(icfg->events); ++ii) { 687 cpuperf_event_id_t id = icfg->events[ii]; 688 zxlogf(TRACE, "%s: processing [%u] = %u\n", __func__, ii, id); 689 if (id == 0) 690 break; 691 unsigned unit = CPUPERF_EVENT_ID_UNIT(id); 692 693 if (icfg->flags[ii] & ~CPUPERF_CONFIG_FLAG_MASK) { 694 zxlogf(ERROR, "%s: reserved flag bits set [%u]\n", __func__, ii); 695 return ZX_ERR_INVALID_ARGS; 696 } 697 698 switch (unit) { 699 case CPUPERF_UNIT_FIXED: 700 status = ipm_stage_fixed_config(icfg, ss, ii, ocfg); 701 if (status != ZX_OK) 702 return status; 703 break; 704 case CPUPERF_UNIT_ARCH: 705 case CPUPERF_UNIT_MODEL: 706 status = ipm_stage_programmable_config(icfg, ss, ii, ocfg); 707 if (status != ZX_OK) 708 return status; 709 break; 710 case CPUPERF_UNIT_MISC: 711 status = ipm_stage_misc_config(icfg, ss, ii, ocfg); 712 if (status != ZX_OK) 713 return status; 714 break; 715 default: 716 zxlogf(ERROR, "%s: Invalid event [%u] (bad unit)\n", 717 __func__, ii); 718 return ZX_ERR_INVALID_ARGS; 719 } 720 721 if (icfg->flags[ii] & CPUPERF_CONFIG_FLAG_TIMEBASE0) 722 ss->have_timebase0_user = true; 723 } 724 if (ii == 0) { 725 zxlogf(ERROR, "%s: No events provided\n", __func__); 726 return ZX_ERR_INVALID_ARGS; 727 } 728 729 // Ensure there are no holes. 
730 for (; ii < countof(icfg->events); ++ii) { 731 if (icfg->events[ii] != 0) { 732 zxlogf(ERROR, "%s: Hole at event [%u]\n", __func__, ii); 733 return ZX_ERR_INVALID_ARGS; 734 } 735 } 736 737 if (ss->have_timebase0_user) { 738 ocfg->timebase_id = icfg->events[0]; 739 } 740 741#if TRY_FREEZE_ON_PMI 742 ocfg->debug_ctrl |= IA32_DEBUGCTL_FREEZE_PERFMON_ON_PMI_MASK; 743#endif 744 745 // Require something to be enabled in order to start tracing. 746 // This is mostly a sanity check. 747 if (per_trace->config.global_ctrl == 0) { 748 zxlogf(ERROR, "%s: Requested config doesn't collect any data\n", 749 __func__); 750 return ZX_ERR_INVALID_ARGS; 751 } 752 753 per_trace->ioctl_config = *icfg; 754 per_trace->configured = true; 755 return ZX_OK; 756} 757 758static zx_status_t ipm_get_config(cpu_trace_device_t* dev, 759 void* reply, size_t replymax, 760 size_t* out_actual) { 761 zxlogf(TRACE, "%s called\n", __func__); 762 763 const cpuperf_device_t* ipm = dev->cpuperf; 764 if (!ipm) 765 return ZX_ERR_BAD_STATE; 766 767 const ipm_per_trace_state_t* per_trace = ipm->per_trace_state; 768 if (!per_trace->configured) 769 return ZX_ERR_BAD_STATE; 770 771 const cpuperf_config_t* config = &per_trace->ioctl_config; 772 if (replymax < sizeof(*config)) 773 return ZX_ERR_BUFFER_TOO_SMALL; 774 775 memcpy(reply, config, sizeof(*config)); 776 *out_actual = sizeof(*config); 777 return ZX_OK; 778} 779 780static zx_status_t ipm_start(cpu_trace_device_t* dev) { 781 zxlogf(TRACE, "%s called\n", __func__); 782 783 cpuperf_device_t* ipm = dev->cpuperf; 784 if (!ipm) 785 return ZX_ERR_BAD_STATE; 786 if (ipm->active) 787 return ZX_ERR_BAD_STATE; 788 789 ipm_per_trace_state_t* per_trace = ipm->per_trace_state; 790 if (!per_trace->configured) 791 return ZX_ERR_BAD_STATE; 792 793 // Step 1: Get the configuration data into the kernel for use by START. 
794 795 zxlogf(TRACE, "%s: global ctrl 0x%" PRIx64 ", fixed ctrl 0x%" PRIx64 "\n", 796 __func__, per_trace->config.global_ctrl, 797 per_trace->config.fixed_ctrl); 798 799 // |per_trace->configured| should not have been set if there's nothing 800 // to trace. 801 assert(per_trace->config.global_ctrl != 0); 802 803 zx_handle_t resource = get_root_resource(); 804 805 zx_status_t status = 806 zx_mtrace_control(resource, MTRACE_KIND_CPUPERF, 807 MTRACE_CPUPERF_INIT, 0, NULL, 0); 808 if (status != ZX_OK) 809 return status; 810 811 uint32_t num_cpus = zx_system_get_num_cpus(); 812 for (uint32_t cpu = 0; cpu < num_cpus; ++cpu) { 813 zx_x86_ipm_buffer_t buffer; 814 io_buffer_t* io_buffer = &per_trace->buffers[cpu]; 815 buffer.vmo = io_buffer->vmo_handle; 816 status = zx_mtrace_control(resource, MTRACE_KIND_CPUPERF, 817 MTRACE_CPUPERF_ASSIGN_BUFFER, cpu, 818 &buffer, sizeof(buffer)); 819 if (status != ZX_OK) 820 goto fail; 821 } 822 823 status = zx_mtrace_control(resource, MTRACE_KIND_CPUPERF, 824 MTRACE_CPUPERF_STAGE_CONFIG, 0, 825 &per_trace->config, sizeof(per_trace->config)); 826 if (status != ZX_OK) 827 goto fail; 828 829 // Step 2: Start data collection. 
830 831 status = zx_mtrace_control(resource, MTRACE_KIND_CPUPERF, MTRACE_CPUPERF_START, 832 0, NULL, 0); 833 if (status != ZX_OK) 834 goto fail; 835 836 ipm->active = true; 837 return ZX_OK; 838 839 fail: 840 { 841 zx_status_t status2 = 842 zx_mtrace_control(resource, MTRACE_KIND_CPUPERF, 843 MTRACE_CPUPERF_FINI, 0, NULL, 0); 844 if (status2 != ZX_OK) 845 zxlogf(TRACE, "%s: MTRACE_CPUPERF_FINI failed: %d\n", __func__, status2); 846 assert(status2 == ZX_OK); 847 return status; 848 } 849} 850 851static zx_status_t ipm_stop(cpu_trace_device_t* dev) { 852 zxlogf(TRACE, "%s called\n", __func__); 853 854 cpuperf_device_t* ipm = dev->cpuperf; 855 if (!ipm) 856 return ZX_ERR_BAD_STATE; 857 858 zx_handle_t resource = get_root_resource(); 859 zx_status_t status = 860 zx_mtrace_control(resource, MTRACE_KIND_CPUPERF, 861 MTRACE_CPUPERF_STOP, 0, NULL, 0); 862 if (status == ZX_OK) { 863 ipm->active = false; 864 status = zx_mtrace_control(resource, MTRACE_KIND_CPUPERF, 865 MTRACE_CPUPERF_FINI, 0, NULL, 0); 866 } 867 return status; 868} 869 870zx_status_t cpuperf_ioctl(cpu_trace_device_t* dev, uint32_t op, 871 const void* cmd, size_t cmdlen, 872 void* reply, size_t replymax, 873 size_t* out_actual) { 874 assert(IOCTL_FAMILY(op) == IOCTL_FAMILY_CPUPERF); 875 876 switch (op) { 877 case IOCTL_CPUPERF_GET_PROPERTIES: 878 if (cmdlen != 0) 879 return ZX_ERR_INVALID_ARGS; 880 return ipm_get_properties(dev, reply, replymax, out_actual); 881 882 case IOCTL_CPUPERF_ALLOC_TRACE: 883 if (replymax != 0) 884 return ZX_ERR_INVALID_ARGS; 885 return ipm_alloc_trace(dev, cmd, cmdlen); 886 887 case IOCTL_CPUPERF_FREE_TRACE: 888 if (cmdlen != 0 || replymax != 0) 889 return ZX_ERR_INVALID_ARGS; 890 return ipm_free_trace(dev); 891 892 case IOCTL_CPUPERF_GET_ALLOC: 893 if (cmdlen != 0) 894 return ZX_ERR_INVALID_ARGS; 895 return ipm_get_alloc(dev, reply, replymax, out_actual); 896 897 case IOCTL_CPUPERF_GET_BUFFER_HANDLE: 898 return ipm_get_buffer_handle(dev, cmd, cmdlen, reply, replymax, out_actual); 
899 900 case IOCTL_CPUPERF_STAGE_CONFIG: 901 if (replymax != 0) 902 return ZX_ERR_INVALID_ARGS; 903 return ipm_stage_config(dev, cmd, cmdlen); 904 905 case IOCTL_CPUPERF_GET_CONFIG: 906 return ipm_get_config(dev, reply, replymax, out_actual); 907 908 case IOCTL_CPUPERF_START: 909 if (cmdlen != 0 || replymax != 0) 910 return ZX_ERR_INVALID_ARGS; 911 return ipm_start(dev); 912 913 case IOCTL_CPUPERF_STOP: 914 if (cmdlen != 0 || replymax != 0) 915 return ZX_ERR_INVALID_ARGS; 916 return ipm_stop(dev); 917 918 default: 919 return ZX_ERR_INVALID_ARGS; 920 } 921} 922 923void cpuperf_release(cpu_trace_device_t* dev) { 924 // TODO(dje): None of these should fail. What to do? 925 // Suggest flagging things as busted and prevent further use. 926 ipm_stop(dev); 927 ipm_free_trace(dev); 928} 929