// Copyright 2016 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// See the README.md in this directory for documentation.

#include <ddk/binding.h>
#include <ddk/debug.h>
#include <ddk/device.h>
#include <ddk/driver.h>
#include <ddk/io-buffer.h>

#include <lib/zircon-internal/device/cpu-trace/intel-pt.h>
#include <lib/zircon-internal/mtrace.h>
#include <zircon/syscalls.h>
#include <zircon/syscalls/resource.h>
#include <zircon/types.h>

#include <assert.h>
#include <cpuid.h>
#include <inttypes.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "cpu-trace-private.h"

// How trace buffers are attached: one per cpu, or one per thread.
typedef enum {
    IPT_TRACE_CPUS,
    IPT_TRACE_THREADS
} ipt_trace_mode_t;

// Per-buffer bookkeeping: the chunks of trace memory, the ToPA tables that
// describe them to the hardware, and a saved copy of the trace MSR values.
typedef struct ipt_per_trace_state {
    // the cpu or thread this buffer is assigned to
    // Which value to use is determined by the trace mode.
    union {
        uint32_t cpuno;
        zx_handle_t thread;
    } owner;

    // number of chunks, each 2^|chunk_order| pages in size
    uint32_t num_chunks;
    // log2 size of each chunk, in pages
    uint32_t chunk_order;
    // if true then the buffer is circular, otherwise tracing stops when the
    // buffer fills
    bool is_circular;
    // true if allocated
    bool allocated;
    // number of ToPA tables needed
    uint32_t num_tables;

    // msrs
    // Saved register state, staged to / read back from the kernel via
    // zx_mtrace_control().
    uint64_t ctl;
    uint64_t status;
    uint64_t output_base;
    uint64_t output_mask_ptrs;
    uint64_t cr3_match;
    struct {
        uint64_t a,b;
    } addr_ranges[IPT_MAX_NUM_ADDR_RANGES];

    // trace buffers and ToPA tables
    // ToPA: Table of Physical Addresses
    // A "trace buffer" is a set of N chunks.
    io_buffer_t* chunks;
    io_buffer_t* topas;
} ipt_per_trace_state_t;

// Top-level driver state, created by the ALLOC_TRACE ioctl and destroyed
// by FREE_TRACE.
typedef struct insntrace_device {
    ipt_trace_mode_t mode;

    // # of entries in |per_trace_state|.
    // When tracing by cpu, this is the max number of cpus.
    // When tracing by thread, this is the max number of threads.
    // TODO(dje): Add support for dynamically growing the vector.
    uint32_t num_traces;

    // one entry for each trace
    ipt_per_trace_state_t* per_trace_state;

    // Once tracing has started various things are not allowed until it stops.
    bool active;

    // Borrowed handle from cpu_trace_device. Must not close
    zx_handle_t bti;
} insntrace_device_t;

// Family/model/stepping of this cpu, from cpuid leaf 1.
static uint32_t ipt_config_family;
static uint32_t ipt_config_model;
static uint32_t ipt_config_stepping;

// Intel PT capabilities reported by cpuid leaves 0x14.0/0x14.1.
static uint32_t ipt_config_addr_cfg_max = 0;
static uint32_t ipt_config_mtc_freq_mask = 0;
static uint32_t ipt_config_cyc_thresh_mask = 0;
static uint32_t ipt_config_psb_freq_mask = 0;
static uint32_t ipt_config_num_addr_ranges = 0;
// Derived from cpuid leaf 0x15 (TSC/crystal ratio); zero if unavailable.
static uint32_t ipt_config_bus_freq = 0;

// True if the cpu reports Intel PT support at all.
static bool ipt_config_supported = false;

// Individual PT feature flags, from cpuid leaf 0x14.0 (EBX/ECX).
static bool ipt_config_cr3_filtering = false;
static bool ipt_config_psb = false;
static bool ipt_config_ip_filtering = false;
static bool ipt_config_mtc = false;
static bool ipt_config_ptwrite = false;
static bool ipt_config_power_events = false;
static bool ipt_config_output_topa = false;
static bool ipt_config_output_topa_multi = false;
static bool ipt_config_output_single = false;
static bool ipt_config_output_transport = false;
static bool ipt_config_lip = false;

// maximum space, in bytes, for trace buffers (per cpu)
// This isn't necessarily
// MAX_NUM_CHUNKS * (1 << (MAX_CHUNK_ORDER + PAGE_SIZE_SHIFT)).
// Buffers have to be naturally aligned contiguous pages, but we can have
// a lot of them. Supporting large buffers and/or lots of them is for
// experimentation.
121#define MAX_PER_TRACE_SPACE (256 * 1024 * 1024) 122 123// maximum number of buffers 124#define MAX_NUM_CHUNKS 4096 125 126// maximum size of each buffer, in pages (1MB) 127#define MAX_CHUNK_ORDER 8 128 129#if PAGE_SIZE == 4096 130#define PAGE_SIZE_SHIFT 12 131#else 132#error "unsupported page size" 133#endif 134 135#define BIT(x, b) ((x) & (1u << (b))) 136 137static zx_status_t x86_pt_free(insntrace_device_t* ipt_dev); 138 139 140// The userspace side of the driver 141 142void insntrace_init_once(void) 143{ 144 unsigned a, b, c, d, max_leaf; 145 146 max_leaf = __get_cpuid_max(0, NULL); 147 if (max_leaf < 0x14) { 148 zxlogf(INFO, "IntelPT: No PT support\n"); 149 return; 150 } 151 152 __cpuid(1, a, b, c, d); 153 ipt_config_stepping = a & 0xf; 154 ipt_config_model = (a >> 4) & 0xf; 155 ipt_config_family = (a >> 8) & 0xf; 156 if (ipt_config_family == 0xf) 157 ipt_config_family += (a >> 20) & 0xff; 158 if (ipt_config_family == 6 || ipt_config_family == 0xf) 159 ipt_config_model += ((a >> 16) & 0xf) << 4; 160 161 __cpuid_count(0x07, 0, a, b, c, d); 162 if (!BIT(b, 25)) { 163 zxlogf(INFO, "IntelPT: No PT support\n"); 164 return; 165 } 166 167 ipt_config_supported = true; 168 169 __cpuid_count(0x14, 0, a, b, c, d); 170 if (BIT(b, 2)) 171 ipt_config_addr_cfg_max = 2; 172 if (BIT(b, 1) && a >= 1) { 173 unsigned a1, b1, c1, d1; 174 __cpuid_count(0x14, 1, a1, b1, c1, d1); 175 ipt_config_mtc_freq_mask = (a1 >> 16) & 0xffff; 176 ipt_config_cyc_thresh_mask = b1 & 0xffff; 177 ipt_config_psb_freq_mask = (b1 >> 16) & 0xffff; 178 ipt_config_num_addr_ranges = a1 & 0x7; 179 } 180 181 if (max_leaf >= 0x15) { 182 unsigned a1 = 0, b1 = 0, c1 = 0, d1 = 0; 183 __cpuid(0x15, a1, b1, c1, d1); 184 if (a1 && b1) 185 ipt_config_bus_freq = 1. 
/ ((float)a1 / (float)b1); 186 } 187 188 ipt_config_cr3_filtering = !!BIT(b, 0); 189 ipt_config_psb = !!BIT(b, 1); 190 ipt_config_ip_filtering = !!BIT(b, 2); 191 ipt_config_mtc = !!BIT(b, 3); 192 ipt_config_ptwrite = !!BIT(b, 4); 193 ipt_config_power_events = !!BIT(b, 5); 194 195 ipt_config_output_topa = !!BIT(c, 0); 196 ipt_config_output_topa_multi = !!BIT(c, 1); 197 ipt_config_output_single = !!BIT(c, 2); 198 ipt_config_output_transport = !!BIT(c, 3); 199 ipt_config_lip = !!BIT(c, 31); 200 201 zxlogf(INFO, "Intel Processor Trace configuration for this chipset:\n"); 202 // No need to print everything, but these are useful. 203 zxlogf(INFO, "mtc_freq_mask: 0x%x\n", ipt_config_mtc_freq_mask); 204 zxlogf(INFO, "cyc_thresh_mask: 0x%x\n", ipt_config_cyc_thresh_mask); 205 zxlogf(INFO, "psb_freq_mask: 0x%x\n", ipt_config_psb_freq_mask); 206 zxlogf(INFO, "num addr ranges: %u\n", ipt_config_num_addr_ranges); 207} 208 209// Create the ToPA for the configured number of pages for |cpu|. 210// A circular collection of buffers is set up, even if we're going to apply 211// the stop bit to the last entry. 212static void make_topa(insntrace_device_t* ipt_dev, ipt_per_trace_state_t* per_trace) { 213 const size_t run_len_log2 = per_trace->chunk_order; 214 assert(run_len_log2 + PAGE_SIZE_SHIFT <= IPT_TOPA_MAX_SHIFT); 215 assert(run_len_log2 + PAGE_SIZE_SHIFT >= IPT_TOPA_MIN_SHIFT); 216 217 uint32_t curr_table = 0; 218 uint32_t curr_idx = 0; 219 uint64_t* last_entry = NULL; 220 221 // Note: An early version of this patch auto-computed the desired grouping 222 // of pages with sufficient alignment. If you find yourself needing this 223 // functionality again, see change 9470. 
224 225 for (uint32_t i = 0; i < per_trace->num_chunks; ++i) { 226 io_buffer_t* buffer = &per_trace->chunks[i]; 227 io_buffer_t* topa = &per_trace->topas[curr_table]; 228 zx_paddr_t pa = io_buffer_phys(buffer); 229 230 uint64_t val = IPT_TOPA_ENTRY_PHYS_ADDR(pa) | 231 IPT_TOPA_ENTRY_SIZE(run_len_log2 + PAGE_SIZE_SHIFT); 232 uint64_t* table = io_buffer_virt(topa); 233 table[curr_idx] = val; 234 last_entry = &table[curr_idx]; 235 236 // Make sure we leave one at the end of the table for the END marker. 237 if (unlikely(curr_idx >= IPT_TOPA_MAX_TABLE_ENTRIES - 2)) { 238 curr_idx = 0; 239 curr_table++; 240 } else { 241 curr_idx++; 242 } 243 } 244 245 assert(curr_table + 1 == per_trace->num_tables || 246 // If the last table is full curr_table will be the next one. 247 (curr_table == per_trace->num_tables && curr_idx == 0)); 248 249 // Populate END entries for completed tables 250 // Assume the table is circular. We'll set the stop bit on the last 251 // entry later. 252 for (uint32_t i = 0; i < curr_table; ++i) { 253 io_buffer_t* this_table = &per_trace->topas[i]; 254 io_buffer_t* next_table; 255 if (i == per_trace->num_tables - 1) { 256 next_table = &per_trace->topas[0]; 257 } else { 258 next_table = &per_trace->topas[i + 1]; 259 } 260 261 zx_paddr_t next_table_pa = io_buffer_phys(next_table); 262 uint64_t val = IPT_TOPA_ENTRY_PHYS_ADDR(next_table_pa) | IPT_TOPA_ENTRY_END; 263 uint64_t* table = io_buffer_virt(this_table); 264 table[IPT_TOPA_MAX_TABLE_ENTRIES - 1] = val; 265 } 266 267 // Populate the END entry for a possibly non-full last table 268 if (curr_table < per_trace->num_tables) { 269 io_buffer_t* this_table = &per_trace->topas[curr_table]; 270 io_buffer_t* first_table = &per_trace->topas[0]; 271 zx_paddr_t first_table_pa = io_buffer_phys(first_table); 272 uint64_t val = IPT_TOPA_ENTRY_PHYS_ADDR(first_table_pa) | IPT_TOPA_ENTRY_END; 273 uint64_t* table = io_buffer_virt(this_table); 274 table[curr_idx] = val; 275 } 276 277 // Add the STOP flag to the last 
non-END entry in the tables 278 assert(last_entry); 279 if (!per_trace->is_circular) 280 *last_entry |= IPT_TOPA_ENTRY_STOP; 281} 282 283// Compute the number of ToPA entries needed for the configured number of 284// buffers. 285// The output count includes the END entries across all needed tables. 286static uint32_t compute_topa_entry_count(insntrace_device_t* ipt_dev, 287 ipt_per_trace_state_t* per_trace) { 288 uint32_t num_entries = per_trace->num_chunks; 289 uint32_t num_end_entries = (num_entries + IPT_TOPA_MAX_TABLE_ENTRIES - 2) / 290 (IPT_TOPA_MAX_TABLE_ENTRIES - 1); 291 uint32_t result = num_entries + num_end_entries; 292 293 zxlogf(DEBUG1, "IPT: compute_topa_entry_count: num_entries: %u\n", num_entries); 294 zxlogf(DEBUG1, "IPT: compute_topa_entry_count: num_end_entries: %u\n", num_end_entries); 295 zxlogf(DEBUG1, "IPT: compute_topa_entry_count: total entries: %u\n", result); 296 297 return result; 298} 299 300// Walk the tables to discover how much data has been captured for |per_trace|. 301// Note: If this is a circular buffer this is just where tracing stopped. 302static size_t compute_capture_size(insntrace_device_t* ipt_dev, 303 const ipt_per_trace_state_t* per_trace) { 304 uint64_t curr_table_paddr = per_trace->output_base; 305 uint32_t curr_table_entry_idx = (uint32_t)per_trace->output_mask_ptrs >> 7; 306 uint32_t curr_entry_offset = (uint32_t)(per_trace->output_mask_ptrs >> 32); 307 308 zxlogf(DEBUG1, "IPT: compute_capture_size: trace %tu\n", per_trace - ipt_dev->per_trace_state); 309 zxlogf(DEBUG1, "IPT: curr_table_paddr 0x%" PRIx64 ", curr_table_entry_idx %u, curr_entry_offset %u\n", 310 curr_table_paddr, curr_table_entry_idx, curr_entry_offset); 311 312 size_t total_size = 0; 313 for (uint32_t table = 0; table < per_trace->num_tables; ++table) { 314 // Get the physical address so that we can compare it with the value 315 // in output_base. 
316 zx_paddr_t table_paddr = io_buffer_phys(&per_trace->topas[table]); 317 318 for (uint32_t entry = 0; entry < IPT_TOPA_MAX_TABLE_ENTRIES - 1; ++entry) { 319 if (table_paddr == curr_table_paddr && entry >= curr_table_entry_idx) { 320 total_size += curr_entry_offset; 321 return total_size; 322 } 323 uint64_t* table_ptr = io_buffer_virt(&per_trace->topas[table]); 324 uint64_t topa_entry = table_ptr[entry]; 325 total_size += 1UL << IPT_TOPA_ENTRY_EXTRACT_SIZE(topa_entry); 326 } 327 } 328 329 // Should be unreachable. 330 // TODO(dje): Later flag state as broken. 331 zxlogf(ERROR, "IPT: unexpectedly exited capture loop\n"); 332 return 0; 333} 334 335static zx_status_t x86_pt_alloc_buffer1(insntrace_device_t* ipt_dev, 336 ipt_per_trace_state_t* per_trace, 337 uint32_t num, uint32_t order, 338 bool is_circular) { 339 zx_status_t status; 340 size_t chunk_pages = 1 << order; 341 342 memset(per_trace, 0, sizeof(*per_trace)); 343 344 per_trace->chunks = calloc(num, sizeof(io_buffer_t)); 345 if (per_trace->chunks == NULL) 346 return ZX_ERR_NO_MEMORY; 347 348 for (uint32_t i = 0; i < num; ++i) { 349 // ToPA entries of size N must be aligned to N, too. 350 uint32_t alignment_log2 = PAGE_SIZE_SHIFT + order; 351 status = io_buffer_init_aligned(&per_trace->chunks[i], ipt_dev->bti, 352 chunk_pages * PAGE_SIZE, alignment_log2, 353 IO_BUFFER_RW | IO_BUFFER_CONTIG); 354 if (status != ZX_OK) 355 return status; 356 // Keep track of allocated buffers as we go in case we later fail: 357 // we want to be able to free those that got allocated. 358 ++per_trace->num_chunks; 359 // Catch bugs in io_buffer_init_aligned. If it doesn't give us a 360 // properly aligned buffer we'll get an "operational error" later. 361 // See Intel Vol3 36.2.6.2. 
362 zx_paddr_t pa = io_buffer_phys(&per_trace->chunks[i]); 363 zx_paddr_t align_mask = (1ull << alignment_log2) - 1; 364 if (pa & align_mask) { 365 zxlogf(ERROR, "%s: WARNING: chunk has bad alignment: alignment %u, got 0x%" PRIx64 "\n", 366 __func__, alignment_log2, pa); 367 return ZX_ERR_INTERNAL; 368 } 369 } 370 assert(per_trace->num_chunks == num); 371 372 per_trace->chunk_order = order; 373 per_trace->is_circular = is_circular; 374 375 // TODO(dje): No need to allocate the max on the last table. 376 uint32_t entry_count = compute_topa_entry_count(ipt_dev, per_trace); 377 uint32_t table_count = (entry_count + IPT_TOPA_MAX_TABLE_ENTRIES - 1) / 378 IPT_TOPA_MAX_TABLE_ENTRIES; 379 380 if (entry_count < 2) { 381 zxlogf(INFO, "IPT: INVALID ENTRY COUNT: %u\n", entry_count); 382 return ZX_ERR_INVALID_ARGS; 383 } 384 385 // Some early Processor Trace implementations only supported having a 386 // table with a single real entry and an END. 387 if (!ipt_config_output_topa_multi && entry_count > 2) 388 return ZX_ERR_NOT_SUPPORTED; 389 390 // Allocate Table(s) of Physical Addresses (ToPA) for each cpu. 391 392 per_trace->topas = calloc(table_count, sizeof(io_buffer_t)); 393 if (per_trace->topas == NULL) 394 return ZX_ERR_NO_MEMORY; 395 396 for (uint32_t i = 0; i < table_count; ++i) { 397 status = io_buffer_init(&per_trace->topas[i], ipt_dev->bti, 398 sizeof(uint64_t) * IPT_TOPA_MAX_TABLE_ENTRIES, 399 IO_BUFFER_RW | IO_BUFFER_CONTIG); 400 if (status != ZX_OK) 401 return ZX_ERR_NO_MEMORY; 402 // Keep track of allocated tables as we go in case we later fail: 403 // we want to be able to free those that got allocated. 
404 ++per_trace->num_tables; 405 } 406 assert(per_trace->num_tables == table_count); 407 408 make_topa(ipt_dev, per_trace); 409 410 return ZX_OK; 411} 412 413static void x86_pt_free_buffer1(insntrace_device_t* ipt_dev, ipt_per_trace_state_t* per_trace) { 414 if (per_trace->chunks) { 415 for (uint32_t i = 0; i < per_trace->num_chunks; ++i) { 416 io_buffer_release(&per_trace->chunks[i]); 417 } 418 } 419 free(per_trace->chunks); 420 per_trace->chunks = NULL; 421 422 if (per_trace->topas) { 423 for (uint32_t i = 0; i < per_trace->num_tables; ++i) { 424 io_buffer_release(&per_trace->topas[i]); 425 } 426 } 427 free(per_trace->topas); 428 per_trace->topas = NULL; 429 430 per_trace->allocated = false; 431} 432 433static zx_status_t x86_pt_alloc_buffer(insntrace_device_t* ipt_dev, 434 const ioctl_insntrace_buffer_config_t* config, 435 zx_itrace_buffer_descriptor_t* out_descriptor) { 436 zxlogf(DEBUG1, "%s: num_chunks %u, chunk_order %u\n", 437 __func__, config->num_chunks, config->chunk_order); 438 439 if (config->num_chunks == 0 || config->num_chunks > MAX_NUM_CHUNKS) 440 return ZX_ERR_INVALID_ARGS; 441 if (config->chunk_order > MAX_CHUNK_ORDER) 442 return ZX_ERR_INVALID_ARGS; 443 size_t chunk_pages = 1 << config->chunk_order; 444 size_t nr_pages = config->num_chunks * chunk_pages; 445 size_t total_per_trace = nr_pages * PAGE_SIZE; 446 if (total_per_trace > MAX_PER_TRACE_SPACE) 447 return ZX_ERR_INVALID_ARGS; 448 449 uint64_t settable_ctl_mask = ( 450 IPT_CTL_OS_ALLOWED_MASK | 451 IPT_CTL_USER_ALLOWED_MASK | 452 IPT_CTL_TSC_EN_MASK | 453 IPT_CTL_DIS_RETC_MASK | 454 IPT_CTL_BRANCH_EN_MASK 455 ); 456 if (ipt_config_ptwrite) 457 settable_ctl_mask |= IPT_CTL_PTW_EN_MASK | IPT_CTL_FUP_ON_PTW_MASK; 458 if (ipt_config_cr3_filtering) 459 settable_ctl_mask |= IPT_CTL_CR3_FILTER_MASK; 460 if (ipt_config_mtc) 461 settable_ctl_mask |= IPT_CTL_MTC_EN_MASK | IPT_CTL_MTC_FREQ_MASK; 462 if (ipt_config_power_events) 463 settable_ctl_mask |= IPT_CTL_POWER_EVENT_EN_MASK; 464 if 
(ipt_config_ip_filtering) { 465 if (ipt_config_num_addr_ranges >= 1) 466 settable_ctl_mask |= IPT_CTL_ADDR0_MASK; 467 if (ipt_config_num_addr_ranges >= 2) 468 settable_ctl_mask |= IPT_CTL_ADDR1_MASK; 469 if (ipt_config_num_addr_ranges >= 3) 470 settable_ctl_mask |= IPT_CTL_ADDR2_MASK; 471 if (ipt_config_num_addr_ranges >= 4) 472 settable_ctl_mask |= IPT_CTL_ADDR3_MASK; 473 } 474 if (ipt_config_psb) 475 settable_ctl_mask |= (IPT_CTL_CYC_EN_MASK | 476 IPT_CTL_PSB_FREQ_MASK | 477 IPT_CTL_CYC_THRESH_MASK); 478 if ((config->ctl & ~settable_ctl_mask) != 0) { 479 zxlogf(ERROR, "bad ctl, requested 0x%" PRIx64 ", valid 0x%" PRIx64 "\n", 480 config->ctl, settable_ctl_mask); 481 return ZX_ERR_INVALID_ARGS; 482 } 483 484 uint32_t mtc_freq = (uint32_t) ((config->ctl & IPT_CTL_MTC_FREQ_MASK) >> IPT_CTL_MTC_FREQ_SHIFT); 485 if (mtc_freq != 0 && ((1 << mtc_freq) & ipt_config_mtc_freq_mask) == 0) { 486 zxlogf(ERROR, "bad mtc_freq value, requested 0x%x, valid mask 0x%x\n", 487 mtc_freq, ipt_config_mtc_freq_mask); 488 return ZX_ERR_INVALID_ARGS; 489 } 490 uint32_t cyc_thresh = (uint32_t) ((config->ctl & IPT_CTL_CYC_THRESH_MASK) >> IPT_CTL_CYC_THRESH_SHIFT); 491 if (cyc_thresh != 0 && ((1 << cyc_thresh) & ipt_config_cyc_thresh_mask) == 0) { 492 zxlogf(ERROR, "bad cyc_thresh value, requested 0x%x, valid mask 0x%x\n", 493 cyc_thresh, ipt_config_cyc_thresh_mask); 494 return ZX_ERR_INVALID_ARGS; 495 } 496 uint32_t psb_freq = (uint32_t) ((config->ctl & IPT_CTL_PSB_FREQ_MASK) >> IPT_CTL_PSB_FREQ_SHIFT); 497 if (psb_freq != 0 && ((1 << psb_freq) & ipt_config_psb_freq_mask) == 0) { 498 zxlogf(ERROR, "bad psb_freq value, requested 0x%x, valid mask 0x%x\n", 499 psb_freq, ipt_config_psb_freq_mask); 500 return ZX_ERR_INVALID_ARGS; 501 } 502 503 // Find an unallocated buffer entry. 
504 zx_itrace_buffer_descriptor_t descriptor; 505 for (descriptor = 0; descriptor < ipt_dev->num_traces; ++descriptor) { 506 if (!ipt_dev->per_trace_state[descriptor].allocated) 507 break; 508 } 509 if (descriptor == ipt_dev->num_traces) 510 return ZX_ERR_NO_RESOURCES; 511 512 ipt_per_trace_state_t* per_trace = &ipt_dev->per_trace_state[descriptor]; 513 memset(per_trace, 0, sizeof(*per_trace)); 514 zx_status_t status = x86_pt_alloc_buffer1(ipt_dev, per_trace, 515 config->num_chunks, config->chunk_order, config->is_circular); 516 if (status != ZX_OK) { 517 x86_pt_free_buffer1(ipt_dev, per_trace); 518 return status; 519 } 520 521 per_trace->ctl = config->ctl; 522 per_trace->status = 0; 523 per_trace->output_base = io_buffer_phys(&per_trace->topas[0]); 524 per_trace->output_mask_ptrs = 0; 525 per_trace->cr3_match = config->cr3_match; 526 static_assert(sizeof(per_trace->addr_ranges) == sizeof(config->addr_ranges), 527 "addr range size mismatch"); 528 memcpy(per_trace->addr_ranges, config->addr_ranges, sizeof(config->addr_ranges)); 529 per_trace->allocated = true; 530 *out_descriptor = descriptor; 531 return ZX_OK; 532} 533 534static zx_status_t x86_pt_assign_thread_buffer(insntrace_device_t* ipt_dev, 535 zx_itrace_buffer_descriptor_t descriptor, 536 zx_handle_t thread) { 537 zx_handle_close(thread); 538 // TODO(dje): Thread support is still work-in-progress. 539 return ZX_ERR_NOT_SUPPORTED; 540} 541 542static zx_status_t x86_pt_release_thread_buffer(insntrace_device_t* ipt_dev, 543 zx_itrace_buffer_descriptor_t descriptor, 544 zx_handle_t thread) { 545 zx_handle_close(thread); 546 // TODO(dje): Thread support is still work-in-progress. 
547 return ZX_ERR_NOT_SUPPORTED; 548} 549 550static zx_status_t x86_pt_free_buffer(insntrace_device_t* ipt_dev, 551 zx_itrace_buffer_descriptor_t descriptor) { 552 if (ipt_dev->active) 553 return ZX_ERR_BAD_STATE; 554 if (descriptor >= ipt_dev->num_traces) 555 return ZX_ERR_INVALID_ARGS; 556 assert(ipt_dev->per_trace_state); 557 ipt_per_trace_state_t* per_trace = &ipt_dev->per_trace_state[descriptor]; 558 if (!per_trace->allocated) 559 return ZX_ERR_INVALID_ARGS; 560 x86_pt_free_buffer1(ipt_dev, per_trace); 561 return ZX_OK; 562} 563 564 565// ioctl handlers 566 567static zx_status_t ipt_alloc_trace(cpu_trace_device_t* dev, 568 const void* cmd, size_t cmdlen) { 569 if (!ipt_config_supported) 570 return ZX_ERR_NOT_SUPPORTED; 571 // For now we only support ToPA. 572 if (!ipt_config_output_topa) 573 return ZX_ERR_NOT_SUPPORTED; 574 575 ioctl_insntrace_trace_config_t config; 576 if (cmdlen != sizeof(config)) 577 return ZX_ERR_INVALID_ARGS; 578 memcpy(&config, cmd, sizeof(config)); 579 580 // TODO(dje): Until thread tracing is supported. 
581 if (config.mode == IPT_MODE_THREADS) 582 return ZX_ERR_NOT_SUPPORTED; 583 584 uint32_t internal_mode; 585 switch (config.mode) { 586 case IPT_MODE_CPUS: 587 internal_mode = IPT_TRACE_CPUS; 588 break; 589 case IPT_MODE_THREADS: 590 internal_mode = IPT_TRACE_THREADS; 591 break; 592 default: 593 return ZX_ERR_INVALID_ARGS; 594 } 595 596 if (dev->insntrace) 597 return ZX_ERR_BAD_STATE; 598 599 insntrace_device_t* ipt_dev = calloc(1, sizeof(*dev->insntrace)); 600 if (!ipt_dev) 601 return ZX_ERR_NO_MEMORY; 602 603 ipt_dev->num_traces = zx_system_get_num_cpus(); 604 ipt_dev->bti = dev->bti; 605 606 ipt_dev->per_trace_state = calloc(ipt_dev->num_traces, sizeof(ipt_dev->per_trace_state[0])); 607 if (!ipt_dev->per_trace_state) { 608 free(ipt_dev); 609 return ZX_ERR_NO_MEMORY; 610 } 611 612 zx_handle_t resource = get_root_resource(); 613 zx_status_t status = 614 zx_mtrace_control(resource, MTRACE_KIND_INSNTRACE, MTRACE_INSNTRACE_ALLOC_TRACE, 0, 615 &internal_mode, sizeof(internal_mode)); 616 if (status != ZX_OK) { 617 free(ipt_dev->per_trace_state); 618 free(ipt_dev); 619 return status; 620 } 621 622 ipt_dev->mode = internal_mode; 623 dev->insntrace = ipt_dev; 624 return ZX_OK; 625} 626 627static zx_status_t ipt_free_trace(cpu_trace_device_t* dev) { 628 insntrace_device_t* ipt_dev = dev->insntrace; 629 if (ipt_dev->active) 630 return ZX_ERR_BAD_STATE; 631 632 for (uint32_t i = 0; i < ipt_dev->num_traces; ++i) { 633 ipt_per_trace_state_t* per_trace = &ipt_dev->per_trace_state[i]; 634 if (per_trace->allocated) 635 x86_pt_free_buffer1(ipt_dev, per_trace); 636 } 637 638 zx_handle_t resource = get_root_resource(); 639 zx_status_t status = 640 zx_mtrace_control(resource, MTRACE_KIND_INSNTRACE, MTRACE_INSNTRACE_FREE_TRACE, 0, NULL, 0); 641 // TODO(dje): This really shouldn't fail. What to do? 642 // For now flag things as busted and prevent further use. 
643 if (status != ZX_OK) 644 return ZX_OK; 645 646 free(ipt_dev->per_trace_state); 647 free(ipt_dev); 648 dev->insntrace = NULL; 649 return ZX_OK; 650} 651 652static zx_status_t ipt_get_trace_config(insntrace_device_t* ipt_dev, 653 void* reply, size_t replymax, 654 size_t* out_actual) { 655 ioctl_insntrace_trace_config_t config; 656 if (replymax < sizeof(config)) 657 return ZX_ERR_BUFFER_TOO_SMALL; 658 659 switch (ipt_dev->mode) { 660 case IPT_TRACE_CPUS: 661 config.mode = IPT_MODE_CPUS; 662 break; 663 case IPT_TRACE_THREADS: 664 config.mode = IPT_MODE_THREADS; 665 break; 666 default: 667 __UNREACHABLE; 668 } 669 memcpy(reply, &config, sizeof(config)); 670 *out_actual = sizeof(config); 671 return ZX_OK; 672} 673 674static zx_status_t ipt_alloc_buffer(insntrace_device_t* ipt_dev, 675 const void* cmd, size_t cmdlen, 676 void* reply, size_t replymax, 677 size_t* out_actual) { 678 ioctl_insntrace_buffer_config_t config; 679 if (cmdlen != sizeof(config)) 680 return ZX_ERR_INVALID_ARGS; 681 memcpy(&config, cmd, sizeof(config)); 682 zx_itrace_buffer_descriptor_t descriptor; 683 if (replymax < sizeof(descriptor)) 684 return ZX_ERR_BUFFER_TOO_SMALL; 685 686 zx_status_t status = x86_pt_alloc_buffer(ipt_dev, &config, &descriptor); 687 if (status != ZX_OK) 688 return status; 689 memcpy(reply, &descriptor, sizeof(descriptor)); 690 *out_actual = sizeof(descriptor); 691 return ZX_OK; 692} 693 694static zx_status_t ipt_assign_thread_buffer(insntrace_device_t* ipt_dev, 695 const void* cmd, size_t cmdlen) { 696 ioctl_insntrace_assign_thread_buffer_t assign; 697 if (cmdlen != sizeof(assign)) 698 return ZX_ERR_INVALID_ARGS; 699 700 memcpy(&assign, cmd, sizeof(assign)); 701 return x86_pt_assign_thread_buffer(ipt_dev, assign.descriptor, assign.thread); 702} 703 704static zx_status_t ipt_release_thread_buffer(insntrace_device_t* ipt_dev, 705 const void* cmd, size_t cmdlen) { 706 ioctl_insntrace_assign_thread_buffer_t assign; 707 if (cmdlen != sizeof(assign)) 708 return 
ZX_ERR_INVALID_ARGS; 709 710 memcpy(&assign, cmd, sizeof(assign)); 711 return x86_pt_release_thread_buffer(ipt_dev, assign.descriptor, assign.thread); 712} 713 714static zx_status_t ipt_get_buffer_config(insntrace_device_t* ipt_dev, 715 const void* cmd, size_t cmdlen, 716 void* reply, size_t replymax, 717 size_t* out_actual) { 718 zx_itrace_buffer_descriptor_t descriptor; 719 ioctl_insntrace_buffer_config_t config; 720 721 if (cmdlen != sizeof(descriptor)) 722 return ZX_ERR_INVALID_ARGS; 723 if (replymax < sizeof(config)) 724 return ZX_ERR_BUFFER_TOO_SMALL; 725 726 memcpy(&descriptor, cmd, sizeof(descriptor)); 727 if (descriptor >= ipt_dev->num_traces) 728 return ZX_ERR_INVALID_ARGS; 729 const ipt_per_trace_state_t* per_trace = &ipt_dev->per_trace_state[descriptor]; 730 if (!per_trace->allocated) 731 return ZX_ERR_INVALID_ARGS; 732 733 config.num_chunks = per_trace->num_chunks; 734 config.chunk_order = per_trace->chunk_order; 735 config.is_circular = per_trace->is_circular; 736 config.ctl = per_trace->ctl; 737 config.cr3_match = per_trace->cr3_match; 738 static_assert(sizeof(config.addr_ranges) == sizeof(per_trace->addr_ranges), 739 "addr range size mismatch"); 740 memcpy(config.addr_ranges, per_trace->addr_ranges, sizeof(per_trace->addr_ranges)); 741 memcpy(reply, &config, sizeof(config)); 742 *out_actual = sizeof(config); 743 return ZX_OK; 744} 745 746static zx_status_t ipt_get_buffer_info(insntrace_device_t* ipt_dev, 747 const void* cmd, size_t cmdlen, 748 void* reply, size_t replymax, 749 size_t* out_actual) { 750 zx_itrace_buffer_descriptor_t descriptor; 751 ioctl_insntrace_buffer_info_t data; 752 753 if (cmdlen != sizeof(descriptor)) 754 return ZX_ERR_INVALID_ARGS; 755 if (replymax < sizeof(data)) 756 return ZX_ERR_BUFFER_TOO_SMALL; 757 758 if (ipt_dev->active) 759 return ZX_ERR_BAD_STATE; 760 761 memcpy(&descriptor, cmd, sizeof(descriptor)); 762 if (descriptor >= ipt_dev->num_traces) 763 return ZX_ERR_INVALID_ARGS; 764 const ipt_per_trace_state_t* per_trace 
= &ipt_dev->per_trace_state[descriptor]; 765 if (!per_trace->allocated) 766 return ZX_ERR_INVALID_ARGS; 767 768 // Note: If this is a circular buffer this is just where tracing stopped. 769 data.capture_end = compute_capture_size(ipt_dev, per_trace); 770 memcpy(reply, &data, sizeof(data)); 771 *out_actual = sizeof(data); 772 return ZX_OK; 773} 774 775static zx_status_t ipt_get_chunk_handle(insntrace_device_t* ipt_dev, 776 const void* cmd, size_t cmdlen, 777 void* reply, size_t replymax, 778 size_t* out_actual) { 779 ioctl_insntrace_chunk_handle_req_t req; 780 zx_handle_t h; 781 782 if (cmdlen != sizeof(req)) 783 return ZX_ERR_INVALID_ARGS; 784 if (replymax < sizeof(h)) 785 return ZX_ERR_BUFFER_TOO_SMALL; 786 787 memcpy(&req, cmd, sizeof(req)); 788 if (req.descriptor >= ipt_dev->num_traces) 789 return ZX_ERR_INVALID_ARGS; 790 const ipt_per_trace_state_t* per_trace = &ipt_dev->per_trace_state[req.descriptor]; 791 if (!per_trace->allocated) 792 return ZX_ERR_INVALID_ARGS; 793 if (req.chunk_num >= per_trace->num_chunks) 794 return ZX_ERR_INVALID_ARGS; 795 796 zx_status_t status = zx_handle_duplicate(per_trace->chunks[req.chunk_num].vmo_handle, ZX_RIGHT_SAME_RIGHTS, &h); 797 if (status < 0) 798 return status; 799 memcpy(reply, &h, sizeof(h)); 800 *out_actual = sizeof(h); 801 return ZX_OK; 802} 803 804static zx_status_t ipt_free_buffer(insntrace_device_t* ipt_dev, 805 const void* cmd, size_t cmdlen) { 806 zx_itrace_buffer_descriptor_t descriptor; 807 if (cmdlen != sizeof(descriptor)) 808 return ZX_ERR_INVALID_ARGS; 809 memcpy(&descriptor, cmd, sizeof(descriptor)); 810 811 x86_pt_free_buffer(ipt_dev, descriptor); 812 return 0; 813} 814 815// Begin tracing. 
816static zx_status_t ipt_start(insntrace_device_t* ipt_dev) { 817 if (ipt_dev->active) 818 return ZX_ERR_BAD_STATE; 819 if (ipt_dev->mode != IPT_TRACE_CPUS) 820 return ZX_ERR_BAD_STATE; 821 assert(ipt_dev->per_trace_state); 822 823 zx_handle_t resource = get_root_resource(); 824 zx_status_t status; 825 826 // First verify a buffer has been allocated for each cpu. 827 for (uint32_t cpu = 0; cpu < ipt_dev->num_traces; ++cpu) { 828 const ipt_per_trace_state_t* per_trace = &ipt_dev->per_trace_state[cpu]; 829 if (!per_trace->allocated) 830 return ZX_ERR_BAD_STATE; 831 } 832 833 for (uint32_t cpu = 0; cpu < ipt_dev->num_traces; ++cpu) { 834 const ipt_per_trace_state_t* per_trace = &ipt_dev->per_trace_state[cpu]; 835 836 zx_x86_pt_regs_t regs; 837 regs.ctl = per_trace->ctl; 838 regs.ctl |= IPT_CTL_TOPA_MASK | IPT_CTL_TRACE_EN_MASK; 839 regs.status = per_trace->status; 840 regs.output_base = per_trace->output_base; 841 regs.output_mask_ptrs = per_trace->output_mask_ptrs; 842 regs.cr3_match = per_trace->cr3_match; 843 static_assert(sizeof(regs.addr_ranges) == sizeof(per_trace->addr_ranges), 844 "addr range size mismatch"); 845 memcpy(regs.addr_ranges, per_trace->addr_ranges, sizeof(per_trace->addr_ranges)); 846 847 status = zx_mtrace_control(resource, MTRACE_KIND_INSNTRACE, 848 MTRACE_INSNTRACE_STAGE_TRACE_DATA, 849 cpu, ®s, sizeof(regs)); 850 if (status != ZX_OK) 851 return status; 852 } 853 854 status = zx_mtrace_control(resource, MTRACE_KIND_INSNTRACE, 855 MTRACE_INSNTRACE_START, 856 0, NULL, 0); 857 if (status != ZX_OK) 858 return status; 859 ipt_dev->active = true; 860 return ZX_OK; 861} 862 863// Stop tracing. 
864static zx_status_t ipt_stop(insntrace_device_t* ipt_dev) { 865 if (!ipt_dev->active) 866 return ZX_ERR_BAD_STATE; 867 assert(ipt_dev->per_trace_state); 868 869 zx_handle_t resource = get_root_resource(); 870 871 zx_status_t status = zx_mtrace_control(resource, MTRACE_KIND_INSNTRACE, 872 MTRACE_INSNTRACE_STOP, 873 0, NULL, 0); 874 if (status != ZX_OK) 875 return status; 876 ipt_dev->active = false; 877 878 for (uint32_t cpu = 0; cpu < ipt_dev->num_traces; ++cpu) { 879 ipt_per_trace_state_t* per_trace = &ipt_dev->per_trace_state[cpu]; 880 881 zx_x86_pt_regs_t regs; 882 status = zx_mtrace_control(resource, MTRACE_KIND_INSNTRACE, 883 MTRACE_INSNTRACE_GET_TRACE_DATA, 884 cpu, ®s, sizeof(regs)); 885 if (status != ZX_OK) 886 return status; 887 per_trace->ctl = regs.ctl; 888 per_trace->status = regs.status; 889 per_trace->output_base = regs.output_base; 890 per_trace->output_mask_ptrs = regs.output_mask_ptrs; 891 per_trace->cr3_match = regs.cr3_match; 892 static_assert(sizeof(per_trace->addr_ranges) == sizeof(regs.addr_ranges), 893 "addr range size mismatch"); 894 memcpy(per_trace->addr_ranges, regs.addr_ranges, sizeof(regs.addr_ranges)); 895 896 // If there was an operational error, report it. 
897 if (per_trace->status & IPT_STATUS_ERROR_MASK) { 898 printf("%s: WARNING: operational error detected on cpu %u\n", 899 __func__, cpu); 900 } 901 } 902 903 return ZX_OK; 904} 905 906zx_status_t insntrace_ioctl(cpu_trace_device_t* dev, uint32_t op, 907 const void* cmd, size_t cmdlen, 908 void* reply, size_t replymax, 909 size_t* out_actual) { 910 assert(IOCTL_FAMILY(op) == IOCTL_FAMILY_INSNTRACE); 911 912 insntrace_device_t* ipt_dev = dev->insntrace; 913 if (op != IOCTL_INSNTRACE_ALLOC_TRACE) { 914 if (!ipt_dev) 915 return ZX_ERR_BAD_STATE; 916 } 917 918 switch (op) { 919 case IOCTL_INSNTRACE_ALLOC_TRACE: 920 if (replymax != 0) 921 return ZX_ERR_INVALID_ARGS; 922 return ipt_alloc_trace(dev, cmd, cmdlen); 923 924 case IOCTL_INSNTRACE_FREE_TRACE: 925 if (cmdlen != 0 || replymax != 0) 926 return ZX_ERR_INVALID_ARGS; 927 return ipt_free_trace(dev); 928 929 case IOCTL_INSNTRACE_GET_TRACE_CONFIG: 930 if (cmdlen != 0) 931 return ZX_ERR_INVALID_ARGS; 932 return ipt_get_trace_config(ipt_dev, reply, replymax, out_actual); 933 934 case IOCTL_INSNTRACE_ALLOC_BUFFER: 935 return ipt_alloc_buffer(ipt_dev, cmd, cmdlen, reply, replymax, out_actual); 936 937 case IOCTL_INSNTRACE_ASSIGN_THREAD_BUFFER: 938 if (replymax != 0) 939 return ZX_ERR_INVALID_ARGS; 940 return ipt_assign_thread_buffer(ipt_dev, cmd, cmdlen); 941 942 case IOCTL_INSNTRACE_RELEASE_THREAD_BUFFER: 943 if (replymax != 0) 944 return ZX_ERR_INVALID_ARGS; 945 return ipt_release_thread_buffer(ipt_dev, cmd, cmdlen); 946 947 case IOCTL_INSNTRACE_GET_BUFFER_CONFIG: 948 return ipt_get_buffer_config(ipt_dev, cmd, cmdlen, reply, replymax, out_actual); 949 950 case IOCTL_INSNTRACE_GET_BUFFER_INFO: 951 return ipt_get_buffer_info(ipt_dev, cmd, cmdlen, reply, replymax, out_actual); 952 953 case IOCTL_INSNTRACE_GET_CHUNK_HANDLE: 954 return ipt_get_chunk_handle(ipt_dev, cmd, cmdlen, reply, replymax, out_actual); 955 956 case IOCTL_INSNTRACE_FREE_BUFFER: 957 if (replymax != 0) 958 return ZX_ERR_INVALID_ARGS; 959 return 
ipt_free_buffer(ipt_dev, cmd, cmdlen); 960 961 case IOCTL_INSNTRACE_START: 962 if (cmdlen != 0 || replymax != 0) 963 return ZX_ERR_INVALID_ARGS; 964 return ipt_start(ipt_dev); 965 966 case IOCTL_INSNTRACE_STOP: 967 if (cmdlen != 0 || replymax != 0) 968 return ZX_ERR_INVALID_ARGS; 969 return ipt_stop(ipt_dev); 970 971 default: 972 return ZX_ERR_INVALID_ARGS; 973 } 974} 975 976void insntrace_release(cpu_trace_device_t* dev) { 977 // TODO(dje): None of these should fail. What to do? 978 // For now flag things as busted and prevent further use. 979 if (dev->insntrace) { 980 ipt_stop(dev->insntrace); 981 ipt_free_trace(dev); 982 } 983} 984