1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22/* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27/* #pragma ident "@(#)dtrace.c 1.49 06/08/11 SMI" */ 28 29/* 30 * DTrace - Dynamic Tracing for Solaris 31 * 32 * This is the implementation of the Solaris Dynamic Tracing framework 33 * (DTrace). The user-visible interface to DTrace is described at length in 34 * the "Solaris Dynamic Tracing Guide". The interfaces between the libdtrace 35 * library, the in-kernel DTrace framework, and the DTrace providers are 36 * described in the block comments in the <sys/dtrace.h> header file. The 37 * internal architecture of DTrace is described in the block comments in the 38 * <sys/dtrace_impl.h> header file. The comments contained within the DTrace 39 * implementation very much assume mastery of all of these sources; if one has 40 * an unanswered question about the implementation, one should consult them 41 * first. 
42 * 43 * The functions here are ordered roughly as follows: 44 * 45 * - Probe context functions 46 * - Probe hashing functions 47 * - Non-probe context utility functions 48 * - Matching functions 49 * - Provider-to-Framework API functions 50 * - Probe management functions 51 * - DIF object functions 52 * - Format functions 53 * - Predicate functions 54 * - ECB functions 55 * - Buffer functions 56 * - Enabling functions 57 * - DOF functions 58 * - Anonymous enabling functions 59 * - Consumer state functions 60 * - Helper functions 61 * - Hook functions 62 * - Driver cookbook functions 63 * 64 * Each group of functions begins with a block comment labelled the "DTrace 65 * [Group] Functions", allowing one to find each block by searching forward 66 * on capital-f functions. 67 */ 68 69#define _DTRACE_WANT_PROC_GLUE_ 1 70 71#include <sys/errno.h> 72#include <sys/types.h> 73#include <sys/stat.h> 74#include <sys/conf.h> 75#include <sys/systm.h> 76#include <sys/dtrace_impl.h> 77#include <sys/param.h> 78#include <sys/ioctl.h> 79#include <sys/fcntl.h> 80#include <miscfs/devfs/devfs.h> 81#include <sys/malloc.h> 82#include <sys/kernel_types.h> 83#include <sys/proc_internal.h> 84#include <sys/uio_internal.h> 85#include <sys/kauth.h> 86#include <vm/pmap.h> 87#include <sys/user.h> 88#include <mach/exception_types.h> 89#include <sys/signalvar.h> 90#include <kern/zalloc.h> 91 92#define t_predcache t_dtrace_predcache /* Cosmetic. 
Helps readability of thread.h */ 93 94extern void dtrace_suspend(void); 95extern void dtrace_resume(void); 96extern void dtrace_init(void); 97extern void helper_init(void); 98 99#if defined(__APPLE__) 100 101#include "../../../osfmk/chud/chud_dtrace.h" 102 103extern kern_return_t chudxnu_dtrace_callback 104 (uint64_t selector, uint64_t *args, uint32_t count); 105#endif 106 107/* 108 * DTrace Tunable Variables 109 * 110 * The following variables may be tuned by adding a line to /etc/system that 111 * includes both the name of the DTrace module ("dtrace") and the name of the 112 * variable. For example: 113 * 114 * set dtrace:dtrace_destructive_disallow = 1 115 * 116 * In general, the only variables that one should be tuning this way are those 117 * that affect system-wide DTrace behavior, and for which the default behavior 118 * is undesirable. Most of these variables are tunable on a per-consumer 119 * basis using DTrace options, and need not be tuned on a system-wide basis. 120 * When tuning these variables, avoid pathological values; while some attempt 121 * is made to verify the integrity of these variables, they are not considered 122 * part of the supported interface to DTrace, and they are therefore not 123 * checked comprehensively. Further, these variables should not be tuned 124 * dynamically via "mdb -kw" or other means; they should only be tuned via 125 * /etc/system. 
126 */ 127int dtrace_destructive_disallow = 0; 128#if defined(__APPLE__) 129#define proc_t struct proc 130#endif /* __APPLE__ */ 131dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024); 132size_t dtrace_difo_maxsize = (256 * 1024); 133dtrace_optval_t dtrace_dof_maxsize = (256 * 1024); 134size_t dtrace_global_maxsize = (16 * 1024); 135size_t dtrace_actions_max = (16 * 1024); 136size_t dtrace_retain_max = 1024; 137dtrace_optval_t dtrace_helper_actions_max = 32; 138dtrace_optval_t dtrace_helper_providers_max = 32; 139dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024); 140size_t dtrace_strsize_default = 256; 141dtrace_optval_t dtrace_cleanrate_default = 9900990; /* 101 hz */ 142dtrace_optval_t dtrace_cleanrate_min = 200000; /* 5000 hz */ 143dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC; /* 1/minute */ 144dtrace_optval_t dtrace_aggrate_default = NANOSEC; /* 1 hz */ 145dtrace_optval_t dtrace_statusrate_default = NANOSEC; /* 1 hz */ 146dtrace_optval_t dtrace_statusrate_max = (hrtime_t)10 * NANOSEC; /* 6/minute */ 147dtrace_optval_t dtrace_switchrate_default = NANOSEC; /* 1 hz */ 148dtrace_optval_t dtrace_nspec_default = 1; 149dtrace_optval_t dtrace_specsize_default = 32 * 1024; 150dtrace_optval_t dtrace_stackframes_default = 20; 151dtrace_optval_t dtrace_ustackframes_default = 20; 152dtrace_optval_t dtrace_jstackframes_default = 50; 153dtrace_optval_t dtrace_jstackstrsize_default = 512; 154int dtrace_msgdsize_max = 128; 155hrtime_t dtrace_chill_max = 500 * (NANOSEC / MILLISEC); /* 500 ms */ 156hrtime_t dtrace_chill_interval = NANOSEC; /* 1000 ms */ 157int dtrace_devdepth_max = 32; 158int dtrace_err_verbose; 159hrtime_t dtrace_deadman_interval = NANOSEC; 160hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC; 161hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC; 162 163/* 164 * DTrace External Variables 165 * 166 * As dtrace(7D) is a kernel module, any DTrace variables are obviously 167 * available to DTrace consumers via the backtick 
(`) syntax. One of these, 168 * dtrace_zero, is made deliberately so: it is provided as a source of 169 * well-known, zero-filled memory. While this variable is not documented, 170 * it is used by some translators as an implementation detail. 171 */ 172const char dtrace_zero[256] = { 0 }; /* zero-filled memory */ 173 174/* 175 * DTrace Internal Variables 176 */ 177static dev_info_t *dtrace_devi; /* device info */ 178static vmem_t *dtrace_arena; /* probe ID arena */ 179static vmem_t *dtrace_minor; /* minor number arena */ 180static taskq_t *dtrace_taskq; /* task queue */ 181static dtrace_probe_t **dtrace_probes; /* array of all probes */ 182static int dtrace_nprobes; /* number of probes */ 183static dtrace_provider_t *dtrace_provider; /* provider list */ 184static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */ 185static int dtrace_opens; /* number of opens */ 186static int dtrace_helpers; /* number of helpers */ 187static void *dtrace_softstate; /* softstate pointer */ 188static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */ 189static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */ 190static dtrace_hash_t *dtrace_byname; /* probes hashed by name */ 191static dtrace_toxrange_t *dtrace_toxrange; /* toxic range array */ 192static int dtrace_toxranges; /* number of toxic ranges */ 193static int dtrace_toxranges_max; /* size of toxic range array */ 194static dtrace_anon_t dtrace_anon; /* anonymous enabling */ 195static kmem_cache_t *dtrace_state_cache; /* cache for dynamic state */ 196static uint64_t dtrace_vtime_references; /* number of vtimestamp refs */ 197static kthread_t *dtrace_panicked; /* panicking thread */ 198static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */ 199static dtrace_genid_t dtrace_probegen; /* current probe generation */ 200static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */ 201static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */ 202static 
dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */ 203#if defined(__APPLE__) 204static int dtrace_dof_mode; /* dof mode */ 205#endif 206 207#if defined(__APPLE__) 208 209/* 210 * To save memory, some common memory allocations are given a 211 * unique zone. In example, dtrace_probe_t is 72 bytes in size, 212 * which means it would fall into the kalloc.128 bucket. With 213 * 20k elements allocated, the space saved is substantial. 214 */ 215 216struct zone *dtrace_probe_t_zone; 217 218#endif 219 220/* 221 * DTrace Locking 222 * DTrace is protected by three (relatively coarse-grained) locks: 223 * 224 * (1) dtrace_lock is required to manipulate essentially any DTrace state, 225 * including enabling state, probes, ECBs, consumer state, helper state, 226 * etc. Importantly, dtrace_lock is _not_ required when in probe context; 227 * probe context is lock-free -- synchronization is handled via the 228 * dtrace_sync() cross call mechanism. 229 * 230 * (2) dtrace_provider_lock is required when manipulating provider state, or 231 * when provider state must be held constant. 232 * 233 * (3) dtrace_meta_lock is required when manipulating meta provider state, or 234 * when meta provider state must be held constant. 235 * 236 * The lock ordering between these three locks is dtrace_meta_lock before 237 * dtrace_provider_lock before dtrace_lock. (In particular, there are 238 * several places where dtrace_provider_lock is held by the framework as it 239 * calls into the providers -- which then call back into the framework, 240 * grabbing dtrace_lock.) 241 * 242 * There are two other locks in the mix: mod_lock and cpu_lock. With respect 243 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical 244 * role as a coarse-grained lock; it is acquired before both of these locks. 245 * With respect to dtrace_meta_lock, its behavior is stranger: cpu_lock must 246 * be acquired _between_ dtrace_meta_lock and any other DTrace locks. 
247 * mod_lock is similar with respect to dtrace_provider_lock in that it must be 248 * acquired _between_ dtrace_provider_lock and dtrace_lock. 249 */ 250 251/* 252 * APPLE NOTE: 253 * 254 * All kmutex_t vars have been changed to lck_mtx_t. 255 * Note that lck_mtx_t's require explicit initialization. 256 * 257 * mutex_enter() becomes lck_mtx_lock() 258 * mutex_exit() becomes lck_mtx_unlock() 259 * 260 * Lock asserts are changed like this: 261 * 262 * ASSERT(MUTEX_HELD(&cpu_lock)); 263 * becomes: 264 * lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); 265 * 266 * Due to the number of these changes, they are not called out explicitly. 267 */ 268static lck_mtx_t dtrace_lock; /* probe state lock */ 269static lck_mtx_t dtrace_provider_lock; /* provider state lock */ 270static lck_mtx_t dtrace_meta_lock; /* meta-provider state lock */ 271#if defined(__APPLE__) 272static lck_rw_t dtrace_dof_mode_lock; /* dof mode lock */ 273#endif 274 275/* 276 * DTrace Provider Variables 277 * 278 * These are the variables relating to DTrace as a provider (that is, the 279 * provider of the BEGIN, END, and ERROR probes). 
280 */ 281static dtrace_pattr_t dtrace_provider_attr = { 282{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, 283{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, 284{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, 285{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, 286{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, 287}; 288 289static void 290dtrace_nullop(void) 291{} 292 293static dtrace_pops_t dtrace_provider_ops = { 294 (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop, 295 (void (*)(void *, struct modctl *))dtrace_nullop, 296 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, 297 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, 298 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, 299 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, 300 NULL, 301 NULL, 302 NULL, 303 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop 304}; 305 306static dtrace_id_t dtrace_probeid_begin; /* special BEGIN probe */ 307static dtrace_id_t dtrace_probeid_end; /* special END probe */ 308dtrace_id_t dtrace_probeid_error; /* special ERROR probe */ 309 310/* 311 * DTrace Helper Tracing Variables 312 */ 313uint32_t dtrace_helptrace_next = 0; 314uint32_t dtrace_helptrace_nlocals; 315char *dtrace_helptrace_buffer; 316int dtrace_helptrace_bufsize = 512 * 1024; 317 318#ifdef DEBUG 319int dtrace_helptrace_enabled = 1; 320#else 321int dtrace_helptrace_enabled = 0; 322#endif 323 324/* 325 * DTrace Error Hashing 326 * 327 * On DEBUG kernels, DTrace will track the errors that has seen in a hash 328 * table. This is very useful for checking coverage of tests that are 329 * expected to induce DIF or DOF processing errors, and may be useful for 330 * debugging problems in the DIF code generator or in DOF generation . The 331 * error hash may be examined with the ::dtrace_errhash MDB dcmd. 
332 */ 333#ifdef DEBUG 334static dtrace_errhash_t dtrace_errhash[DTRACE_ERRHASHSZ]; 335static const char *dtrace_errlast; 336static kthread_t *dtrace_errthread; 337static lck_mtx_t dtrace_errlock; 338#endif 339 340/* 341 * DTrace Macros and Constants 342 * 343 * These are various macros that are useful in various spots in the 344 * implementation, along with a few random constants that have no meaning 345 * outside of the implementation. There is no real structure to this cpp 346 * mishmash -- but is there ever? 347 */ 348#define DTRACE_HASHSTR(hash, probe) \ 349 dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs))) 350 351#define DTRACE_HASHNEXT(hash, probe) \ 352 (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs) 353 354#define DTRACE_HASHPREV(hash, probe) \ 355 (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs) 356 357#define DTRACE_HASHEQ(hash, lhs, rhs) \ 358 (strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \ 359 *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0) 360 361#define DTRACE_AGGHASHSIZE_SLEW 17 362 363/* 364 * The key for a thread-local variable consists of the lower 61 bits of the 365 * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL. 366 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never 367 * equal to a variable identifier. This is necessary (but not sufficient) to 368 * assure that global associative arrays never collide with thread-local 369 * variables. To guarantee that they cannot collide, we must also define the 370 * order for keying dynamic variables. That order is: 371 * 372 * [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ] 373 * 374 * Because the variable-key and the tls-key are in orthogonal spaces, there is 375 * no way for a global variable key signature to match a thread-local key 376 * signature. 
377 */ 378#if !defined(__APPLE__) 379#define DTRACE_TLS_THRKEY(where) { \ 380 uint_t intr = 0; \ 381 uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \ 382 for (; actv; actv >>= 1) \ 383 intr++; \ 384 ASSERT(intr < (1 << 3)); \ 385 (where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \ 386 (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ 387} 388#else 389#define DTRACE_TLS_THRKEY(where) { \ 390 uint_t intr = ml_at_interrupt_context(); /* XXX just one measely bit */ \ 391 uint_t thr = (uint_t)current_thread(); \ 392 uint_t pid = (uint_t)proc_selfpid(); \ 393 ASSERT(intr < (1 << 3)); \ 394 (where) = ((((uint64_t)thr << 32 | pid) + DIF_VARIABLE_MAX) & \ 395 (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ 396} 397#endif /* __APPLE__ */ 398 399#define DTRACE_STORE(type, tomax, offset, what) \ 400 *((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what); 401 402#if !defined(__APPLE__) 403#if !(defined(__i386__) || defined (__x86_64__)) 404#define DTRACE_ALIGNCHECK(addr, size, flags) \ 405 if (addr & (size - 1)) { \ 406 *flags |= CPU_DTRACE_BADALIGN; \ 407 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \ 408 return (0); \ 409 } 410#else 411#define DTRACE_ALIGNCHECK(addr, size, flags) 412#endif 413 414#define DTRACE_LOADFUNC(bits) \ 415/*CSTYLED*/ \ 416uint##bits##_t \ 417dtrace_load##bits(uintptr_t addr) \ 418{ \ 419 size_t size = bits / NBBY; \ 420 /*CSTYLED*/ \ 421 uint##bits##_t rval; \ 422 int i; \ 423 volatile uint16_t *flags = (volatile uint16_t *) \ 424 &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \ 425 \ 426 DTRACE_ALIGNCHECK(addr, size, flags); \ 427 \ 428 for (i = 0; i < dtrace_toxranges; i++) { \ 429 if (addr >= dtrace_toxrange[i].dtt_limit) \ 430 continue; \ 431 \ 432 if (addr + size <= dtrace_toxrange[i].dtt_base) \ 433 continue; \ 434 \ 435 /* \ 436 * This address falls within a toxic region; return 0. 
\ 437 */ \ 438 *flags |= CPU_DTRACE_BADADDR; \ 439 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \ 440 return (0); \ 441 } \ 442 \ 443 *flags |= CPU_DTRACE_NOFAULT; \ 444 /*CSTYLED*/ \ 445 rval = *((volatile uint##bits##_t *)addr); \ 446 *flags &= ~CPU_DTRACE_NOFAULT; \ 447 \ 448 return (rval); \ 449} 450#else 451#define DTRACE_ALIGNCHECK(addr, size, flags) \ 452 if (addr & (MIN(size,4) - 1)) { \ 453 *flags |= CPU_DTRACE_BADALIGN; \ 454 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \ 455 return (0); \ 456 } 457 458#define RECOVER_LABEL(bits) __asm__ volatile("_dtraceLoadRecover" #bits ":" ); 459 460#define DTRACE_LOADFUNC(bits) \ 461/*CSTYLED*/ \ 462extern vm_offset_t dtraceLoadRecover##bits; \ 463uint##bits##_t dtrace_load##bits(uintptr_t addr); \ 464 \ 465uint##bits##_t \ 466dtrace_load##bits(uintptr_t addr) \ 467{ \ 468 size_t size = bits / NBBY; \ 469 /*CSTYLED*/ \ 470 uint##bits##_t rval = 0; \ 471 int i; \ 472 ppnum_t pp; \ 473 volatile uint16_t *flags = (volatile uint16_t *) \ 474 &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \ 475 \ 476 DTRACE_ALIGNCHECK(addr, size, flags); \ 477 \ 478 for (i = 0; i < dtrace_toxranges; i++) { \ 479 if (addr >= dtrace_toxrange[i].dtt_limit) \ 480 continue; \ 481 \ 482 if (addr + size <= dtrace_toxrange[i].dtt_base) \ 483 continue; \ 484 \ 485 /* \ 486 * This address falls within a toxic region; return 0. \ 487 */ \ 488 *flags |= CPU_DTRACE_BADADDR; \ 489 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \ 490 return (0); \ 491 } \ 492 \ 493 pp = pmap_find_phys(kernel_pmap, addr); \ 494 \ 495 if (0 == pp || /* pmap_find_phys failed ? */ \ 496 !dtxnu_is_RAM_page(pp) /* Backed by RAM? 
*/ ) { \ 497 *flags |= CPU_DTRACE_BADADDR; \ 498 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \ 499 return (0); \ 500 } \ 501 \ 502 { \ 503 volatile vm_offset_t recover = (vm_offset_t)&dtraceLoadRecover##bits; \ 504 *flags |= CPU_DTRACE_NOFAULT; \ 505 recover = dtrace_set_thread_recover(current_thread(), recover); \ 506 /*CSTYLED*/ \ 507 rval = *((volatile uint##bits##_t *)addr); \ 508 RECOVER_LABEL(bits); \ 509 (void)dtrace_set_thread_recover(current_thread(), recover); \ 510 *flags &= ~CPU_DTRACE_NOFAULT; \ 511 } \ 512 \ 513 return (rval); \ 514} 515#endif /* __APPLE__ */ 516 517 518#ifdef __LP64__ 519#define dtrace_loadptr dtrace_load64 520#else 521#define dtrace_loadptr dtrace_load32 522#endif 523 524#define DTRACE_DYNHASH_FREE 0 525#define DTRACE_DYNHASH_SINK 1 526#define DTRACE_DYNHASH_VALID 2 527 528#define DTRACE_MATCH_NEXT 0 529#define DTRACE_MATCH_DONE 1 530#define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0') 531#define DTRACE_STATE_ALIGN 64 532 533#define DTRACE_FLAGS2FLT(flags) \ 534 (((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR : \ 535 ((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP : \ 536 ((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO : \ 537 ((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV : \ 538 ((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV : \ 539 ((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW : \ 540 ((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN : \ 541 ((flags) & CPU_DTRACE_NOSCRATCH) ? 
DTRACEFLT_NOSCRATCH : \ 542 DTRACEFLT_UNKNOWN) 543 544#define DTRACEACT_ISSTRING(act) \ 545 ((act)->dta_kind == DTRACEACT_DIFEXPR && \ 546 (act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) 547 548static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id); 549static void dtrace_enabling_provide(dtrace_provider_t *); 550static int dtrace_enabling_match(dtrace_enabling_t *, int *); 551static void dtrace_enabling_matchall(void); 552static dtrace_state_t *dtrace_anon_grab(void); 553static uint64_t dtrace_helper(int, dtrace_mstate_t *, 554 dtrace_state_t *, uint64_t, uint64_t); 555static dtrace_helpers_t *dtrace_helpers_create(proc_t *); 556static void dtrace_buffer_drop(dtrace_buffer_t *); 557static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t, 558 dtrace_state_t *, dtrace_mstate_t *); 559static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t, 560 dtrace_optval_t); 561static int dtrace_ecb_create_enable(dtrace_probe_t *, void *); 562static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *); 563 564/* 565 * DTrace Probe Context Functions 566 * 567 * These functions are called from probe context. Because probe context is 568 * any context in which C may be called, arbitrarily locks may be held, 569 * interrupts may be disabled, we may be in arbitrary dispatched state, etc. 570 * As a result, functions called from probe context may only call other DTrace 571 * support functions -- they may not interact at all with the system at large. 572 * (Note that the ASSERT macro is made probe-context safe by redefining it in 573 * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary 574 * loads are to be performed from probe context, they _must_ be in terms of 575 * the safe dtrace_load*() variants. 576 * 577 * Some functions in this block are not actually called from probe context; 578 * for these functions, there will be a comment above the function reading 579 * "Note: not called from probe context." 
580 */ 581void 582dtrace_panic(const char *format, ...) 583{ 584 va_list alist; 585 586 va_start(alist, format); 587 dtrace_vpanic(format, alist); 588 va_end(alist); 589} 590 591int 592dtrace_assfail(const char *a, const char *f, int l) 593{ 594 dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l); 595 596 /* 597 * We just need something here that even the most clever compiler 598 * cannot optimize away. 599 */ 600 return (a[(uintptr_t)f]); 601} 602 603/* 604 * Atomically increment a specified error counter from probe context. 605 */ 606static void 607dtrace_error(uint32_t *counter) 608{ 609 /* 610 * Most counters stored to in probe context are per-CPU counters. 611 * However, there are some error conditions that are sufficiently 612 * arcane that they don't merit per-CPU storage. If these counters 613 * are incremented concurrently on different CPUs, scalability will be 614 * adversely affected -- but we don't expect them to be white-hot in a 615 * correctly constructed enabling... 616 */ 617 uint32_t oval, nval; 618 619 do { 620 oval = *counter; 621 622 if ((nval = oval + 1) == 0) { 623 /* 624 * If the counter would wrap, set it to 1 -- assuring 625 * that the counter is never zero when we have seen 626 * errors. (The counter must be 32-bits because we 627 * aren't guaranteed a 64-bit compare&swap operation.) 628 * To save this code both the infamy of being fingered 629 * by a priggish news story and the indignity of being 630 * the target of a neo-puritan witch trial, we're 631 * carefully avoiding any colorful description of the 632 * likelihood of this condition -- but suffice it to 633 * say that it is only slightly more likely than the 634 * overflow of predicate cache IDs, as discussed in 635 * dtrace_predicate_create(). 
636 */ 637 nval = 1; 638 } 639 } while (dtrace_cas32(counter, oval, nval) != oval); 640} 641 642/* 643 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a 644 * uint8_t, a uint16_t, a uint32_t and a uint64_t. 645 */ 646DTRACE_LOADFUNC(8) 647DTRACE_LOADFUNC(16) 648DTRACE_LOADFUNC(32) 649DTRACE_LOADFUNC(64) 650 651static int 652dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate) 653{ 654 if (dest < mstate->dtms_scratch_base) 655 return (0); 656 657 if (dest + size < dest) 658 return (0); 659 660 if (dest + size > mstate->dtms_scratch_ptr) 661 return (0); 662 663 return (1); 664} 665 666static int 667dtrace_canstore_statvar(uint64_t addr, size_t sz, 668 dtrace_statvar_t **svars, int nsvars) 669{ 670 int i; 671 672 for (i = 0; i < nsvars; i++) { 673 dtrace_statvar_t *svar = svars[i]; 674 675 if (svar == NULL || svar->dtsv_size == 0) 676 continue; 677 678 if (addr - svar->dtsv_data < svar->dtsv_size && 679 addr + sz <= svar->dtsv_data + svar->dtsv_size) 680 return (1); 681 } 682 683 return (0); 684} 685 686/* 687 * Check to see if the address is within a memory region to which a store may 688 * be issued. This includes the DTrace scratch areas, and any DTrace variable 689 * region. The caller of dtrace_canstore() is responsible for performing any 690 * alignment checks that are needed before stores are actually executed. 691 */ 692static int 693dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, 694 dtrace_vstate_t *vstate) 695{ 696 uintptr_t a; 697 size_t s; 698 699 /* 700 * First, check to see if the address is in scratch space... 701 */ 702 a = mstate->dtms_scratch_base; 703 s = mstate->dtms_scratch_size; 704 705 if (addr - a < s && addr + sz <= a + s) 706 return (1); 707 708 /* 709 * Now check to see if it's a dynamic variable. This check will pick 710 * up both thread-local variables and any global dynamically-allocated 711 * variables. 
712 */ 713 a = (uintptr_t)vstate->dtvs_dynvars.dtds_base; 714 s = vstate->dtvs_dynvars.dtds_size; 715 if (addr - a < s && addr + sz <= a + s) 716 return (1); 717 718 /* 719 * Finally, check the static local and global variables. These checks 720 * take the longest, so we perform them last. 721 */ 722 if (dtrace_canstore_statvar(addr, sz, 723 vstate->dtvs_locals, vstate->dtvs_nlocals)) 724 return (1); 725 726 if (dtrace_canstore_statvar(addr, sz, 727 vstate->dtvs_globals, vstate->dtvs_nglobals)) 728 return (1); 729 730 return (0); 731} 732 733/* 734 * Compare two strings using safe loads. 735 */ 736static int 737dtrace_strncmp(char *s1, char *s2, size_t limit) 738{ 739 uint8_t c1, c2; 740 volatile uint16_t *flags; 741 742 if (s1 == s2 || limit == 0) 743 return (0); 744 745 flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; 746 747 do { 748 if (s1 == NULL) 749 c1 = '\0'; 750 else 751 c1 = dtrace_load8((uintptr_t)s1++); 752 753 if (s2 == NULL) 754 c2 = '\0'; 755 else 756 c2 = dtrace_load8((uintptr_t)s2++); 757 758 if (c1 != c2) 759 return (c1 - c2); 760 } while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT)); 761 762 return (0); 763} 764 765/* 766 * Compute strlen(s) for a string using safe memory accesses. The additional 767 * len parameter is used to specify a maximum length to ensure completion. 768 */ 769static size_t 770dtrace_strlen(const char *s, size_t lim) 771{ 772 uint_t len; 773 774 for (len = 0; len != lim; len++) 775 if (dtrace_load8((uintptr_t)s++) == '\0') 776 break; 777 778 return (len); 779} 780 781/* 782 * Check if an address falls within a toxic region. 
783 */ 784static int 785dtrace_istoxic(uintptr_t kaddr, size_t size) 786{ 787 uintptr_t taddr, tsize; 788 int i; 789 790 for (i = 0; i < dtrace_toxranges; i++) { 791 taddr = dtrace_toxrange[i].dtt_base; 792 tsize = dtrace_toxrange[i].dtt_limit - taddr; 793 794 if (kaddr - taddr < tsize) { 795 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 796 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = kaddr; 797 return (1); 798 } 799 800 if (taddr - kaddr < size) { 801 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 802 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = taddr; 803 return (1); 804 } 805 } 806 807 return (0); 808} 809 810/* 811 * Copy src to dst using safe memory accesses. The src is assumed to be unsafe 812 * memory specified by the DIF program. The dst is assumed to be safe memory 813 * that we can store to directly because it is managed by DTrace. As with 814 * standard bcopy, overlapping copies are handled properly. 815 */ 816static void 817dtrace_bcopy(const void *src, void *dst, size_t len) 818{ 819 if (len != 0) { 820 uint8_t *s1 = dst; 821 const uint8_t *s2 = src; 822 823 if (s1 <= s2) { 824 do { 825 *s1++ = dtrace_load8((uintptr_t)s2++); 826 } while (--len != 0); 827 } else { 828 s2 += len; 829 s1 += len; 830 831 do { 832 *--s1 = dtrace_load8((uintptr_t)--s2); 833 } while (--len != 0); 834 } 835 } 836} 837 838/* 839 * Copy src to dst using safe memory accesses, up to either the specified 840 * length, or the point that a nul byte is encountered. The src is assumed to 841 * be unsafe memory specified by the DIF program. The dst is assumed to be 842 * safe memory that we can store to directly because it is managed by DTrace. 843 * Unlike dtrace_bcopy(), overlapping regions are not handled. 
844 */ 845static void 846dtrace_strcpy(const void *src, void *dst, size_t len) 847{ 848 if (len != 0) { 849 uint8_t *s1 = dst, c; 850 const uint8_t *s2 = src; 851 852 do { 853 *s1++ = c = dtrace_load8((uintptr_t)s2++); 854 } while (--len != 0 && c != '\0'); 855 } 856} 857 858/* 859 * Copy src to dst, deriving the size and type from the specified (BYREF) 860 * variable type. The src is assumed to be unsafe memory specified by the DIF 861 * program. The dst is assumed to be DTrace variable memory that is of the 862 * specified type; we assume that we can store to directly. 863 */ 864static void 865dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type) 866{ 867 ASSERT(type->dtdt_flags & DIF_TF_BYREF); 868 869 if (type->dtdt_kind == DIF_TYPE_STRING) 870 dtrace_strcpy(src, dst, type->dtdt_size); 871 else 872 dtrace_bcopy(src, dst, type->dtdt_size); 873} 874 875/* 876 * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be 877 * unsafe memory specified by the DIF program. The s2 data is assumed to be 878 * safe memory that we can access directly because it is managed by DTrace. 879 */ 880static int 881dtrace_bcmp(const void *s1, const void *s2, size_t len) 882{ 883 volatile uint16_t *flags; 884 885 flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; 886 887 if (s1 == s2) 888 return (0); 889 890 if (s1 == NULL || s2 == NULL) 891 return (1); 892 893 if (s1 != s2 && len != 0) { 894 const uint8_t *ps1 = s1; 895 const uint8_t *ps2 = s2; 896 897 do { 898 if (dtrace_load8((uintptr_t)ps1++) != *ps2++) 899 return (1); 900 } while (--len != 0 && !(*flags & CPU_DTRACE_FAULT)); 901 } 902 return (0); 903} 904 905/* 906 * Zero the specified region using a simple byte-by-byte loop. Note that this 907 * is for safe DTrace-managed memory only. 
908 */ 909static void 910dtrace_bzero(void *dst, size_t len) 911{ 912 uchar_t *cp; 913 914 for (cp = dst; len != 0; len--) 915 *cp++ = 0; 916} 917 918/* 919 * This privilege check should be used by actions and subroutines to 920 * verify that the user credentials of the process that enabled the 921 * invoking ECB match the target credentials 922 */ 923static int 924dtrace_priv_proc_common_user(dtrace_state_t *state) 925{ 926 cred_t *cr, *s_cr = state->dts_cred.dcr_cred; 927 928 /* 929 * We should always have a non-NULL state cred here, since if cred 930 * is null (anonymous tracing), we fast-path bypass this routine. 931 */ 932 ASSERT(s_cr != NULL); 933 934#if !defined(__APPLE__) 935 if ((cr = CRED()) != NULL && 936#else 937 if ((cr = dtrace_CRED()) != NULL && 938#endif /* __APPLE__ */ 939 s_cr->cr_uid == cr->cr_uid && 940 s_cr->cr_uid == cr->cr_ruid && 941 s_cr->cr_uid == cr->cr_suid && 942 s_cr->cr_gid == cr->cr_gid && 943 s_cr->cr_gid == cr->cr_rgid && 944 s_cr->cr_gid == cr->cr_sgid) 945 return (1); 946 947 return (0); 948} 949 950/* 951 * This privilege check should be used by actions and subroutines to 952 * verify that the zone of the process that enabled the invoking ECB 953 * matches the target credentials 954 */ 955static int 956dtrace_priv_proc_common_zone(dtrace_state_t *state) 957{ 958 cred_t *cr, *s_cr = state->dts_cred.dcr_cred; 959 960 /* 961 * We should always have a non-NULL state cred here, since if cred 962 * is null (anonymous tracing), we fast-path bypass this routine. 963 */ 964 ASSERT(s_cr != NULL); 965 966#if !defined(__APPLE__) 967 if ((cr = CRED()) != NULL && 968 s_cr->cr_zone == cr->cr_zone) 969 return (1); 970 971 return (0); 972#else 973#pragma unused(state) 974 975 return 1; /* Darwin doesn't do zones. */ 976#endif /* __APPLE__ */ 977} 978 979/* 980 * This privilege check should be used by actions and subroutines to 981 * verify that the process has not setuid or changed credentials. 
 */
#if !defined(__APPLE__)
static int
dtrace_priv_proc_common_nocd()
{
	proc_t *proc;

	/* Succeed only when the current process lacks the SNOCD flag. */
	if ((proc = ttoproc(curthread)) != NULL &&
	    !(proc->p_flag & SNOCD))
		return (1);

	return (0);
}
#else
static int
dtrace_priv_proc_common_nocd(void)
{
	return 1; /* Darwin omits "No Core Dump" flag. */
}
#endif /* __APPLE__ */

/*
 * Check whether this consumer may take destructive actions against the
 * current process.  Each DTRACE_CRA_PROC_DESTRUCTIVE_* bit, when set,
 * exempts the consumer from the corresponding common check.  On failure,
 * CPU_DTRACE_UPRIV is set on this CPU and 0 is returned.
 */
static int
dtrace_priv_proc_destructive(dtrace_state_t *state)
{
	int action = state->dts_cred.dcr_action;

#if defined(__APPLE__)
	/* Processes marked P_LNOATTACH may never be acted upon. */
	if (ISSET(current_proc()->p_lflag, P_LNOATTACH))
		goto bad;
#endif /* __APPLE__ */

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) &&
	    dtrace_priv_proc_common_zone(state) == 0)
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) &&
	    dtrace_priv_proc_common_user(state) == 0)
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) &&
	    dtrace_priv_proc_common_nocd() == 0)
		goto bad;

	return (1);

bad:
	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

/*
 * Check whether this consumer may exercise process-control over the current
 * process:  either it holds DTRACE_CRA_PROC_CONTROL outright, or it passes
 * all three common (zone/user/nocd) checks.  Sets CPU_DTRACE_UPRIV and
 * returns 0 on failure.
 */
static int
dtrace_priv_proc_control(dtrace_state_t *state)
{
#if defined(__APPLE__)
	if (ISSET(current_proc()->p_lflag, P_LNOATTACH))
		goto bad;
#endif /* __APPLE__ */

	if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
		return (1);

	if (dtrace_priv_proc_common_zone(state) &&
	    dtrace_priv_proc_common_user(state) &&
	    dtrace_priv_proc_common_nocd())
		return (1);

#if defined(__APPLE__)
bad:
#endif /* __APPLE__ */
	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

/*
 * Basic process-privilege check:  the consumer must hold DTRACE_CRA_PROC
 * (and, on Darwin, the process must not be marked P_LNOATTACH).  Sets
 * CPU_DTRACE_UPRIV and returns 0 on failure.
 */
static int
dtrace_priv_proc(dtrace_state_t *state)
{
#if defined(__APPLE__)
	if (ISSET(current_proc()->p_lflag, P_LNOATTACH))
		goto bad;
#endif /* __APPLE__ */

	if (state->dts_cred.dcr_action & DTRACE_CRA_PROC)
		return (1);

#if defined(__APPLE__)
bad:
#endif /* __APPLE__ */
	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

#if defined(__APPLE__)
/* dtrace_priv_proc() omitting the P_LNOATTACH check. For PID and EXECNAME accesses. */
static int
dtrace_priv_proc_relaxed(dtrace_state_t *state)
{

	if (state->dts_cred.dcr_action & DTRACE_CRA_PROC)
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}
#endif /* __APPLE__ */

/*
 * Kernel-privilege check:  requires DTRACE_CRA_KERNEL.  Sets
 * CPU_DTRACE_KPRIV and returns 0 on failure.
 */
static int
dtrace_priv_kernel(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL)
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;

	return (0);
}

/*
 * Destructive-kernel-action privilege check:  requires
 * DTRACE_CRA_KERNEL_DESTRUCTIVE.  Sets CPU_DTRACE_KPRIV and returns 0
 * on failure.
 */
static int
dtrace_priv_kernel_destructive(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE)
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;

	return (0);
}

/*
 * Note:  not called from probe context.  This function is called
 * asynchronously (and at a regular interval) from outside of probe context to
 * clean the dirty dynamic variable lists on all CPUs.  Dynamic variable
 * cleaning is explained in detail in <sys/dtrace_impl.h>.
 */
#if defined(__APPLE__)
static
#endif /* __APPLE__ */
void
dtrace_dynvar_clean(dtrace_dstate_t *dstate)
{
	dtrace_dynvar_t *dirty;
	dtrace_dstate_percpu_t *dcpu;
	int i, work = 0;

	for (i = 0; i < (int)NCPU; i++) {
		dcpu = &dstate->dtds_percpu[i];

		ASSERT(dcpu->dtdsc_rinsing == NULL);

		/*
		 * If the dirty list is NULL, there is no dirty work to do.
		 */
		if (dcpu->dtdsc_dirty == NULL)
			continue;

		/*
		 * If the clean list is non-NULL, then we're not going to do
		 * any work for this CPU -- it means that there has not been
		 * a dtrace_dynvar() allocation on this CPU (or from this CPU)
		 * since the last time we cleaned house.
		 */
		if (dcpu->dtdsc_clean != NULL)
			continue;

		work = 1;

		/*
		 * Atomically move the dirty list aside.
		 */
		do {
			dirty = dcpu->dtdsc_dirty;

			/*
			 * Before we zap the dirty list, set the rinsing list.
			 * (This allows for a potential assertion in
			 * dtrace_dynvar():  if a free dynamic variable appears
			 * on a hash chain, either the dirty list or the
			 * rinsing list for some CPU must be non-NULL.)
			 */
			dcpu->dtdsc_rinsing = dirty;
			dtrace_membar_producer();
		} while (dtrace_casptr(&dcpu->dtdsc_dirty,
		    dirty, NULL) != dirty);
	}

	if (!work) {
		/*
		 * We have no work to do; we can simply return.
		 */
		return;
	}

	/*
	 * Wait for all CPUs to pass through probe context, so no one still
	 * holds a pointer into any rinsing list.
	 */
	dtrace_sync();

	for (i = 0; i < (int)NCPU; i++) {
		dcpu = &dstate->dtds_percpu[i];

		if (dcpu->dtdsc_rinsing == NULL)
			continue;

		/*
		 * We are now guaranteed that no hash chain contains a pointer
		 * into this dirty list; we can make it clean.
		 */
		ASSERT(dcpu->dtdsc_clean == NULL);
		dcpu->dtdsc_clean = dcpu->dtdsc_rinsing;
		dcpu->dtdsc_rinsing = NULL;
	}

	/*
	 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make
	 * sure that all CPUs have seen all of the dtdsc_clean pointers.
	 * This prevents a race whereby a CPU incorrectly decides that
	 * the state should be something other than DTRACE_DSTATE_CLEAN
	 * after dtrace_dynvar_clean() has completed.
 */
	dtrace_sync();

	dstate->dtds_state = DTRACE_DSTATE_CLEAN;
}

/*
 * Depending on the value of the op parameter, this function looks-up,
 * allocates or deallocates an arbitrarily-keyed dynamic variable.  If an
 * allocation is requested, this function will return a pointer to a
 * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no
 * variable can be allocated.  If NULL is returned, the appropriate counter
 * will be incremented.
 *
 * Called from probe context; the implementation is lock-free, relying on
 * dtrace_casptr()/dtrace_cas32() and explicit memory barriers throughout.
 */
#if defined(__APPLE__)
static
#endif /* __APPLE__ */
dtrace_dynvar_t *
dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys,
    dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op)
{
	uint64_t hashval = DTRACE_DYNHASH_VALID;
	dtrace_dynhash_t *hash = dstate->dtds_hash;
	dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL;
	processorid_t me = CPU->cpu_id, cpu = me;
	dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me];
	size_t bucket, ksize;
	size_t chunksize = dstate->dtds_chunksize;
	uintptr_t kdata, lock, nstate;
	uint_t i;

	ASSERT(nkeys != 0);

	/*
	 * Hash the key.  As with aggregations, we use Jenkins' "One-at-a-time"
	 * algorithm.  For the by-value portions, we perform the algorithm in
	 * 16-bit chunks (as opposed to 8-bit chunks).  This speeds things up a
	 * bit, and seems to have only a minute effect on distribution.  For
	 * the by-reference data, we perform "One-at-a-time" iterating (safely)
	 * over each referenced byte.  It's painful to do this, but it's much
	 * better than pathological hash distribution.  The efficacy of the
	 * hashing algorithm (and a comparison with other algorithms) may be
	 * found by running the ::dtrace_dynstat MDB dcmd.
	 */
	for (i = 0; i < nkeys; i++) {
		if (key[i].dttk_size == 0) {
			uint64_t val = key[i].dttk_value;

			hashval += (val >> 48) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += (val >> 32) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += (val >> 16) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += val & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);
		} else {
			/*
			 * This is incredibly painful, but it beats the hell
			 * out of the alternative.
			 */
			uint64_t j, size = key[i].dttk_size;
			uintptr_t base = (uintptr_t)key[i].dttk_value;

			for (j = 0; j < size; j++) {
				hashval += dtrace_load8(base + j);
				hashval += (hashval << 10);
				hashval ^= (hashval >> 6);
			}
		}
	}

	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);

	/*
	 * There is a remote chance (ideally, 1 in 2^31) that our hashval
	 * comes out to be one of our two sentinel hash values.  If this
	 * actually happens, we set the hashval to be a value known to be a
	 * non-sentinel value.
	 */
	if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK)
		hashval = DTRACE_DYNHASH_VALID;

	/*
	 * Yes, it's painful to do a divide here.  If the cycle count becomes
	 * important here, tricks can be pulled to reduce it.  (However, it's
	 * critical that hash collisions be kept to an absolute minimum;
	 * they're much more painful than a divide.)  It's better to have a
	 * solution that generates few collisions and still keeps things
	 * relatively simple.
	 */
	bucket = hashval % dstate->dtds_hashsize;

	if (op == DTRACE_DYNVAR_DEALLOC) {
		/*
		 * Deallocations take the bucket lock:  spin until the low
		 * bit of the lock word is clear, then CAS it odd.  The lock
		 * is released by incrementing the word back to even.
		 */
		volatile uintptr_t *lockp = &hash[bucket].dtdh_lock;

		for (;;) {
			while ((lock = *lockp) & 1)
				continue;

			if (dtrace_casptr((void *)lockp,
			    (void *)lock, (void *)(lock + 1)) == (void *)lock)
				break;
		}

		dtrace_membar_producer();
	}

top:
	prev = NULL;
	lock = hash[bucket].dtdh_lock;

	dtrace_membar_consumer();

	start = hash[bucket].dtdh_chain;
	ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK ||
	    start->dtdv_hashval != DTRACE_DYNHASH_FREE ||
	    op != DTRACE_DYNVAR_DEALLOC));

	for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) {
		dtrace_tuple_t *dtuple = &dvar->dtdv_tuple;
		dtrace_key_t *dkey = &dtuple->dtt_key[0];

		if (dvar->dtdv_hashval != hashval) {
			if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) {
				/*
				 * We've reached the sink, and therefore the
				 * end of the hash chain; we can kick out of
				 * the loop knowing that we have seen a valid
				 * snapshot of state.
				 */
				ASSERT(dvar->dtdv_next == NULL);
				ASSERT(dvar == &dtrace_dynhash_sink);
				break;
			}

			if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) {
				/*
				 * We've gone off the rails:  somewhere along
				 * the line, one of the members of this hash
				 * chain was deleted.  Note that we could also
				 * detect this by simply letting this loop run
				 * to completion, as we would eventually hit
				 * the end of the dirty list.  However, we
				 * want to avoid running the length of the
				 * dirty list unnecessarily (it might be quite
				 * long), so we catch this as early as
				 * possible by detecting the hash marker.  In
				 * this case, we simply set dvar to NULL and
				 * break; the conditional after the loop will
				 * send us back to top.
				 */
				dvar = NULL;
				break;
			}

			goto next;
		}

		if (dtuple->dtt_nkeys != nkeys)
			goto next;

		for (i = 0; i < nkeys; i++, dkey++) {
			if (dkey->dttk_size != key[i].dttk_size)
				goto next;	/* size or type mismatch */

			if (dkey->dttk_size != 0) {
				if (dtrace_bcmp(
				    (void *)(uintptr_t)key[i].dttk_value,
				    (void *)(uintptr_t)dkey->dttk_value,
				    dkey->dttk_size))
					goto next;
			} else {
				if (dkey->dttk_value != key[i].dttk_value)
					goto next;
			}
		}

		if (op != DTRACE_DYNVAR_DEALLOC)
			return (dvar);

		ASSERT(dvar->dtdv_next == NULL ||
		    dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE);

		if (prev != NULL) {
			ASSERT(hash[bucket].dtdh_chain != dvar);
			ASSERT(start != dvar);
			ASSERT(prev->dtdv_next == dvar);
			prev->dtdv_next = dvar->dtdv_next;
		} else {
			if (dtrace_casptr(&hash[bucket].dtdh_chain,
			    start, dvar->dtdv_next) != start) {
				/*
				 * We have failed to atomically swing the
				 * hash table head pointer, presumably because
				 * of a conflicting allocation on another CPU.
				 * We need to reread the hash chain and try
				 * again.
				 */
				goto top;
			}
		}

		dtrace_membar_producer();

		/*
		 * Now set the hash value to indicate that it's free.
		 */
		ASSERT(hash[bucket].dtdh_chain != dvar);
		dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;

		dtrace_membar_producer();

		/*
		 * Set the next pointer to point at the dirty list, and
		 * atomically swing the dirty pointer to the newly freed dvar.
		 */
		do {
			next = dcpu->dtdsc_dirty;
			dvar->dtdv_next = next;
		} while (dtrace_casptr(&dcpu->dtdsc_dirty, next, dvar) != next);

		/*
		 * Finally, unlock this hash bucket.
		 */
		ASSERT(hash[bucket].dtdh_lock == lock);
		ASSERT(lock & 1);
		hash[bucket].dtdh_lock++;

		return (NULL);
next:
		prev = dvar;
		continue;
	}

	if (dvar == NULL) {
		/*
		 * If dvar is NULL, it is because we went off the rails:
		 * one of the elements that we traversed in the hash chain
		 * was deleted while we were traversing it.  In this case,
		 * we assert that we aren't doing a dealloc (deallocs lock
		 * the hash bucket to prevent themselves from racing with
		 * one another), and retry the hash chain traversal.
		 */
		ASSERT(op != DTRACE_DYNVAR_DEALLOC);
		goto top;
	}

	if (op != DTRACE_DYNVAR_ALLOC) {
		/*
		 * If we are not to allocate a new variable, we want to
		 * return NULL now.  Before we return, check that the value
		 * of the lock word hasn't changed.  If it has, we may have
		 * seen an inconsistent snapshot.
		 */
		if (op == DTRACE_DYNVAR_NOALLOC) {
			if (hash[bucket].dtdh_lock != lock)
				goto top;
		} else {
			ASSERT(op == DTRACE_DYNVAR_DEALLOC);
			ASSERT(hash[bucket].dtdh_lock == lock);
			ASSERT(lock & 1);
			hash[bucket].dtdh_lock++;
		}

		return (NULL);
	}

	/*
	 * We need to allocate a new dynamic variable.  The size we need is the
	 * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the
	 * size of any auxiliary key data (rounded up to 8-byte alignment) plus
	 * the size of any referred-to data (dsize).  We then round the final
	 * size up to the chunksize for allocation.
	 */
	for (ksize = 0, i = 0; i < nkeys; i++)
		ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));

	/*
	 * This should be pretty much impossible, but could happen if, say,
	 * strange DIF specified the tuple.  Ideally, this should be an
	 * assertion and not an error condition -- but that requires that the
	 * chunksize calculation in dtrace_difo_chunksize() be absolutely
	 * bullet-proof.  (That is, it must not be able to be fooled by
	 * malicious DIF.)  Given the lack of backwards branches in DIF,
	 * solving this would presumably not amount to solving the Halting
	 * Problem -- but it still seems awfully hard.
	 */
	if (sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (nkeys - 1) +
	    ksize + dsize > chunksize) {
		dcpu->dtdsc_drops++;
		return (NULL);
	}

	nstate = DTRACE_DSTATE_EMPTY;

	do {
retry:
		free = dcpu->dtdsc_free;

		if (free == NULL) {
			dtrace_dynvar_t *clean = dcpu->dtdsc_clean;
			void *rval;

			if (clean == NULL) {
				/*
				 * We're out of dynamic variable space on
				 * this CPU.  Unless we have tried all CPUs,
				 * we'll try to allocate from a different
				 * CPU.
				 */
				switch (dstate->dtds_state) {
				case DTRACE_DSTATE_CLEAN: {
					void *sp = &dstate->dtds_state;

					if (++cpu >= (int)NCPU)
						cpu = 0;

					if (dcpu->dtdsc_dirty != NULL &&
					    nstate == DTRACE_DSTATE_EMPTY)
						nstate = DTRACE_DSTATE_DIRTY;

					if (dcpu->dtdsc_rinsing != NULL)
						nstate = DTRACE_DSTATE_RINSING;

					dcpu = &dstate->dtds_percpu[cpu];

					if (cpu != me)
						goto retry;

					(void) dtrace_cas32(sp,
					    DTRACE_DSTATE_CLEAN, nstate);

					/*
					 * To increment the correct bean
					 * counter, take another lap.
					 */
					goto retry;
				}

				case DTRACE_DSTATE_DIRTY:
					dcpu->dtdsc_dirty_drops++;
					break;

				case DTRACE_DSTATE_RINSING:
					dcpu->dtdsc_rinsing_drops++;
					break;

				case DTRACE_DSTATE_EMPTY:
					dcpu->dtdsc_drops++;
					break;
				}

				DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP);
				return (NULL);
			}

			/*
			 * The clean list appears to be non-empty.  We want to
			 * move the clean list to the free list; we start by
			 * moving the clean pointer aside.
			 */
			if (dtrace_casptr(&dcpu->dtdsc_clean,
			    clean, NULL) != clean) {
				/*
				 * We are in one of two situations:
				 *
				 *  (a)	The clean list was switched to the
				 *	free list by another CPU.
				 *
				 *  (b)	The clean list was added to by the
				 *	cleansing cyclic.
				 *
				 * In either of these situations, we can
				 * just reattempt the free list allocation.
				 */
				goto retry;
			}

			ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE);

			/*
			 * Now we'll move the clean list to the free list.
			 * It's impossible for this to fail:  the only way
			 * the free list can be updated is through this
			 * code path, and only one CPU can own the clean list.
			 * Thus, it would only be possible for this to fail if
			 * this code were racing with dtrace_dynvar_clean().
			 * (That is, if dtrace_dynvar_clean() updated the clean
			 * list, and we ended up racing to update the free
			 * list.)  This race is prevented by the dtrace_sync()
			 * in dtrace_dynvar_clean() -- which flushes the
			 * owners of the clean lists out before resetting
			 * the clean lists.
			 */
			rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean);
			ASSERT(rval == NULL);
			goto retry;
		}

		dvar = free;
		new_free = dvar->dtdv_next;
	} while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free);

	/*
	 * We have now allocated a new chunk.  We copy the tuple keys into the
	 * tuple array and copy any referenced key data into the data space
	 * following the tuple array.  As we do this, we relocate dttk_value
	 * in the final tuple to point to the key data address in the chunk.
	 */
	kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys];
	dvar->dtdv_data = (void *)(kdata + ksize);
	dvar->dtdv_tuple.dtt_nkeys = nkeys;

	for (i = 0; i < nkeys; i++) {
		dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i];
		size_t kesize = key[i].dttk_size;

		if (kesize != 0) {
			dtrace_bcopy(
			    (const void *)(uintptr_t)key[i].dttk_value,
			    (void *)kdata, kesize);
			dkey->dttk_value = kdata;
			kdata += P2ROUNDUP(kesize, sizeof (uint64_t));
		} else {
			dkey->dttk_value = key[i].dttk_value;
		}

		dkey->dttk_size = kesize;
	}

	ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE);
	dvar->dtdv_hashval = hashval;
	dvar->dtdv_next = start;

	if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar) == start)
		return (dvar);

	/*
	 * The cas has failed.  Either another CPU is adding an element to
	 * this hash chain, or another CPU is deleting an element from this
	 * hash chain.  The simplest way to deal with both of these cases
	 * (though not necessarily the most efficient) is to free our
	 * allocated block and tail-call ourselves.  Note that the free is
	 * to the dirty list and _not_ to the free list.  This is to prevent
	 * races with allocators, above.
 */
	dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;

	dtrace_membar_producer();

	do {
		free = dcpu->dtdsc_dirty;
		dvar->dtdv_next = free;
	} while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free);

	return (dtrace_dynvar(dstate, nkeys, key, dsize, op));
}

/*
 * Aggregating function for min():  keep the smaller of the stored and new
 * values.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(arg)
	if (nval < *oval)
		*oval = nval;
}

/*
 * Aggregating function for max():  keep the larger of the stored and new
 * values.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(arg)
	if (nval > *oval)
		*oval = nval;
}

/*
 * Aggregating function for quantize():  bump the power-of-two bucket
 * containing the (signed) value by incr.  Negative values land below the
 * zero bucket; positive values at or above it.
 */
static void
dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr)
{
	int i, zero = DTRACE_QUANTIZE_ZEROBUCKET;
	int64_t val = (int64_t)nval;

	if (val < 0) {
		for (i = 0; i < zero; i++) {
			if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) {
				quanta[i] += incr;
				return;
			}
		}
	} else {
		for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) {
			if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) {
				quanta[i - 1] += incr;
				return;
			}
		}

		quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr;
		return;
	}

	/* Every value must fall in some bucket; reaching here is a bug. */
	ASSERT(0);
}

/*
 * Aggregating function for lquantize():  linear quantization.  The first
 * element of lquanta encodes base/step/levels; element 0 of the remainder
 * is the underflow bucket and element levels+1 the overflow bucket.
 */
static void
dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr)
{
	uint64_t arg = *lquanta++;
	int32_t base = DTRACE_LQUANTIZE_BASE(arg);
	uint16_t step = DTRACE_LQUANTIZE_STEP(arg);
	uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);
	int32_t val = (int32_t)nval, level;

	ASSERT(step != 0);
	ASSERT(levels != 0);

	if (val < base) {
		/*
		 * This is an underflow.
		 */
		lquanta[0] += incr;
		return;
	}

	level = (val - base) / step;

	if (level < levels) {
		lquanta[level + 1] += incr;
		return;
	}

	/*
	 * This is an overflow.
	 */
	lquanta[levels + 1] += incr;
}

/*
 * Aggregating function for avg():  data[0] counts samples, data[1] sums
 * them; the consumer computes the quotient.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
{
#pragma unused(arg)
	data[0]++;
	data[1] += nval;
}

/*
 * Aggregating function for count():  increment the stored value by one.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(nval,arg)
	*oval = *oval + 1;
}

/*
 * Aggregating function for sum():  add the new value to the stored value.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(arg)
	*oval += nval;
}

/*
 * Aggregate given the tuple in the principal data buffer, and the aggregating
 * action denoted by the specified dtrace_aggregation_t.  The aggregation
 * buffer is specified as the buf parameter.  This routine does not return
 * failure; if there is no space in the aggregation buffer, the data will be
 * dropped, and a corresponding counter incremented.
 */
static void
dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf,
    intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg)
{
#pragma unused(arg)
	dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec;
	uint32_t i, ndx, size, fsize;
	uint32_t align = sizeof (uint64_t) - 1;
	dtrace_aggbuffer_t *agb;
	dtrace_aggkey_t *key;
	uint32_t hashval = 0, limit, isstr;
	caddr_t tomax, data, kdata;
	dtrace_actkind_t action;
	dtrace_action_t *act;
	uintptr_t offs;

	if (buf == NULL)
		return;

	if (!agg->dtag_hasarg) {
		/*
		 * Currently, only quantize() and lquantize() take additional
		 * arguments, and they have the same semantics:  an increment
		 * value that defaults to 1 when not present.  If additional
		 * aggregating actions take arguments, the setting of the
		 * default argument value will presumably have to become more
		 * sophisticated...
		 */
		arg = 1;
	}

	action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION;
	size = rec->dtrd_offset - agg->dtag_base;
	fsize = size + rec->dtrd_size;

	ASSERT(dbuf->dtb_tomax != NULL);
	data = dbuf->dtb_tomax + offset + agg->dtag_base;

	if ((tomax = buf->dtb_tomax) == NULL) {
		dtrace_buffer_drop(buf);
		return;
	}

	/*
	 * The metastructure is always at the bottom of the buffer.
	 */
	agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size -
	    sizeof (dtrace_aggbuffer_t));

	if (buf->dtb_offset == 0) {
		/*
		 * We just kludge up approximately 1/8th of the size to be
		 * buckets.  If this guess ends up being routinely
		 * off-the-mark, we may need to dynamically readjust this
		 * based on past performance.
		 */
		uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t);

		if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) <
		    (uintptr_t)tomax || hashsize == 0) {
			/*
			 * We've been given a ludicrously small buffer;
			 * increment our drop count and leave.
			 */
			dtrace_buffer_drop(buf);
			return;
		}

		/*
		 * And now, a pathetic attempt to try to get a an odd (or
		 * perchance, a prime) hash size for better hash distribution.
		 */
		if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3))
			hashsize -= DTRACE_AGGHASHSIZE_SLEW;

		agb->dtagb_hashsize = hashsize;
		agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb -
		    agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *));
		agb->dtagb_free = (uintptr_t)agb->dtagb_hash;

		for (i = 0; i < agb->dtagb_hashsize; i++)
			agb->dtagb_hash[i] = NULL;
	}

	ASSERT(agg->dtag_first != NULL);
	ASSERT(agg->dtag_first->dta_intuple);

	/*
	 * Calculate the hash value based on the key.  Note that we _don't_
	 * include the aggid in the hashing (but we will store it as part of
	 * the key).  The hashing algorithm is Bob Jenkins' "One-at-a-time"
	 * algorithm: a simple, quick algorithm that has no known funnels, and
	 * gets good distribution in practice.  The efficacy of the hashing
	 * algorithm (and a comparison with other algorithms) may be found by
	 * running the ::dtrace_aggstat MDB dcmd.
	 */
	for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
		i = act->dta_rec.dtrd_offset - agg->dtag_base;
		limit = i + act->dta_rec.dtrd_size;
		ASSERT(limit <= size);
		isstr = DTRACEACT_ISSTRING(act);

		for (; i < limit; i++) {
			hashval += data[i];
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			if (isstr && data[i] == '\0')
				break;
		}
	}

	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);

	/*
	 * Yes, the divide here is expensive -- but it's generally the least
	 * of the performance issues given the amount of data that we iterate
	 * over to compute hash values, compare data, etc.
	 */
	ndx = hashval % agb->dtagb_hashsize;

	for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) {
		ASSERT((caddr_t)key >= tomax);
		ASSERT((caddr_t)key < tomax + buf->dtb_size);

		if (hashval != key->dtak_hashval || key->dtak_size != size)
			continue;

		kdata = key->dtak_data;
		ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size);

		for (act = agg->dtag_first; act->dta_intuple;
		    act = act->dta_next) {
			i = act->dta_rec.dtrd_offset - agg->dtag_base;
			limit = i + act->dta_rec.dtrd_size;
			ASSERT(limit <= size);
			isstr = DTRACEACT_ISSTRING(act);

			for (; i < limit; i++) {
				if (kdata[i] != data[i])
					goto next;

				if (isstr && data[i] == '\0')
					break;
			}
		}

		if (action != key->dtak_action) {
			/*
			 * We are aggregating on the same value in the same
			 * aggregation with two different aggregating actions.
			 * (This should have been picked up in the compiler,
			 * so we may be dealing with errant or devious DIF.)
			 * This is an error condition; we indicate as much,
			 * and return.
			 */
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return;
		}

		/*
		 * This is a hit:  we need to apply the aggregator to
		 * the value at this key.
		 */
		agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg);
		return;
next:
		continue;
	}

	/*
	 * We didn't find it.  We need to allocate some zero-filled space,
	 * link it into the hash table appropriately, and apply the aggregator
	 * to the (zero-filled) value.
	 *
	 * NOTE(review):  with align == sizeof (uint64_t) - 1 (i.e. 7), the
	 * mask below is (align - 1) == 6, so this only produces 8-byte
	 * alignment when offs is already a multiple of 4 -- presumably
	 * guaranteed by record alignment in the principal buffer; confirm
	 * against dtrace_buffer_reserve().
	 */
	offs = buf->dtb_offset;
	while (offs & (align - 1))
		offs += sizeof (uint32_t);

	/*
	 * If we don't have enough room to both allocate a new key _and_
	 * its associated data, increment the drop count and return.
 */
	if ((uintptr_t)tomax + offs + fsize >
	    agb->dtagb_free - sizeof (dtrace_aggkey_t)) {
		dtrace_buffer_drop(buf);
		return;
	}

	/*CONSTCOND*/
	ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1)));
	key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t));
	agb->dtagb_free -= sizeof (dtrace_aggkey_t);

	key->dtak_data = kdata = tomax + offs;
	buf->dtb_offset = offs + fsize;

	/*
	 * Now copy the data across.
	 */
	*((dtrace_aggid_t *)kdata) = agg->dtag_id;

	for (i = sizeof (dtrace_aggid_t); i < size; i++)
		kdata[i] = data[i];

	/*
	 * Because strings are not zeroed out by default, we need to iterate
	 * looking for actions that store strings, and we need to explicitly
	 * pad these strings out with zeroes.
	 */
	for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
		int nul;

		if (!DTRACEACT_ISSTRING(act))
			continue;

		i = act->dta_rec.dtrd_offset - agg->dtag_base;
		limit = i + act->dta_rec.dtrd_size;
		ASSERT(limit <= size);

		/* Once the NUL is seen, zero every remaining byte. */
		for (nul = 0; i < limit; i++) {
			if (nul) {
				kdata[i] = '\0';
				continue;
			}

			if (data[i] != '\0')
				continue;

			nul = 1;
		}
	}

	/* Zero the value portion that follows the key. */
	for (i = size; i < fsize; i++)
		kdata[i] = 0;

	key->dtak_hashval = hashval;
	key->dtak_size = size;
	key->dtak_action = action;
	key->dtak_next = agb->dtagb_hash[ndx];
	agb->dtagb_hash[ndx] = key;

	/*
	 * Finally, apply the aggregator.
	 */
	*((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial;
	agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg);
}

/*
 * Given consumer state, this routine finds a speculation in the INACTIVE
 * state and transitions it into the ACTIVE state.  If there is no speculation
 * in the INACTIVE state, 0 is returned.  In this case, no error counter is
 * incremented -- it is up to the caller to take appropriate action.
 */
static int
dtrace_speculation(dtrace_state_t *state)
{
	int i = 0;
	dtrace_speculation_state_t current;
	uint32_t *stat = &state->dts_speculations_unavail, count;

	while (i < state->dts_nspeculations) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];

		current = spec->dtsp_state;

		if (current != DTRACESPEC_INACTIVE) {
			if (current == DTRACESPEC_COMMITTINGMANY ||
			    current == DTRACESPEC_COMMITTING ||
			    current == DTRACESPEC_DISCARDING)
				stat = &state->dts_speculations_busy;
			i++;
			continue;
		}

		/*
		 * Atomically claim this speculation; on CAS failure we
		 * retry the same slot (i is deliberately not advanced).
		 * Speculation IDs are 1-based, hence i + 1.
		 */
		if (dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    current, DTRACESPEC_ACTIVE) == current)
			return (i + 1);
	}

	/*
	 * We couldn't find a speculation.  If we found as much as a single
	 * busy speculation buffer, we'll attribute this failure as "busy"
	 * instead of "unavail".
	 */
	do {
		count = *stat;
	} while (dtrace_cas32(stat, count, count + 1) != count);

	return (0);
}

/*
 * This routine commits an active speculation.  If the specified speculation
 * is not in a valid state to perform a commit(), this routine will silently do
 * nothing.  The state of the specified speculation is transitioned according
 * to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_buffer_t *src, *dest;
	uintptr_t daddr, saddr, dlimit;
	dtrace_speculation_state_t current, new;
	intptr_t offs;

	if (which == 0)
		return;

	if (which > state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	/* which is 1-based; index the speculation array accordingly. */
	spec = &state->dts_speculations[which - 1];
	src = &spec->dtsp_buffer[cpu];
	dest = &state->dts_buffer[cpu];

	do {
		current = spec->dtsp_state;

		if (current == DTRACESPEC_COMMITTINGMANY)
			break;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_COMMITTING:
			/*
			 * This is only possible if we are (a) commit()'ing
			 * without having done a prior speculate() on this CPU
			 * and (b) racing with another commit() on a different
			 * CPU.  There's nothing to do -- we just assert that
			 * our offset is 0.
			 */
			ASSERT(src->dtb_offset == 0);
			return;

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_COMMITTING;
			break;

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is active on one CPU.  If our
			 * buffer offset is non-zero, we know that the one CPU
			 * must be us.  Otherwise, we are committing on a
			 * different CPU from the speculate(), and we must
			 * rely on being asynchronously cleaned.
The state of the specified speculation is transitioned according 2078 * to the state transition diagram outlined in <sys/dtrace_impl.h> 2079 */ 2080static void 2081dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu, 2082 dtrace_specid_t which) 2083{ 2084 dtrace_speculation_t *spec; 2085 dtrace_buffer_t *src, *dest; 2086 uintptr_t daddr, saddr, dlimit; 2087 dtrace_speculation_state_t current, new; 2088 intptr_t offs; 2089 2090 if (which == 0) 2091 return; 2092 2093 if (which > state->dts_nspeculations) { 2094 cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; 2095 return; 2096 } 2097 2098 spec = &state->dts_speculations[which - 1]; 2099 src = &spec->dtsp_buffer[cpu]; 2100 dest = &state->dts_buffer[cpu]; 2101 2102 do { 2103 current = spec->dtsp_state; 2104 2105 if (current == DTRACESPEC_COMMITTINGMANY) 2106 break; 2107 2108 switch (current) { 2109 case DTRACESPEC_INACTIVE: 2110 case DTRACESPEC_DISCARDING: 2111 return; 2112 2113 case DTRACESPEC_COMMITTING: 2114 /* 2115 * This is only possible if we are (a) commit()'ing 2116 * without having done a prior speculate() on this CPU 2117 * and (b) racing with another commit() on a different 2118 * CPU. There's nothing to do -- we just assert that 2119 * our offset is 0. 2120 */ 2121 ASSERT(src->dtb_offset == 0); 2122 return; 2123 2124 case DTRACESPEC_ACTIVE: 2125 new = DTRACESPEC_COMMITTING; 2126 break; 2127 2128 case DTRACESPEC_ACTIVEONE: 2129 /* 2130 * This speculation is active on one CPU. If our 2131 * buffer offset is non-zero, we know that the one CPU 2132 * must be us. Otherwise, we are committing on a 2133 * different CPU from the speculate(), and we must 2134 * rely on being asynchronously cleaned. 
2135 */ 2136 if (src->dtb_offset != 0) { 2137 new = DTRACESPEC_COMMITTING; 2138 break; 2139 } 2140 /*FALLTHROUGH*/ 2141 2142 case DTRACESPEC_ACTIVEMANY: 2143 new = DTRACESPEC_COMMITTINGMANY; 2144 break; 2145 2146 default: 2147 ASSERT(0); 2148 } 2149 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state, 2150 current, new) != current); 2151 2152 /* 2153 * We have set the state to indicate that we are committing this 2154 * speculation. Now reserve the necessary space in the destination 2155 * buffer. 2156 */ 2157 if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset, 2158 sizeof (uint64_t), state, NULL)) < 0) { 2159 dtrace_buffer_drop(dest); 2160 goto out; 2161 } 2162 2163 /* 2164 * We have the space; copy the buffer across. (Note that this is a 2165 * highly subobtimal bcopy(); in the unlikely event that this becomes 2166 * a serious performance issue, a high-performance DTrace-specific 2167 * bcopy() should obviously be invented.) 2168 */ 2169 daddr = (uintptr_t)dest->dtb_tomax + offs; 2170 dlimit = daddr + src->dtb_offset; 2171 saddr = (uintptr_t)src->dtb_tomax; 2172 2173 /* 2174 * First, the aligned portion. 2175 */ 2176 while (dlimit - daddr >= sizeof (uint64_t)) { 2177 *((uint64_t *)daddr) = *((uint64_t *)saddr); 2178 2179 daddr += sizeof (uint64_t); 2180 saddr += sizeof (uint64_t); 2181 } 2182 2183 /* 2184 * Now any left-over bit... 2185 */ 2186 while (dlimit - daddr) 2187 *((uint8_t *)daddr++) = *((uint8_t *)saddr++); 2188 2189 /* 2190 * Finally, commit the reserved space in the destination buffer. 2191 */ 2192 dest->dtb_offset = offs + src->dtb_offset; 2193 2194out: 2195 /* 2196 * If we're lucky enough to be the only active CPU on this speculation 2197 * buffer, we can just set the state back to DTRACESPEC_INACTIVE. 
2198 */ 2199 if (current == DTRACESPEC_ACTIVE || 2200 (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) { 2201 uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state, 2202 DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE); 2203 2204 ASSERT(rval == DTRACESPEC_COMMITTING); 2205 } 2206 2207 src->dtb_offset = 0; 2208 src->dtb_xamot_drops += src->dtb_drops; 2209 src->dtb_drops = 0; 2210} 2211 2212/* 2213 * This routine discards an active speculation. If the specified speculation 2214 * is not in a valid state to perform a discard(), this routine will silently 2215 * do nothing. The state of the specified speculation is transitioned 2216 * according to the state transition diagram outlined in <sys/dtrace_impl.h> 2217 */ 2218static void 2219dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu, 2220 dtrace_specid_t which) 2221{ 2222 dtrace_speculation_t *spec; 2223 dtrace_speculation_state_t current, new; 2224 dtrace_buffer_t *buf; 2225 2226 if (which == 0) 2227 return; 2228 2229 if (which > state->dts_nspeculations) { 2230 cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; 2231 return; 2232 } 2233 2234 spec = &state->dts_speculations[which - 1]; 2235 buf = &spec->dtsp_buffer[cpu]; 2236 2237 do { 2238 current = spec->dtsp_state; 2239 2240 switch (current) { 2241 case DTRACESPEC_INACTIVE: 2242 case DTRACESPEC_COMMITTINGMANY: 2243 case DTRACESPEC_COMMITTING: 2244 case DTRACESPEC_DISCARDING: 2245 return; 2246 2247 case DTRACESPEC_ACTIVE: 2248 case DTRACESPEC_ACTIVEMANY: 2249 new = DTRACESPEC_DISCARDING; 2250 break; 2251 2252 case DTRACESPEC_ACTIVEONE: 2253 if (buf->dtb_offset != 0) { 2254 new = DTRACESPEC_INACTIVE; 2255 } else { 2256 new = DTRACESPEC_DISCARDING; 2257 } 2258 break; 2259 2260 default: 2261 ASSERT(0); 2262 } 2263 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state, 2264 current, new) != current); 2265 2266 buf->dtb_offset = 0; 2267 buf->dtb_drops = 0; 2268} 2269 2270/* 2271 * Note: not called from probe context. 
This function is called 2272 * asynchronously from cross call context to clean any speculations that are 2273 * in the COMMITTINGMANY or DISCARDING states. These speculations may not be 2274 * transitioned back to the INACTIVE state until all CPUs have cleaned the 2275 * speculation. 2276 */ 2277static void 2278dtrace_speculation_clean_here(dtrace_state_t *state) 2279{ 2280 dtrace_icookie_t cookie; 2281 processorid_t cpu = CPU->cpu_id; 2282 dtrace_buffer_t *dest = &state->dts_buffer[cpu]; 2283 dtrace_specid_t i; 2284 2285 cookie = dtrace_interrupt_disable(); 2286 2287 if (dest->dtb_tomax == NULL) { 2288 dtrace_interrupt_enable(cookie); 2289 return; 2290 } 2291 2292 for (i = 0; i < state->dts_nspeculations; i++) { 2293 dtrace_speculation_t *spec = &state->dts_speculations[i]; 2294 dtrace_buffer_t *src = &spec->dtsp_buffer[cpu]; 2295 2296 if (src->dtb_tomax == NULL) 2297 continue; 2298 2299 if (spec->dtsp_state == DTRACESPEC_DISCARDING) { 2300 src->dtb_offset = 0; 2301 continue; 2302 } 2303 2304 if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY) 2305 continue; 2306 2307 if (src->dtb_offset == 0) 2308 continue; 2309 2310 dtrace_speculation_commit(state, cpu, i + 1); 2311 } 2312 2313 dtrace_interrupt_enable(cookie); 2314} 2315 2316/* 2317 * Note: not called from probe context. This function is called 2318 * asynchronously (and at a regular interval) to clean any speculations that 2319 * are in the COMMITTINGMANY or DISCARDING states. If it discovers that there 2320 * is work to be done, it cross calls all CPUs to perform that work; 2321 * COMMITMANY and DISCARDING speculations may not be transitioned back to the 2322 * INACTIVE state until they have been cleaned by all CPUs. 
2323 */ 2324static void 2325dtrace_speculation_clean(dtrace_state_t *state) 2326{ 2327 int work = 0, rv; 2328 dtrace_specid_t i; 2329 2330 for (i = 0; i < state->dts_nspeculations; i++) { 2331 dtrace_speculation_t *spec = &state->dts_speculations[i]; 2332 2333 ASSERT(!spec->dtsp_cleaning); 2334 2335 if (spec->dtsp_state != DTRACESPEC_DISCARDING && 2336 spec->dtsp_state != DTRACESPEC_COMMITTINGMANY) 2337 continue; 2338 2339 work++; 2340 spec->dtsp_cleaning = 1; 2341 } 2342 2343 if (!work) 2344 return; 2345 2346 dtrace_xcall(DTRACE_CPUALL, 2347 (dtrace_xcall_t)dtrace_speculation_clean_here, state); 2348 2349 /* 2350 * We now know that all CPUs have committed or discarded their 2351 * speculation buffers, as appropriate. We can now set the state 2352 * to inactive. 2353 */ 2354 for (i = 0; i < state->dts_nspeculations; i++) { 2355 dtrace_speculation_t *spec = &state->dts_speculations[i]; 2356 dtrace_speculation_state_t current, new; 2357 2358 if (!spec->dtsp_cleaning) 2359 continue; 2360 2361 current = spec->dtsp_state; 2362 ASSERT(current == DTRACESPEC_DISCARDING || 2363 current == DTRACESPEC_COMMITTINGMANY); 2364 2365 new = DTRACESPEC_INACTIVE; 2366 2367 rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new); 2368 ASSERT(rv == current); 2369 spec->dtsp_cleaning = 0; 2370 } 2371} 2372 2373/* 2374 * Called as part of a speculate() to get the speculative buffer associated 2375 * with a given speculation. Returns NULL if the specified speculation is not 2376 * in an ACTIVE state. If the speculation is in the ACTIVEONE state -- and 2377 * the active CPU is not the specified CPU -- the speculation will be 2378 * atomically transitioned into the ACTIVEMANY state. 
2379 */ 2380static dtrace_buffer_t * 2381dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid, 2382 dtrace_specid_t which) 2383{ 2384 dtrace_speculation_t *spec; 2385 dtrace_speculation_state_t current, new; 2386 dtrace_buffer_t *buf; 2387 2388 if (which == 0) 2389 return (NULL); 2390 2391 if (which > state->dts_nspeculations) { 2392 cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; 2393 return (NULL); 2394 } 2395 2396 spec = &state->dts_speculations[which - 1]; 2397 buf = &spec->dtsp_buffer[cpuid]; 2398 2399 do { 2400 current = spec->dtsp_state; 2401 2402 switch (current) { 2403 case DTRACESPEC_INACTIVE: 2404 case DTRACESPEC_COMMITTINGMANY: 2405 case DTRACESPEC_DISCARDING: 2406 return (NULL); 2407 2408 case DTRACESPEC_COMMITTING: 2409 ASSERT(buf->dtb_offset == 0); 2410 return (NULL); 2411 2412 case DTRACESPEC_ACTIVEONE: 2413 /* 2414 * This speculation is currently active on one CPU. 2415 * Check the offset in the buffer; if it's non-zero, 2416 * that CPU must be us (and we leave the state alone). 2417 * If it's zero, assume that we're starting on a new 2418 * CPU -- and change the state to indicate that the 2419 * speculation is active on more than one CPU. 2420 */ 2421 if (buf->dtb_offset != 0) 2422 return (buf); 2423 2424 new = DTRACESPEC_ACTIVEMANY; 2425 break; 2426 2427 case DTRACESPEC_ACTIVEMANY: 2428 return (buf); 2429 2430 case DTRACESPEC_ACTIVE: 2431 new = DTRACESPEC_ACTIVEONE; 2432 break; 2433 2434 default: 2435 ASSERT(0); 2436 } 2437 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state, 2438 current, new) != current); 2439 2440 ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY); 2441 return (buf); 2442} 2443 2444/* 2445 * This function implements the DIF emulator's variable lookups. The emulator 2446 * passes a reserved variable identifier and optional built-in array index. 
2447 */ 2448static uint64_t 2449dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, 2450 uint64_t ndx) 2451{ 2452 /* 2453 * If we're accessing one of the uncached arguments, we'll turn this 2454 * into a reference in the args array. 2455 */ 2456 if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) { 2457 ndx = v - DIF_VAR_ARG0; 2458 v = DIF_VAR_ARGS; 2459 } 2460 2461 switch (v) { 2462 case DIF_VAR_ARGS: 2463 ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS); 2464 if (ndx >= sizeof (mstate->dtms_arg) / 2465 sizeof (mstate->dtms_arg[0])) { 2466#if !defined(__APPLE__) 2467 int aframes = mstate->dtms_probe->dtpr_aframes + 2; 2468#else 2469 /* Account for introduction of __dtrace_probe() on xnu. */ 2470 int aframes = mstate->dtms_probe->dtpr_aframes + 3; 2471#endif /* __APPLE__ */ 2472 dtrace_provider_t *pv; 2473 uint64_t val; 2474 2475 pv = mstate->dtms_probe->dtpr_provider; 2476 if (pv->dtpv_pops.dtps_getargval != NULL) 2477 val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg, 2478 mstate->dtms_probe->dtpr_id, 2479 mstate->dtms_probe->dtpr_arg, ndx, aframes); 2480#if defined(__APPLE__) 2481 /* Special case access of arg5 as passed to dtrace_probeid_error (which see.) */ 2482 else if (mstate->dtms_probe->dtpr_id == dtrace_probeid_error && ndx == 5) { 2483 return ((dtrace_state_t *)(mstate->dtms_arg[0]))->dts_arg_error_illval; 2484 } 2485#endif /* __APPLE__ */ 2486 else 2487 val = dtrace_getarg(ndx, aframes); 2488 2489 /* 2490 * This is regrettably required to keep the compiler 2491 * from tail-optimizing the call to dtrace_getarg(). 2492 * The condition always evaluates to true, but the 2493 * compiler has no way of figuring that out a priori. 2494 * (None of this would be necessary if the compiler 2495 * could be relied upon to _always_ tail-optimize 2496 * the call to dtrace_getarg() -- but it can't.) 
2497 */ 2498 if (mstate->dtms_probe != NULL) 2499 return (val); 2500 2501 ASSERT(0); 2502 } 2503 2504 return (mstate->dtms_arg[ndx]); 2505 2506#if !defined(__APPLE__) 2507 case DIF_VAR_UREGS: { 2508 klwp_t *lwp; 2509 2510 if (!dtrace_priv_proc(state)) 2511 return (0); 2512 2513 if ((lwp = curthread->t_lwp) == NULL) { 2514 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 2515 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = NULL; 2516 return (0); 2517 } 2518 2519 return (dtrace_getreg(lwp->lwp_regs, ndx)); 2520 } 2521#else 2522 case DIF_VAR_UREGS: { 2523 thread_t thread; 2524 2525 if (!dtrace_priv_proc(state)) 2526 return (0); 2527 2528 if ((thread = current_thread()) == NULL) { 2529 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 2530 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = 0; 2531 return (0); 2532 } 2533 2534 return (dtrace_getreg(find_user_regs(thread), ndx)); 2535 } 2536#endif /* __APPLE__ */ 2537 2538#if !defined(__APPLE__) 2539 case DIF_VAR_CURTHREAD: 2540 if (!dtrace_priv_kernel(state)) 2541 return (0); 2542 return ((uint64_t)(uintptr_t)curthread); 2543#else 2544 case DIF_VAR_CURTHREAD: 2545 if (!dtrace_priv_kernel(state)) 2546 return (0); 2547 2548 return ((uint64_t)(uintptr_t)current_thread()); 2549#endif /* __APPLE__ */ 2550 2551 case DIF_VAR_TIMESTAMP: 2552 if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) { 2553 mstate->dtms_timestamp = dtrace_gethrtime(); 2554 mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP; 2555 } 2556 return (mstate->dtms_timestamp); 2557 2558#if !defined(__APPLE__) 2559 case DIF_VAR_VTIMESTAMP: 2560 ASSERT(dtrace_vtime_references != 0); 2561 return (curthread->t_dtrace_vtime); 2562#else 2563 case DIF_VAR_VTIMESTAMP: 2564 ASSERT(dtrace_vtime_references != 0); 2565 return (dtrace_get_thread_vtime(current_thread())); 2566#endif /* __APPLE__ */ 2567 2568 case DIF_VAR_WALLTIMESTAMP: 2569 if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) { 2570 mstate->dtms_walltimestamp = dtrace_gethrestime(); 2571 mstate->dtms_present |= 
DTRACE_MSTATE_WALLTIMESTAMP; 2572 } 2573 return (mstate->dtms_walltimestamp); 2574 2575 case DIF_VAR_IPL: 2576 if (!dtrace_priv_kernel(state)) 2577 return (0); 2578 if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) { 2579 mstate->dtms_ipl = dtrace_getipl(); 2580 mstate->dtms_present |= DTRACE_MSTATE_IPL; 2581 } 2582 return (mstate->dtms_ipl); 2583 2584 case DIF_VAR_EPID: 2585 ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID); 2586 return (mstate->dtms_epid); 2587 2588 case DIF_VAR_ID: 2589 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 2590 return (mstate->dtms_probe->dtpr_id); 2591 2592 case DIF_VAR_STACKDEPTH: 2593 if (!dtrace_priv_kernel(state)) 2594 return (0); 2595 if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) { 2596#if !defined(__APPLE__) 2597 int aframes = mstate->dtms_probe->dtpr_aframes + 2; 2598#else 2599 /* Account for introduction of __dtrace_probe() on xnu. */ 2600 int aframes = mstate->dtms_probe->dtpr_aframes + 3; 2601#endif /* __APPLE__ */ 2602 2603 mstate->dtms_stackdepth = dtrace_getstackdepth(aframes); 2604 mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH; 2605 } 2606 return (mstate->dtms_stackdepth); 2607 2608 case DIF_VAR_USTACKDEPTH: 2609 if (!dtrace_priv_proc(state)) 2610 return (0); 2611 if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) { 2612 /* 2613 * See comment in DIF_VAR_PID. 
2614 */ 2615 if (DTRACE_ANCHORED(mstate->dtms_probe) && 2616 CPU_ON_INTR(CPU)) { 2617 mstate->dtms_ustackdepth = 0; 2618 } else { 2619 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 2620 mstate->dtms_ustackdepth = 2621 dtrace_getustackdepth(); 2622 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 2623 } 2624 mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH; 2625 } 2626 return (mstate->dtms_ustackdepth); 2627 2628 case DIF_VAR_CALLER: 2629 if (!dtrace_priv_kernel(state)) 2630 return (0); 2631 if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) { 2632#if !defined(__APPLE__) 2633 int aframes = mstate->dtms_probe->dtpr_aframes + 2; 2634#else 2635 /* Account for introduction of __dtrace_probe() on xnu. */ 2636 int aframes = mstate->dtms_probe->dtpr_aframes + 3; 2637#endif /* __APPLE__ */ 2638 2639 if (!DTRACE_ANCHORED(mstate->dtms_probe)) { 2640 /* 2641 * If this is an unanchored probe, we are 2642 * required to go through the slow path: 2643 * dtrace_caller() only guarantees correct 2644 * results for anchored probes. 2645 */ 2646 pc_t caller[2]; 2647 2648 dtrace_getpcstack(caller, 2, aframes, 2649 (uint32_t *)(uintptr_t)mstate->dtms_arg[0]); 2650 mstate->dtms_caller = caller[1]; 2651 } else if ((mstate->dtms_caller = 2652 dtrace_caller(aframes)) == -1) { 2653 /* 2654 * We have failed to do this the quick way; 2655 * we must resort to the slower approach of 2656 * calling dtrace_getpcstack(). 2657 */ 2658 pc_t caller; 2659 2660 dtrace_getpcstack(&caller, 1, aframes, NULL); 2661 mstate->dtms_caller = caller; 2662 } 2663 2664 mstate->dtms_present |= DTRACE_MSTATE_CALLER; 2665 } 2666 return (mstate->dtms_caller); 2667 2668 case DIF_VAR_UCALLER: 2669 if (!dtrace_priv_proc(state)) 2670 return (0); 2671 2672 if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) { 2673 uint64_t ustack[3]; 2674 2675 /* 2676 * dtrace_getupcstack() fills in the first uint64_t 2677 * with the current PID. The second uint64_t will 2678 * be the program counter at user-level. 
The third 2679 * uint64_t will contain the caller, which is what 2680 * we're after. 2681 */ 2682 ustack[2] = NULL; 2683 dtrace_getupcstack(ustack, 3); 2684 mstate->dtms_ucaller = ustack[2]; 2685 mstate->dtms_present |= DTRACE_MSTATE_UCALLER; 2686 } 2687 2688 return (mstate->dtms_ucaller); 2689 2690 case DIF_VAR_PROBEPROV: 2691 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 2692 return ((uint64_t)(uintptr_t) 2693 mstate->dtms_probe->dtpr_provider->dtpv_name); 2694 2695 case DIF_VAR_PROBEMOD: 2696 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 2697 return ((uint64_t)(uintptr_t) 2698 mstate->dtms_probe->dtpr_mod); 2699 2700 case DIF_VAR_PROBEFUNC: 2701 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 2702 return ((uint64_t)(uintptr_t) 2703 mstate->dtms_probe->dtpr_func); 2704 2705 case DIF_VAR_PROBENAME: 2706 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 2707 return ((uint64_t)(uintptr_t) 2708 mstate->dtms_probe->dtpr_name); 2709 2710#if !defined(__APPLE__) 2711 case DIF_VAR_PID: 2712 if (!dtrace_priv_proc(state)) 2713 return (0); 2714 2715 /* 2716 * Note that we are assuming that an unanchored probe is 2717 * always due to a high-level interrupt. (And we're assuming 2718 * that there is only a single high level interrupt.) 2719 */ 2720 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 2721 return (pid0.pid_id); 2722 2723 /* 2724 * It is always safe to dereference one's own t_procp pointer: 2725 * it always points to a valid, allocated proc structure. 2726 * Further, it is always safe to dereference the p_pidp member 2727 * of one's own proc structure. (These are truisms becuase 2728 * threads and processes don't clean up their own state -- 2729 * they leave that task to whomever reaps them.) 
2730 */ 2731 return ((uint64_t)curthread->t_procp->p_pidp->pid_id); 2732 2733#else 2734 case DIF_VAR_PID: 2735 if (!dtrace_priv_proc_relaxed(state)) 2736 return (0); 2737 2738 /* 2739 * Note that we are assuming that an unanchored probe is 2740 * always due to a high-level interrupt. (And we're assuming 2741 * that there is only a single high level interrupt.) 2742 */ 2743 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 2744 /* Anchored probe that fires while on an interrupt accrues to process 0 */ 2745 return 0; 2746 2747 return ((uint64_t)proc_selfpid()); 2748#endif /* __APPLE__ */ 2749 2750#if !defined(__APPLE__) 2751 case DIF_VAR_PPID: 2752 if (!dtrace_priv_proc(state)) 2753 return (0); 2754 2755 /* 2756 * See comment in DIF_VAR_PID. 2757 */ 2758 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 2759 return (pid0.pid_id); 2760 2761 return ((uint64_t)curthread->t_procp->p_ppid); 2762#else 2763 case DIF_VAR_PPID: 2764 if (!dtrace_priv_proc_relaxed(state)) 2765 return (0); 2766 2767 /* 2768 * See comment in DIF_VAR_PID. 2769 */ 2770 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 2771 return (0); 2772 2773 return ((uint64_t)(uintptr_t)(current_proc()->p_ppid)); 2774#endif /* __APPLE__ */ 2775 2776#if !defined(__APPLE__) 2777 case DIF_VAR_TID: 2778 /* 2779 * See comment in DIF_VAR_PID. 2780 */ 2781 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 2782 return (0); 2783 2784 return ((uint64_t)curthread->t_tid); 2785#else 2786 case DIF_VAR_TID: 2787 /* 2788 * See comment in DIF_VAR_PID. 2789 */ 2790 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 2791 return (0); 2792 2793 return ((uint64_t)(uintptr_t)current_thread()); /* Is user's (pthread_t)t->kernel_thread */ 2794#endif /* __APPLE__ */ 2795 2796#if !defined(__APPLE__) 2797 case DIF_VAR_EXECNAME: 2798 if (!dtrace_priv_proc(state)) 2799 return (0); 2800 2801 /* 2802 * See comment in DIF_VAR_PID. 
2803 */ 2804 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 2805 return ((uint64_t)(uintptr_t)p0.p_user.u_comm); 2806 2807 /* 2808 * It is always safe to dereference one's own t_procp pointer: 2809 * it always points to a valid, allocated proc structure. 2810 * (This is true because threads don't clean up their own 2811 * state -- they leave that task to whomever reaps them.) 2812 */ 2813 return ((uint64_t)(uintptr_t) 2814 curthread->t_procp->p_user.u_comm); 2815#else 2816 case DIF_VAR_EXECNAME: 2817 { 2818 char *xname = (char *)mstate->dtms_scratch_ptr; 2819 size_t scratch_size = MAXCOMLEN+1; 2820 2821 /* The scratch allocation's lifetime is that of the clause. */ 2822 if (mstate->dtms_scratch_ptr + scratch_size > 2823 mstate->dtms_scratch_base + mstate->dtms_scratch_size) 2824 return 0; 2825 2826 if (!dtrace_priv_proc_relaxed(state)) 2827 return (0); 2828 2829 mstate->dtms_scratch_ptr += scratch_size; 2830 proc_selfname( xname, MAXCOMLEN ); 2831 2832 return ((uint64_t)(uintptr_t)xname); 2833 } 2834#endif /* __APPLE__ */ 2835#if !defined(__APPLE__) 2836 case DIF_VAR_ZONENAME: 2837 if (!dtrace_priv_proc(state)) 2838 return (0); 2839 2840 /* 2841 * See comment in DIF_VAR_PID. 2842 */ 2843 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 2844 return ((uint64_t)(uintptr_t)p0.p_zone->zone_name); 2845 2846 /* 2847 * It is always safe to dereference one's own t_procp pointer: 2848 * it always points to a valid, allocated proc structure. 2849 * (This is true because threads don't clean up their own 2850 * state -- they leave that task to whomever reaps them.) 
2851 */ 2852 return ((uint64_t)(uintptr_t) 2853 curthread->t_procp->p_zone->zone_name); 2854 2855#else 2856 case DIF_VAR_ZONENAME: 2857 if (!dtrace_priv_proc(state)) 2858 return (0); 2859 2860 return ((uint64_t)(uintptr_t)NULL); /* Darwin doesn't do "zones" */ 2861#endif /* __APPLE__ */ 2862 2863#if !defined(__APPLE__) 2864 case DIF_VAR_UID: 2865 if (!dtrace_priv_proc(state)) 2866 return (0); 2867 2868 /* 2869 * See comment in DIF_VAR_PID. 2870 */ 2871 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 2872 return ((uint64_t)p0.p_cred->cr_uid); 2873 2874 return ((uint64_t)curthread->t_cred->cr_uid); 2875#else 2876 case DIF_VAR_UID: 2877 if (!dtrace_priv_proc(state)) 2878 return (0); 2879 2880 /* 2881 * See comment in DIF_VAR_PID. 2882 */ 2883 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 2884 return (0); 2885 2886 if (dtrace_CRED() != NULL) 2887 return ((uint64_t)kauth_getuid()); 2888 else 2889 return -1LL; 2890#endif /* __APPLE__ */ 2891 2892#if !defined(__APPLE__) 2893 case DIF_VAR_GID: 2894 if (!dtrace_priv_proc(state)) 2895 return (0); 2896 2897 /* 2898 * See comment in DIF_VAR_PID. 2899 */ 2900 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 2901 return ((uint64_t)p0.p_cred->cr_gid); 2902 2903 return ((uint64_t)curthread->t_cred->cr_gid); 2904#else 2905 case DIF_VAR_GID: 2906 if (!dtrace_priv_proc(state)) 2907 return (0); 2908 2909 /* 2910 * See comment in DIF_VAR_PID. 2911 */ 2912 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 2913 return (0); 2914 2915 if (dtrace_CRED() != NULL) 2916 return ((uint64_t)kauth_getgid()); 2917 else 2918 return -1LL; 2919#endif /* __APPLE__ */ 2920 2921#if !defined(__APPLE__) 2922 case DIF_VAR_ERRNO: { 2923 klwp_t *lwp; 2924 if (!dtrace_priv_proc(state)) 2925 return (0); 2926 2927 /* 2928 * See comment in DIF_VAR_PID. 
2929 */ 2930 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 2931 return (0); 2932 2933 if ((lwp = curthread->t_lwp) == NULL) 2934 return (0); 2935 2936 return ((uint64_t)lwp->lwp_errno); 2937 } 2938#else 2939 case DIF_VAR_ERRNO: { 2940 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); 2941 if (!dtrace_priv_proc(state)) 2942 return (0); 2943 2944 /* 2945 * See comment in DIF_VAR_PID. 2946 */ 2947 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 2948 return (0); 2949 2950 return (uthread ? uthread->t_dtrace_errno : -1); 2951 } 2952#endif /* __APPLE__ */ 2953 2954 default: 2955 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); 2956 return (0); 2957 } 2958} 2959 2960/* 2961 * Emulate the execution of DTrace ID subroutines invoked by the call opcode. 2962 * Notice that we don't bother validating the proper number of arguments or 2963 * their types in the tuple stack. This isn't needed because all argument 2964 * interpretation is safe because of our load safety -- the worst that can 2965 * happen is that a bogus program can obtain bogus results. 
2966 */ 2967static void 2968dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, 2969 dtrace_key_t *tupregs, int nargs, 2970 dtrace_mstate_t *mstate, dtrace_state_t *state) 2971{ 2972 volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; 2973#if !defined(__APPLE__) 2974 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; 2975#else 2976 volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; 2977#endif /* __APPLE__ */ 2978 2979#if !defined(__APPLE__) 2980 union { 2981 mutex_impl_t mi; 2982 uint64_t mx; 2983 } m; 2984 2985 union { 2986 krwlock_t ri; 2987 uintptr_t rw; 2988 } r; 2989#else 2990/* XXX awaits lock/mutex work */ 2991#endif /* __APPLE__ */ 2992 2993 switch (subr) { 2994 case DIF_SUBR_RAND: 2995 regs[rd] = (dtrace_gethrtime() * 2416 + 374441) % 1771875; 2996 break; 2997 2998#if !defined(__APPLE__) 2999 case DIF_SUBR_MUTEX_OWNED: 3000 m.mx = dtrace_load64(tupregs[0].dttk_value); 3001 if (MUTEX_TYPE_ADAPTIVE(&m.mi)) 3002 regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER; 3003 else 3004 regs[rd] = LOCK_HELD(&m.mi.m_spin.m_spinlock); 3005 break; 3006 3007 case DIF_SUBR_MUTEX_OWNER: 3008 m.mx = dtrace_load64(tupregs[0].dttk_value); 3009 if (MUTEX_TYPE_ADAPTIVE(&m.mi) && 3010 MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER) 3011 regs[rd] = (uintptr_t)MUTEX_OWNER(&m.mi); 3012 else 3013 regs[rd] = 0; 3014 break; 3015 3016 case DIF_SUBR_MUTEX_TYPE_ADAPTIVE: 3017 m.mx = dtrace_load64(tupregs[0].dttk_value); 3018 regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi); 3019 break; 3020 3021 case DIF_SUBR_MUTEX_TYPE_SPIN: 3022 m.mx = dtrace_load64(tupregs[0].dttk_value); 3023 regs[rd] = MUTEX_TYPE_SPIN(&m.mi); 3024 break; 3025 3026 case DIF_SUBR_RW_READ_HELD: { 3027 uintptr_t tmp; 3028 3029 r.rw = dtrace_loadptr(tupregs[0].dttk_value); 3030 regs[rd] = _RW_READ_HELD(&r.ri, tmp); 3031 break; 3032 } 3033 3034 case DIF_SUBR_RW_WRITE_HELD: 3035 r.rw = dtrace_loadptr(tupregs[0].dttk_value); 3036 regs[rd] = _RW_WRITE_HELD(&r.ri); 3037 break; 3038 3039 
case DIF_SUBR_RW_ISWRITER: 3040 r.rw = dtrace_loadptr(tupregs[0].dttk_value); 3041 regs[rd] = _RW_ISWRITER(&r.ri); 3042 break; 3043#else 3044/* XXX awaits lock/mutex work */ 3045#endif /* __APPLE__ */ 3046 3047 case DIF_SUBR_BCOPY: { 3048 /* 3049 * We need to be sure that the destination is in the scratch 3050 * region -- no other region is allowed. 3051 */ 3052 uintptr_t src = tupregs[0].dttk_value; 3053 uintptr_t dest = tupregs[1].dttk_value; 3054 size_t size = tupregs[2].dttk_value; 3055 3056 if (!dtrace_inscratch(dest, size, mstate)) { 3057 *flags |= CPU_DTRACE_BADADDR; 3058 *illval = regs[rd]; 3059 break; 3060 } 3061 3062 dtrace_bcopy((void *)src, (void *)dest, size); 3063 break; 3064 } 3065 3066 case DIF_SUBR_ALLOCA: 3067 case DIF_SUBR_COPYIN: { 3068 uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8); 3069 uint64_t size = 3070 tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value; 3071 size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size; 3072 3073 /* 3074 * This action doesn't require any credential checks since 3075 * probes will not activate in user contexts to which the 3076 * enabling user does not have permissions. 
3077 */ 3078 if (mstate->dtms_scratch_ptr + scratch_size > 3079 mstate->dtms_scratch_base + mstate->dtms_scratch_size) { 3080 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3081 regs[rd] = NULL; 3082 break; 3083 } 3084 3085 if (subr == DIF_SUBR_COPYIN) { 3086 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3087#if !defined(__APPLE__) 3088 dtrace_copyin(tupregs[0].dttk_value, dest, size); 3089#else 3090 if (dtrace_priv_proc(state)) 3091 dtrace_copyin(tupregs[0].dttk_value, dest, size); 3092#endif /* __APPLE__ */ 3093 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3094 } 3095 3096 mstate->dtms_scratch_ptr += scratch_size; 3097 regs[rd] = dest; 3098 break; 3099 } 3100 3101 case DIF_SUBR_COPYINTO: { 3102 uint64_t size = tupregs[1].dttk_value; 3103 uintptr_t dest = tupregs[2].dttk_value; 3104 3105 /* 3106 * This action doesn't require any credential checks since 3107 * probes will not activate in user contexts to which the 3108 * enabling user does not have permissions. 3109 */ 3110 if (!dtrace_inscratch(dest, size, mstate)) { 3111 *flags |= CPU_DTRACE_BADADDR; 3112 *illval = regs[rd]; 3113 break; 3114 } 3115 3116 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3117#if !defined(__APPLE__) 3118 dtrace_copyin(tupregs[0].dttk_value, dest, size); 3119#else 3120 if (dtrace_priv_proc(state)) 3121 dtrace_copyin(tupregs[0].dttk_value, dest, size); 3122#endif /* __APPLE__ */ 3123 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3124 break; 3125 } 3126 3127 case DIF_SUBR_COPYINSTR: { 3128 uintptr_t dest = mstate->dtms_scratch_ptr; 3129 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 3130 3131 if (nargs > 1 && tupregs[1].dttk_value < size) 3132 size = tupregs[1].dttk_value + 1; 3133 3134 /* 3135 * This action doesn't require any credential checks since 3136 * probes will not activate in user contexts to which the 3137 * enabling user does not have permissions. 
3138 */ 3139 if (mstate->dtms_scratch_ptr + size > 3140 mstate->dtms_scratch_base + mstate->dtms_scratch_size) { 3141 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3142 regs[rd] = NULL; 3143 break; 3144 } 3145 3146 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3147#if !defined(__APPLE__) 3148 dtrace_copyinstr(tupregs[0].dttk_value, dest, size); 3149#else 3150 if (dtrace_priv_proc(state)) 3151 dtrace_copyinstr(tupregs[0].dttk_value, dest, size); 3152#endif /* __APPLE__ */ 3153 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3154 3155 ((char *)dest)[size - 1] = '\0'; 3156 mstate->dtms_scratch_ptr += size; 3157 regs[rd] = dest; 3158 break; 3159 } 3160 3161#if !defined(__APPLE__) 3162 case DIF_SUBR_MSGSIZE: 3163 case DIF_SUBR_MSGDSIZE: { 3164 uintptr_t baddr = tupregs[0].dttk_value, daddr; 3165 uintptr_t wptr, rptr; 3166 size_t count = 0; 3167 int cont = 0; 3168 3169 while (baddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { 3170 wptr = dtrace_loadptr(baddr + 3171 offsetof(mblk_t, b_wptr)); 3172 3173 rptr = dtrace_loadptr(baddr + 3174 offsetof(mblk_t, b_rptr)); 3175 3176 if (wptr < rptr) { 3177 *flags |= CPU_DTRACE_BADADDR; 3178 *illval = tupregs[0].dttk_value; 3179 break; 3180 } 3181 3182 daddr = dtrace_loadptr(baddr + 3183 offsetof(mblk_t, b_datap)); 3184 3185 baddr = dtrace_loadptr(baddr + 3186 offsetof(mblk_t, b_cont)); 3187 3188 /* 3189 * We want to prevent against denial-of-service here, 3190 * so we're only going to search the list for 3191 * dtrace_msgdsize_max mblks. 
3192 */ 3193 if (cont++ > dtrace_msgdsize_max) { 3194 *flags |= CPU_DTRACE_ILLOP; 3195 break; 3196 } 3197 3198 if (subr == DIF_SUBR_MSGDSIZE) { 3199 if (dtrace_load8(daddr + 3200 offsetof(dblk_t, db_type)) != M_DATA) 3201 continue; 3202 } 3203 3204 count += wptr - rptr; 3205 } 3206 3207 if (!(*flags & CPU_DTRACE_FAULT)) 3208 regs[rd] = count; 3209 3210 break; 3211 } 3212#else 3213 case DIF_SUBR_MSGSIZE: 3214 case DIF_SUBR_MSGDSIZE: { 3215 /* Darwin does not implement SysV streams messages */ 3216 regs[rd] = 0; 3217 break; 3218 } 3219#endif /* __APPLE__ */ 3220 3221#if !defined(__APPLE__) 3222 case DIF_SUBR_PROGENYOF: { 3223 pid_t pid = tupregs[0].dttk_value; 3224 proc_t *p; 3225 int rval = 0; 3226 3227 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3228 3229 for (p = curthread->t_procp; p != NULL; p = p->p_parent) { 3230 if (p->p_pidp->pid_id == pid) { 3231 rval = 1; 3232 break; 3233 } 3234 } 3235 3236 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3237 3238 regs[rd] = rval; 3239 break; 3240 } 3241#else 3242 case DIF_SUBR_PROGENYOF: { 3243 pid_t pid = tupregs[0].dttk_value; 3244 struct proc *p = current_proc(); 3245 int rval = 0, lim = nprocs; 3246 3247 while(p && (lim-- > 0)) { 3248 pid_t ppid; 3249 3250 ppid = (pid_t)dtrace_load32((uintptr_t)&(p->p_pid)); 3251 if (*flags & CPU_DTRACE_FAULT) 3252 break; 3253 3254 if (ppid == pid) { 3255 rval = 1; 3256 break; 3257 } 3258 3259 if (ppid == 0) 3260 break; /* Can't climb process tree any further. 
*/ 3261 3262 p = (struct proc *)dtrace_loadptr((uintptr_t)&(p->p_pptr)); 3263 if (*flags & CPU_DTRACE_FAULT) 3264 break; 3265 } 3266 3267 regs[rd] = rval; 3268 break; 3269 } 3270#endif /* __APPLE__ */ 3271 3272 case DIF_SUBR_SPECULATION: 3273 regs[rd] = dtrace_speculation(state); 3274 break; 3275 3276#if !defined(__APPLE__) 3277 case DIF_SUBR_COPYOUT: { 3278 uintptr_t kaddr = tupregs[0].dttk_value; 3279 uintptr_t uaddr = tupregs[1].dttk_value; 3280 uint64_t size = tupregs[2].dttk_value; 3281 3282 if (!dtrace_destructive_disallow && 3283 dtrace_priv_proc_control(state) && 3284 !dtrace_istoxic(kaddr, size)) { 3285 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3286 dtrace_copyout(kaddr, uaddr, size); 3287 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3288 } 3289 break; 3290 } 3291 3292 case DIF_SUBR_COPYOUTSTR: { 3293 uintptr_t kaddr = tupregs[0].dttk_value; 3294 uintptr_t uaddr = tupregs[1].dttk_value; 3295 uint64_t size = tupregs[2].dttk_value; 3296 3297 if (!dtrace_destructive_disallow && 3298 dtrace_priv_proc_control(state) && 3299 !dtrace_istoxic(kaddr, size)) { 3300 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3301 dtrace_copyoutstr(kaddr, uaddr, size); 3302 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3303 } 3304 break; 3305 } 3306#else 3307 case DIF_SUBR_COPYOUT: { 3308 uintptr_t kaddr = tupregs[0].dttk_value; 3309 user_addr_t uaddr = tupregs[1].dttk_value; 3310 uint64_t size = tupregs[2].dttk_value; 3311 3312 if (!dtrace_destructive_disallow && 3313 dtrace_priv_proc_control(state) && 3314 !dtrace_istoxic(kaddr, size)) { 3315 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3316 dtrace_copyout(kaddr, uaddr, size); 3317 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3318 } 3319 break; 3320 } 3321 3322 case DIF_SUBR_COPYOUTSTR: { 3323 uintptr_t kaddr = tupregs[0].dttk_value; 3324 user_addr_t uaddr = tupregs[1].dttk_value; 3325 uint64_t size = tupregs[2].dttk_value; 3326 3327 if (!dtrace_destructive_disallow && 3328 dtrace_priv_proc_control(state) && 3329 !dtrace_istoxic(kaddr, size)) { 3330 
DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3331 dtrace_copyoutstr(kaddr, uaddr, size); 3332 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3333 } 3334 break; 3335 } 3336#endif /* __APPLE__ */ 3337 3338 case DIF_SUBR_STRLEN: 3339 regs[rd] = dtrace_strlen((char *)(uintptr_t) 3340 tupregs[0].dttk_value, 3341 state->dts_options[DTRACEOPT_STRSIZE]); 3342 break; 3343 3344 case DIF_SUBR_STRCHR: 3345 case DIF_SUBR_STRRCHR: { 3346 /* 3347 * We're going to iterate over the string looking for the 3348 * specified character. We will iterate until we have reached 3349 * the string length or we have found the character. If this 3350 * is DIF_SUBR_STRRCHR, we will look for the last occurrence 3351 * of the specified character instead of the first. 3352 */ 3353 uintptr_t addr = tupregs[0].dttk_value; 3354 uintptr_t limit = addr + state->dts_options[DTRACEOPT_STRSIZE]; 3355 char c, target = (char)tupregs[1].dttk_value; 3356 3357 for (regs[rd] = NULL; addr < limit; addr++) { 3358 if ((c = dtrace_load8(addr)) == target) { 3359 regs[rd] = addr; 3360 3361 if (subr == DIF_SUBR_STRCHR) 3362 break; 3363 } 3364 3365 if (c == '\0') 3366 break; 3367 } 3368 3369 break; 3370 } 3371 3372 case DIF_SUBR_STRSTR: 3373 case DIF_SUBR_INDEX: 3374 case DIF_SUBR_RINDEX: { 3375 /* 3376 * We're going to iterate over the string looking for the 3377 * specified string. We will iterate until we have reached 3378 * the string length or we have found the string. (Yes, this 3379 * is done in the most naive way possible -- but considering 3380 * that the string we're searching for is likely to be 3381 * relatively short, the complexity of Rabin-Karp or similar 3382 * hardly seems merited.) 
3383 */ 3384 char *addr = (char *)(uintptr_t)tupregs[0].dttk_value; 3385 char *substr = (char *)(uintptr_t)tupregs[1].dttk_value; 3386 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 3387 size_t len = dtrace_strlen(addr, size); 3388 size_t sublen = dtrace_strlen(substr, size); 3389 char *limit = addr + len, *orig = addr; 3390 int notfound = subr == DIF_SUBR_STRSTR ? 0 : -1; 3391 int inc = 1; 3392 3393 regs[rd] = notfound; 3394 3395 /* 3396 * strstr() and index()/rindex() have similar semantics if 3397 * both strings are the empty string: strstr() returns a 3398 * pointer to the (empty) string, and index() and rindex() 3399 * both return index 0 (regardless of any position argument). 3400 */ 3401 if (sublen == 0 && len == 0) { 3402 if (subr == DIF_SUBR_STRSTR) 3403 regs[rd] = (uintptr_t)addr; 3404 else 3405 regs[rd] = 0; 3406 break; 3407 } 3408 3409 if (subr != DIF_SUBR_STRSTR) { 3410 if (subr == DIF_SUBR_RINDEX) { 3411 limit = orig - 1; 3412 addr += len; 3413 inc = -1; 3414 } 3415 3416 /* 3417 * Both index() and rindex() take an optional position 3418 * argument that denotes the starting position. 3419 */ 3420 if (nargs == 3) { 3421 int64_t pos = (int64_t)tupregs[2].dttk_value; 3422 3423 /* 3424 * If the position argument to index() is 3425 * negative, Perl implicitly clamps it at 3426 * zero. This semantic is a little surprising 3427 * given the special meaning of negative 3428 * positions to similar Perl functions like 3429 * substr(), but it appears to reflect a 3430 * notion that index() can start from a 3431 * negative index and increment its way up to 3432 * the string. Given this notion, Perl's 3433 * rindex() is at least self-consistent in 3434 * that it implicitly clamps positions greater 3435 * than the string length to be the string 3436 * length. Where Perl completely loses 3437 * coherence, however, is when the specified 3438 * substring is the empty string (""). 
In 3439 * this case, even if the position is 3440 * negative, rindex() returns 0 -- and even if 3441 * the position is greater than the length, 3442 * index() returns the string length. These 3443 * semantics violate the notion that index() 3444 * should never return a value less than the 3445 * specified position and that rindex() should 3446 * never return a value greater than the 3447 * specified position. (One assumes that 3448 * these semantics are artifacts of Perl's 3449 * implementation and not the results of 3450 * deliberate design -- it beggars belief that 3451 * even Larry Wall could desire such oddness.) 3452 * While in the abstract one would wish for 3453 * consistent position semantics across 3454 * substr(), index() and rindex() -- or at the 3455 * very least self-consistent position 3456 * semantics for index() and rindex() -- we 3457 * instead opt to keep with the extant Perl 3458 * semantics, in all their broken glory. (Do 3459 * we have more desire to maintain Perl's 3460 * semantics than Perl does? Probably.) 3461 */ 3462 if (subr == DIF_SUBR_RINDEX) { 3463 if (pos < 0) { 3464 if (sublen == 0) 3465 regs[rd] = 0; 3466 break; 3467 } 3468 3469 if (pos > len) 3470 pos = len; 3471 } else { 3472 if (pos < 0) 3473 pos = 0; 3474 3475 if (pos >= len) { 3476 if (sublen == 0) 3477 regs[rd] = len; 3478 break; 3479 } 3480 } 3481 3482 addr = orig + pos; 3483 } 3484 } 3485 3486 for (regs[rd] = notfound; addr != limit; addr += inc) { 3487 if (dtrace_strncmp(addr, substr, sublen) == 0) { 3488 if (subr != DIF_SUBR_STRSTR) { 3489 /* 3490 * As D index() and rindex() are 3491 * modeled on Perl (and not on awk), 3492 * we return a zero-based (and not a 3493 * one-based) index. (For you Perl 3494 * weenies: no, we're not going to add 3495 * $[ -- and shouldn't you be at a con 3496 * or something?) 
3497 */ 3498 regs[rd] = (uintptr_t)(addr - orig); 3499 break; 3500 } 3501 3502 ASSERT(subr == DIF_SUBR_STRSTR); 3503 regs[rd] = (uintptr_t)addr; 3504 break; 3505 } 3506 } 3507 3508 break; 3509 } 3510 3511 case DIF_SUBR_STRTOK: { 3512 uintptr_t addr = tupregs[0].dttk_value; 3513 uintptr_t tokaddr = tupregs[1].dttk_value; 3514 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 3515 uintptr_t limit, toklimit = tokaddr + size; 3516 uint8_t c, tokmap[32]; /* 256 / 8 */ 3517 char *dest = (char *)mstate->dtms_scratch_ptr; 3518 int i; 3519 3520 if (mstate->dtms_scratch_ptr + size > 3521 mstate->dtms_scratch_base + mstate->dtms_scratch_size) { 3522 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3523 regs[rd] = NULL; 3524 break; 3525 } 3526 3527 if (addr == NULL) { 3528 /* 3529 * If the address specified is NULL, we use our saved 3530 * strtok pointer from the mstate. Note that this 3531 * means that the saved strtok pointer is _only_ 3532 * valid within multiple enablings of the same probe -- 3533 * it behaves like an implicit clause-local variable. 3534 */ 3535 addr = mstate->dtms_strtok; 3536 } 3537 3538 /* 3539 * First, zero the token map, and then process the token 3540 * string -- setting a bit in the map for every character 3541 * found in the token string. 3542 */ 3543 for (i = 0; i < (int)sizeof (tokmap); i++) 3544 tokmap[i] = 0; 3545 3546 for (; tokaddr < toklimit; tokaddr++) { 3547 if ((c = dtrace_load8(tokaddr)) == '\0') 3548 break; 3549 3550 ASSERT((c >> 3) < sizeof (tokmap)); 3551 tokmap[c >> 3] |= (1 << (c & 0x7)); 3552 } 3553 3554 for (limit = addr + size; addr < limit; addr++) { 3555 /* 3556 * We're looking for a character that is _not_ contained 3557 * in the token string. 3558 */ 3559 if ((c = dtrace_load8(addr)) == '\0') 3560 break; 3561 3562 if (!(tokmap[c >> 3] & (1 << (c & 0x7)))) 3563 break; 3564 } 3565 3566 if (c == '\0') { 3567 /* 3568 * We reached the end of the string without finding 3569 * any character that was not in the token string. 
3570 * We return NULL in this case, and we set the saved 3571 * address to NULL as well. 3572 */ 3573 regs[rd] = NULL; 3574 mstate->dtms_strtok = NULL; 3575 break; 3576 } 3577 3578 /* 3579 * From here on, we're copying into the destination string. 3580 */ 3581 for (i = 0; addr < limit && i < size - 1; addr++) { 3582 if ((c = dtrace_load8(addr)) == '\0') 3583 break; 3584 3585 if (tokmap[c >> 3] & (1 << (c & 0x7))) 3586 break; 3587 3588 ASSERT(i < size); 3589 dest[i++] = c; 3590 } 3591 3592 ASSERT(i < size); 3593 dest[i] = '\0'; 3594 regs[rd] = (uintptr_t)dest; 3595 mstate->dtms_scratch_ptr += size; 3596 mstate->dtms_strtok = addr; 3597 break; 3598 } 3599 3600 case DIF_SUBR_SUBSTR: { 3601 uintptr_t s = tupregs[0].dttk_value; 3602 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 3603 char *d = (char *)mstate->dtms_scratch_ptr; 3604 int64_t index = (int64_t)tupregs[1].dttk_value; 3605 int64_t remaining = (int64_t)tupregs[2].dttk_value; 3606 size_t len = dtrace_strlen((char *)s, size); 3607 int64_t i = 0; 3608 3609 if (nargs <= 2) 3610 remaining = (int64_t)size; 3611 3612 if (mstate->dtms_scratch_ptr + size > 3613 mstate->dtms_scratch_base + mstate->dtms_scratch_size) { 3614 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3615 regs[rd] = NULL; 3616 break; 3617 } 3618 3619 if (index < 0) { 3620 index += len; 3621 3622 if (index < 0 && index + remaining > 0) { 3623 remaining += index; 3624 index = 0; 3625 } 3626 } 3627 3628 if (index >= len || index < 0) 3629 index = len; 3630 3631 for (d[0] = '\0'; remaining > 0; remaining--) { 3632 if ((d[i++] = dtrace_load8(s++ + index)) == '\0') 3633 break; 3634 3635 if (i == size) { 3636 d[i - 1] = '\0'; 3637 break; 3638 } 3639 } 3640 3641 mstate->dtms_scratch_ptr += size; 3642 regs[rd] = (uintptr_t)d; 3643 break; 3644 } 3645 3646#if !defined(__APPLE__) 3647 case DIF_SUBR_GETMAJOR: 3648#ifdef __LP64__ 3649 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64; 3650#else 3651 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & 
MAXMAJ; 3652#endif 3653 break; 3654 3655#else /* __APPLE__ */ 3656 case DIF_SUBR_GETMAJOR: 3657 regs[rd] = (uintptr_t)major( (dev_t)tupregs[0].dttk_value ); 3658 break; 3659#endif /* __APPLE__ */ 3660 3661#if !defined(__APPLE__) 3662 case DIF_SUBR_GETMINOR: 3663#ifdef __LP64__ 3664 regs[rd] = tupregs[0].dttk_value & MAXMIN64; 3665#else 3666 regs[rd] = tupregs[0].dttk_value & MAXMIN; 3667#endif 3668 break; 3669 3670#else /* __APPLE__ */ 3671 case DIF_SUBR_GETMINOR: 3672 regs[rd] = (uintptr_t)minor( (dev_t)tupregs[0].dttk_value ); 3673 break; 3674#endif /* __APPLE__ */ 3675 3676#if !defined(__APPLE__) 3677 case DIF_SUBR_DDI_PATHNAME: { 3678 /* 3679 * This one is a galactic mess. We are going to roughly 3680 * emulate ddi_pathname(), but it's made more complicated 3681 * by the fact that we (a) want to include the minor name and 3682 * (b) must proceed iteratively instead of recursively. 3683 */ 3684 uintptr_t dest = mstate->dtms_scratch_ptr; 3685 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 3686 char *start = (char *)dest, *end = start + size - 1; 3687 uintptr_t daddr = tupregs[0].dttk_value; 3688 int64_t minor = (int64_t)tupregs[1].dttk_value; 3689 char *s; 3690 int i, len, depth = 0; 3691 3692 if (size == 0 || mstate->dtms_scratch_ptr + size > 3693 mstate->dtms_scratch_base + mstate->dtms_scratch_size) { 3694 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3695 regs[rd] = NULL; 3696 break; 3697 } 3698 3699 *end = '\0'; 3700 3701 /* 3702 * We want to have a name for the minor. In order to do this, 3703 * we need to walk the minor list from the devinfo. We want 3704 * to be sure that we don't infinitely walk a circular list, 3705 * so we check for circularity by sending a scout pointer 3706 * ahead two elements for every element that we iterate over; 3707 * if the list is circular, these will ultimately point to the 3708 * same element. 
You may recognize this little trick as the 3709 * answer to a stupid interview question -- one that always 3710 * seems to be asked by those who had to have it laboriously 3711 * explained to them, and who can't even concisely describe 3712 * the conditions under which one would be forced to resort to 3713 * this technique. Needless to say, those conditions are 3714 * found here -- and probably only here. Is this is the only 3715 * use of this infamous trick in shipping, production code? 3716 * If it isn't, it probably should be... 3717 */ 3718 if (minor != -1) { 3719 uintptr_t maddr = dtrace_loadptr(daddr + 3720 offsetof(struct dev_info, devi_minor)); 3721 3722 uintptr_t next = offsetof(struct ddi_minor_data, next); 3723 uintptr_t name = offsetof(struct ddi_minor_data, 3724 d_minor) + offsetof(struct ddi_minor, name); 3725 uintptr_t dev = offsetof(struct ddi_minor_data, 3726 d_minor) + offsetof(struct ddi_minor, dev); 3727 uintptr_t scout; 3728 3729 if (maddr != NULL) 3730 scout = dtrace_loadptr(maddr + next); 3731 3732 while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { 3733 uint64_t m; 3734#ifdef __LP64__ 3735 m = dtrace_load64(maddr + dev) & MAXMIN64; 3736#else 3737 m = dtrace_load32(maddr + dev) & MAXMIN; 3738#endif 3739 if (m != minor) { 3740 maddr = dtrace_loadptr(maddr + next); 3741 3742 if (scout == NULL) 3743 continue; 3744 3745 scout = dtrace_loadptr(scout + next); 3746 3747 if (scout == NULL) 3748 continue; 3749 3750 scout = dtrace_loadptr(scout + next); 3751 3752 if (scout == NULL) 3753 continue; 3754 3755 if (scout == maddr) { 3756 *flags |= CPU_DTRACE_ILLOP; 3757 break; 3758 } 3759 3760 continue; 3761 } 3762 3763 /* 3764 * We have the minor data. Now we need to 3765 * copy the minor's name into the end of the 3766 * pathname. 
3767 */ 3768 s = (char *)dtrace_loadptr(maddr + name); 3769 len = dtrace_strlen(s, size); 3770 3771 if (*flags & CPU_DTRACE_FAULT) 3772 break; 3773 3774 if (len != 0) { 3775 if ((end -= (len + 1)) < start) 3776 break; 3777 3778 *end = ':'; 3779 } 3780 3781 for (i = 1; i <= len; i++) 3782 end[i] = dtrace_load8((uintptr_t)s++); 3783 break; 3784 } 3785 } 3786 3787 while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { 3788 ddi_node_state_t devi_state; 3789 3790 devi_state = dtrace_load32(daddr + 3791 offsetof(struct dev_info, devi_node_state)); 3792 3793 if (*flags & CPU_DTRACE_FAULT) 3794 break; 3795 3796 if (devi_state >= DS_INITIALIZED) { 3797 s = (char *)dtrace_loadptr(daddr + 3798 offsetof(struct dev_info, devi_addr)); 3799 len = dtrace_strlen(s, size); 3800 3801 if (*flags & CPU_DTRACE_FAULT) 3802 break; 3803 3804 if (len != 0) { 3805 if ((end -= (len + 1)) < start) 3806 break; 3807 3808 *end = '@'; 3809 } 3810 3811 for (i = 1; i <= len; i++) 3812 end[i] = dtrace_load8((uintptr_t)s++); 3813 } 3814 3815 /* 3816 * Now for the node name... 3817 */ 3818 s = (char *)dtrace_loadptr(daddr + 3819 offsetof(struct dev_info, devi_node_name)); 3820 3821 daddr = dtrace_loadptr(daddr + 3822 offsetof(struct dev_info, devi_parent)); 3823 3824 /* 3825 * If our parent is NULL (that is, if we're the root 3826 * node), we're going to use the special path 3827 * "devices". 
3828 */ 3829 if (daddr == NULL) 3830 s = "devices"; 3831 3832 len = dtrace_strlen(s, size); 3833 if (*flags & CPU_DTRACE_FAULT) 3834 break; 3835 3836 if ((end -= (len + 1)) < start) 3837 break; 3838 3839 for (i = 1; i <= len; i++) 3840 end[i] = dtrace_load8((uintptr_t)s++); 3841 *end = '/'; 3842 3843 if (depth++ > dtrace_devdepth_max) { 3844 *flags |= CPU_DTRACE_ILLOP; 3845 break; 3846 } 3847 } 3848 3849 if (end < start) 3850 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3851 3852 if (daddr == NULL) { 3853 regs[rd] = (uintptr_t)end; 3854 mstate->dtms_scratch_ptr += size; 3855 } 3856 3857 break; 3858 } 3859#else 3860 case DIF_SUBR_DDI_PATHNAME: { 3861 /* XXX awaits galactic disentanglement ;-} */ 3862 regs[rd] = NULL; 3863 break; 3864 } 3865#endif /* __APPLE__ */ 3866 3867 case DIF_SUBR_STRJOIN: { 3868 char *d = (char *)mstate->dtms_scratch_ptr; 3869 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 3870 uintptr_t s1 = tupregs[0].dttk_value; 3871 uintptr_t s2 = tupregs[1].dttk_value; 3872 int i = 0; 3873 3874 if (mstate->dtms_scratch_ptr + size > 3875 mstate->dtms_scratch_base + mstate->dtms_scratch_size) { 3876 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3877 regs[rd] = NULL; 3878 break; 3879 } 3880 3881 for (;;) { 3882 if (i >= size) { 3883 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3884 regs[rd] = NULL; 3885 break; 3886 } 3887 3888 if ((d[i++] = dtrace_load8(s1++)) == '\0') { 3889 i--; 3890 break; 3891 } 3892 } 3893 3894 for (;;) { 3895 if (i >= size) { 3896 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3897 regs[rd] = NULL; 3898 break; 3899 } 3900 3901 if ((d[i++] = dtrace_load8(s2++)) == '\0') 3902 break; 3903 } 3904 3905 if (i < size) { 3906 mstate->dtms_scratch_ptr += i; 3907 regs[rd] = (uintptr_t)d; 3908 } 3909 3910 break; 3911 } 3912 3913 case DIF_SUBR_LLTOSTR: { 3914 int64_t i = (int64_t)tupregs[0].dttk_value; 3915 int64_t val = i < 0 ? 
i * -1 : i; 3916 uint64_t size = 22; /* enough room for 2^64 in decimal */ 3917 char *end = (char *)mstate->dtms_scratch_ptr + size - 1; 3918 3919 if (mstate->dtms_scratch_ptr + size > 3920 mstate->dtms_scratch_base + mstate->dtms_scratch_size) { 3921 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3922 regs[rd] = NULL; 3923 break; 3924 } 3925 3926 for (*end-- = '\0'; val; val /= 10) 3927 *end-- = '0' + (val % 10); 3928 3929 if (i == 0) 3930 *end-- = '0'; 3931 3932 if (i < 0) 3933 *end-- = '-'; 3934 3935 regs[rd] = (uintptr_t)end + 1; 3936 mstate->dtms_scratch_ptr += size; 3937 break; 3938 } 3939 3940 case DIF_SUBR_DIRNAME: 3941 case DIF_SUBR_BASENAME: { 3942 char *dest = (char *)mstate->dtms_scratch_ptr; 3943 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 3944 uintptr_t src = tupregs[0].dttk_value; 3945 int i, j, len = dtrace_strlen((char *)src, size); 3946 int lastbase = -1, firstbase = -1, lastdir = -1; 3947 int start, end; 3948 3949 if (mstate->dtms_scratch_ptr + size > 3950 mstate->dtms_scratch_base + mstate->dtms_scratch_size) { 3951 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 3952 regs[rd] = NULL; 3953 break; 3954 } 3955 3956 /* 3957 * The basename and dirname for a zero-length string is 3958 * defined to be "." 3959 */ 3960 if (len == 0) { 3961 len = 1; 3962 src = (uintptr_t)"."; 3963 } 3964 3965 /* 3966 * Start from the back of the string, moving back toward the 3967 * front until we see a character that isn't a slash. That 3968 * character is the last character in the basename. 3969 */ 3970 for (i = len - 1; i >= 0; i--) { 3971 if (dtrace_load8(src + i) != '/') 3972 break; 3973 } 3974 3975 if (i >= 0) 3976 lastbase = i; 3977 3978 /* 3979 * Starting from the last character in the basename, move 3980 * towards the front until we find a slash. The character 3981 * that we processed immediately before that is the first 3982 * character in the basename. 
3983 */ 3984 for (; i >= 0; i--) { 3985 if (dtrace_load8(src + i) == '/') 3986 break; 3987 } 3988 3989 if (i >= 0) 3990 firstbase = i + 1; 3991 3992 /* 3993 * Now keep going until we find a non-slash character. That 3994 * character is the last character in the dirname. 3995 */ 3996 for (; i >= 0; i--) { 3997 if (dtrace_load8(src + i) != '/') 3998 break; 3999 } 4000 4001 if (i >= 0) 4002 lastdir = i; 4003 4004 ASSERT(!(lastbase == -1 && firstbase != -1)); 4005 ASSERT(!(firstbase == -1 && lastdir != -1)); 4006 4007 if (lastbase == -1) { 4008 /* 4009 * We didn't find a non-slash character. We know that 4010 * the length is non-zero, so the whole string must be 4011 * slashes. In either the dirname or the basename 4012 * case, we return '/'. 4013 */ 4014 ASSERT(firstbase == -1); 4015 firstbase = lastbase = lastdir = 0; 4016 } 4017 4018 if (firstbase == -1) { 4019 /* 4020 * The entire string consists only of a basename 4021 * component. If we're looking for dirname, we need 4022 * to change our string to be just "."; if we're 4023 * looking for a basename, we'll just set the first 4024 * character of the basename to be 0. 4025 */ 4026 if (subr == DIF_SUBR_DIRNAME) { 4027 ASSERT(lastdir == -1); 4028 src = (uintptr_t)"."; 4029 lastdir = 0; 4030 } else { 4031 firstbase = 0; 4032 } 4033 } 4034 4035 if (subr == DIF_SUBR_DIRNAME) { 4036 if (lastdir == -1) { 4037 /* 4038 * We know that we have a slash in the name -- 4039 * or lastdir would be set to 0, above. And 4040 * because lastdir is -1, we know that this 4041 * slash must be the first character. (That 4042 * is, the full string must be of the form 4043 * "/basename".) In this case, the last 4044 * character of the directory name is 0. 
4045 */ 4046 lastdir = 0; 4047 } 4048 4049 start = 0; 4050 end = lastdir; 4051 } else { 4052 ASSERT(subr == DIF_SUBR_BASENAME); 4053 ASSERT(firstbase != -1 && lastbase != -1); 4054 start = firstbase; 4055 end = lastbase; 4056 } 4057 4058 for (i = start, j = 0; i <= end && j < size - 1; i++, j++) 4059 dest[j] = dtrace_load8(src + i); 4060 4061 dest[j] = '\0'; 4062 regs[rd] = (uintptr_t)dest; 4063 mstate->dtms_scratch_ptr += size; 4064 break; 4065 } 4066 4067 case DIF_SUBR_CLEANPATH: { 4068 char *dest = (char *)mstate->dtms_scratch_ptr, c; 4069 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 4070 uintptr_t src = tupregs[0].dttk_value; 4071 int i = 0, j = 0; 4072 4073 if (mstate->dtms_scratch_ptr + size > 4074 mstate->dtms_scratch_base + mstate->dtms_scratch_size) { 4075 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4076 regs[rd] = NULL; 4077 break; 4078 } 4079 4080 /* 4081 * Move forward, loading each character. 4082 */ 4083 do { 4084 c = dtrace_load8(src + i++); 4085next: 4086 if (j + 5 >= size) /* 5 = strlen("/..c\0") */ 4087 break; 4088 4089 if (c != '/') { 4090 dest[j++] = c; 4091 continue; 4092 } 4093 4094 c = dtrace_load8(src + i++); 4095 4096 if (c == '/') { 4097 /* 4098 * We have two slashes -- we can just advance 4099 * to the next character. 4100 */ 4101 goto next; 4102 } 4103 4104 if (c != '.') { 4105 /* 4106 * This is not "." and it's not ".." -- we can 4107 * just store the "/" and this character and 4108 * drive on. 4109 */ 4110 dest[j++] = '/'; 4111 dest[j++] = c; 4112 continue; 4113 } 4114 4115 c = dtrace_load8(src + i++); 4116 4117 if (c == '/') { 4118 /* 4119 * This is a "/./" component. We're not going 4120 * to store anything in the destination buffer; 4121 * we're just going to go to the next component. 4122 */ 4123 goto next; 4124 } 4125 4126 if (c != '.') { 4127 /* 4128 * This is not ".." -- we can just store the 4129 * "/." and this character and continue 4130 * processing. 
4131 */ 4132 dest[j++] = '/'; 4133 dest[j++] = '.'; 4134 dest[j++] = c; 4135 continue; 4136 } 4137 4138 c = dtrace_load8(src + i++); 4139 4140 if (c != '/' && c != '\0') { 4141 /* 4142 * This is not ".." -- it's "..[mumble]". 4143 * We'll store the "/.." and this character 4144 * and continue processing. 4145 */ 4146 dest[j++] = '/'; 4147 dest[j++] = '.'; 4148 dest[j++] = '.'; 4149 dest[j++] = c; 4150 continue; 4151 } 4152 4153 /* 4154 * This is "/../" or "/..\0". We need to back up 4155 * our destination pointer until we find a "/". 4156 */ 4157 i--; 4158 while (j != 0 && dest[--j] != '/') 4159 continue; 4160 4161 if (c == '\0') 4162 dest[++j] = '/'; 4163 } while (c != '\0'); 4164 4165 dest[j] = '\0'; 4166 regs[rd] = (uintptr_t)dest; 4167 mstate->dtms_scratch_ptr += size; 4168 break; 4169 } 4170#ifdef __APPLE__ 4171 4172 /* CHUD callback ('chud(uint64_t, [uint64_t], [uint64_t] ...)') */ 4173 case DIF_SUBR_CHUD: { 4174 uint64_t selector = tupregs[0].dttk_value; 4175 uint64_t args[DIF_DTR_NREGS-1] = {0ULL}; 4176 uint32_t ii; 4177 4178 /* copy in any variadic argument list */ 4179 for(ii = 0; ii < DIF_DTR_NREGS-1; ii++) { 4180 args[ii] = tupregs[ii+1].dttk_value; 4181 } 4182 4183 kern_return_t ret = 4184 chudxnu_dtrace_callback(selector, args, DIF_DTR_NREGS-1); 4185 if(KERN_SUCCESS != ret) { 4186 /* error */ 4187 } 4188 break; 4189 } 4190 4191#endif /* __APPLE__ */ 4192 4193 } 4194} 4195 4196/* 4197 * Emulate the execution of DTrace IR instructions specified by the given 4198 * DIF object. This function is deliberately void of assertions as all of 4199 * the necessary checks are handled by a call to dtrace_difo_validate(). 
4200 */ 4201static uint64_t 4202dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, 4203 dtrace_vstate_t *vstate, dtrace_state_t *state) 4204{ 4205 const dif_instr_t *text = difo->dtdo_buf; 4206 const uint_t textlen = difo->dtdo_len; 4207 const char *strtab = difo->dtdo_strtab; 4208 const uint64_t *inttab = difo->dtdo_inttab; 4209 4210 uint64_t rval = 0; 4211 dtrace_statvar_t *svar; 4212 dtrace_dstate_t *dstate = &vstate->dtvs_dynvars; 4213 dtrace_difv_t *v; 4214 volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; 4215#if !defined(__APPLE__) 4216 volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; 4217#else 4218 volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; 4219#endif /* __APPLE__ */ 4220 4221 dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */ 4222 uint64_t regs[DIF_DIR_NREGS]; 4223 uint64_t *tmp; 4224 4225 uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0; 4226 int64_t cc_r; 4227 uint_t pc = 0, id, opc; 4228 uint8_t ttop = 0; 4229 dif_instr_t instr; 4230 uint_t r1, r2, rd; 4231 4232 regs[DIF_REG_R0] = 0; /* %r0 is fixed at zero */ 4233 4234 while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) { 4235 opc = pc; 4236 4237 instr = text[pc++]; 4238 r1 = DIF_INSTR_R1(instr); 4239 r2 = DIF_INSTR_R2(instr); 4240 rd = DIF_INSTR_RD(instr); 4241 4242 switch (DIF_INSTR_OP(instr)) { 4243 case DIF_OP_OR: 4244 regs[rd] = regs[r1] | regs[r2]; 4245 break; 4246 case DIF_OP_XOR: 4247 regs[rd] = regs[r1] ^ regs[r2]; 4248 break; 4249 case DIF_OP_AND: 4250 regs[rd] = regs[r1] & regs[r2]; 4251 break; 4252 case DIF_OP_SLL: 4253 regs[rd] = regs[r1] << regs[r2]; 4254 break; 4255 case DIF_OP_SRL: 4256 regs[rd] = regs[r1] >> regs[r2]; 4257 break; 4258 case DIF_OP_SUB: 4259 regs[rd] = regs[r1] - regs[r2]; 4260 break; 4261 case DIF_OP_ADD: 4262 regs[rd] = regs[r1] + regs[r2]; 4263 break; 4264 case DIF_OP_MUL: 4265 regs[rd] = regs[r1] * regs[r2]; 4266 break; 4267 case DIF_OP_SDIV: 4268 if (regs[r2] == 0) { 
4269 regs[rd] = 0; 4270 *flags |= CPU_DTRACE_DIVZERO; 4271 } else { 4272 regs[rd] = (int64_t)regs[r1] / 4273 (int64_t)regs[r2]; 4274 } 4275 break; 4276 4277 case DIF_OP_UDIV: 4278 if (regs[r2] == 0) { 4279 regs[rd] = 0; 4280 *flags |= CPU_DTRACE_DIVZERO; 4281 } else { 4282 regs[rd] = regs[r1] / regs[r2]; 4283 } 4284 break; 4285 4286 case DIF_OP_SREM: 4287 if (regs[r2] == 0) { 4288 regs[rd] = 0; 4289 *flags |= CPU_DTRACE_DIVZERO; 4290 } else { 4291 regs[rd] = (int64_t)regs[r1] % 4292 (int64_t)regs[r2]; 4293 } 4294 break; 4295 4296 case DIF_OP_UREM: 4297 if (regs[r2] == 0) { 4298 regs[rd] = 0; 4299 *flags |= CPU_DTRACE_DIVZERO; 4300 } else { 4301 regs[rd] = regs[r1] % regs[r2]; 4302 } 4303 break; 4304 4305 case DIF_OP_NOT: 4306 regs[rd] = ~regs[r1]; 4307 break; 4308 case DIF_OP_MOV: 4309 regs[rd] = regs[r1]; 4310 break; 4311 case DIF_OP_CMP: 4312 cc_r = regs[r1] - regs[r2]; 4313 cc_n = cc_r < 0; 4314 cc_z = cc_r == 0; 4315 cc_v = 0; 4316 cc_c = regs[r1] < regs[r2]; 4317 break; 4318 case DIF_OP_TST: 4319 cc_n = cc_v = cc_c = 0; 4320 cc_z = regs[r1] == 0; 4321 break; 4322 case DIF_OP_BA: 4323 pc = DIF_INSTR_LABEL(instr); 4324 break; 4325 case DIF_OP_BE: 4326 if (cc_z) 4327 pc = DIF_INSTR_LABEL(instr); 4328 break; 4329 case DIF_OP_BNE: 4330 if (cc_z == 0) 4331 pc = DIF_INSTR_LABEL(instr); 4332 break; 4333 case DIF_OP_BG: 4334 if ((cc_z | (cc_n ^ cc_v)) == 0) 4335 pc = DIF_INSTR_LABEL(instr); 4336 break; 4337 case DIF_OP_BGU: 4338 if ((cc_c | cc_z) == 0) 4339 pc = DIF_INSTR_LABEL(instr); 4340 break; 4341 case DIF_OP_BGE: 4342 if ((cc_n ^ cc_v) == 0) 4343 pc = DIF_INSTR_LABEL(instr); 4344 break; 4345 case DIF_OP_BGEU: 4346 if (cc_c == 0) 4347 pc = DIF_INSTR_LABEL(instr); 4348 break; 4349 case DIF_OP_BL: 4350 if (cc_n ^ cc_v) 4351 pc = DIF_INSTR_LABEL(instr); 4352 break; 4353 case DIF_OP_BLU: 4354 if (cc_c) 4355 pc = DIF_INSTR_LABEL(instr); 4356 break; 4357 case DIF_OP_BLE: 4358 if (cc_z | (cc_n ^ cc_v)) 4359 pc = DIF_INSTR_LABEL(instr); 4360 break; 4361 case DIF_OP_BLEU: 
4362 if (cc_c | cc_z) 4363 pc = DIF_INSTR_LABEL(instr); 4364 break; 4365 case DIF_OP_RLDSB: 4366 if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) { 4367 *flags |= CPU_DTRACE_KPRIV; 4368 *illval = regs[r1]; 4369 break; 4370 } 4371 /*FALLTHROUGH*/ 4372 case DIF_OP_LDSB: 4373 regs[rd] = (int8_t)dtrace_load8(regs[r1]); 4374 break; 4375 case DIF_OP_RLDSH: 4376 if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) { 4377 *flags |= CPU_DTRACE_KPRIV; 4378 *illval = regs[r1]; 4379 break; 4380 } 4381 /*FALLTHROUGH*/ 4382 case DIF_OP_LDSH: 4383 regs[rd] = (int16_t)dtrace_load16(regs[r1]); 4384 break; 4385 case DIF_OP_RLDSW: 4386 if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) { 4387 *flags |= CPU_DTRACE_KPRIV; 4388 *illval = regs[r1]; 4389 break; 4390 } 4391 /*FALLTHROUGH*/ 4392 case DIF_OP_LDSW: 4393 regs[rd] = (int32_t)dtrace_load32(regs[r1]); 4394 break; 4395 case DIF_OP_RLDUB: 4396 if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) { 4397 *flags |= CPU_DTRACE_KPRIV; 4398 *illval = regs[r1]; 4399 break; 4400 } 4401 /*FALLTHROUGH*/ 4402 case DIF_OP_LDUB: 4403 regs[rd] = dtrace_load8(regs[r1]); 4404 break; 4405 case DIF_OP_RLDUH: 4406 if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) { 4407 *flags |= CPU_DTRACE_KPRIV; 4408 *illval = regs[r1]; 4409 break; 4410 } 4411 /*FALLTHROUGH*/ 4412 case DIF_OP_LDUH: 4413 regs[rd] = dtrace_load16(regs[r1]); 4414 break; 4415 case DIF_OP_RLDUW: 4416 if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) { 4417 *flags |= CPU_DTRACE_KPRIV; 4418 *illval = regs[r1]; 4419 break; 4420 } 4421 /*FALLTHROUGH*/ 4422 case DIF_OP_LDUW: 4423 regs[rd] = dtrace_load32(regs[r1]); 4424 break; 4425 case DIF_OP_RLDX: 4426 if (!dtrace_canstore(regs[r1], 8, mstate, vstate)) { 4427 *flags |= CPU_DTRACE_KPRIV; 4428 *illval = regs[r1]; 4429 break; 4430 } 4431 /*FALLTHROUGH*/ 4432 case DIF_OP_LDX: 4433 regs[rd] = dtrace_load64(regs[r1]); 4434 break; 4435 case DIF_OP_ULDSB: 4436 regs[rd] = (int8_t) 4437 dtrace_fuword8(regs[r1]); 4438 break; 4439 case DIF_OP_ULDSH: 
4440 regs[rd] = (int16_t) 4441 dtrace_fuword16(regs[r1]); 4442 break; 4443 case DIF_OP_ULDSW: 4444 regs[rd] = (int32_t) 4445 dtrace_fuword32(regs[r1]); 4446 break; 4447 case DIF_OP_ULDUB: 4448 regs[rd] = 4449 dtrace_fuword8(regs[r1]); 4450 break; 4451 case DIF_OP_ULDUH: 4452 regs[rd] = 4453 dtrace_fuword16(regs[r1]); 4454 break; 4455 case DIF_OP_ULDUW: 4456 regs[rd] = 4457 dtrace_fuword32(regs[r1]); 4458 break; 4459 case DIF_OP_ULDX: 4460 regs[rd] = 4461 dtrace_fuword64(regs[r1]); 4462 break; 4463 case DIF_OP_RET: 4464 rval = regs[rd]; 4465 break; 4466 case DIF_OP_NOP: 4467 break; 4468 case DIF_OP_SETX: 4469 regs[rd] = inttab[DIF_INSTR_INTEGER(instr)]; 4470 break; 4471 case DIF_OP_SETS: 4472 regs[rd] = (uint64_t)(uintptr_t) 4473 (strtab + DIF_INSTR_STRING(instr)); 4474 break; 4475 case DIF_OP_SCMP: 4476 cc_r = dtrace_strncmp((char *)(uintptr_t)regs[r1], 4477 (char *)(uintptr_t)regs[r2], 4478 state->dts_options[DTRACEOPT_STRSIZE]); 4479 4480 cc_n = cc_r < 0; 4481 cc_z = cc_r == 0; 4482 cc_v = cc_c = 0; 4483 break; 4484 case DIF_OP_LDGA: 4485 regs[rd] = dtrace_dif_variable(mstate, state, 4486 r1, regs[r2]); 4487 break; 4488 case DIF_OP_LDGS: 4489 id = DIF_INSTR_VAR(instr); 4490 4491 if (id >= DIF_VAR_OTHER_UBASE) { 4492 uintptr_t a; 4493 4494 id -= DIF_VAR_OTHER_UBASE; 4495 svar = vstate->dtvs_globals[id]; 4496 ASSERT(svar != NULL); 4497 v = &svar->dtsv_var; 4498 4499 if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) { 4500 regs[rd] = svar->dtsv_data; 4501 break; 4502 } 4503 4504 a = (uintptr_t)svar->dtsv_data; 4505 4506 if (*(uint8_t *)a == UINT8_MAX) { 4507 /* 4508 * If the 0th byte is set to UINT8_MAX 4509 * then this is to be treated as a 4510 * reference to a NULL variable. 
4511 */ 4512 regs[rd] = NULL; 4513 } else { 4514 regs[rd] = a + sizeof (uint64_t); 4515 } 4516 4517 break; 4518 } 4519 4520 regs[rd] = dtrace_dif_variable(mstate, state, id, 0); 4521 break; 4522 4523 case DIF_OP_STGS: 4524 id = DIF_INSTR_VAR(instr); 4525 4526 ASSERT(id >= DIF_VAR_OTHER_UBASE); 4527 id -= DIF_VAR_OTHER_UBASE; 4528 4529 svar = vstate->dtvs_globals[id]; 4530 ASSERT(svar != NULL); 4531 v = &svar->dtsv_var; 4532 4533 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 4534 uintptr_t a = (uintptr_t)svar->dtsv_data; 4535 4536 ASSERT(a != NULL); 4537 ASSERT(svar->dtsv_size != 0); 4538 4539 if (regs[rd] == NULL) { 4540 *(uint8_t *)a = UINT8_MAX; 4541 break; 4542 } else { 4543 *(uint8_t *)a = 0; 4544 a += sizeof (uint64_t); 4545 } 4546 4547 dtrace_vcopy((void *)(uintptr_t)regs[rd], 4548 (void *)a, &v->dtdv_type); 4549 break; 4550 } 4551 4552 svar->dtsv_data = regs[rd]; 4553 break; 4554 4555 case DIF_OP_LDTA: 4556 /* 4557 * There are no DTrace built-in thread-local arrays at 4558 * present. This opcode is saved for future work. 4559 */ 4560 *flags |= CPU_DTRACE_ILLOP; 4561 regs[rd] = 0; 4562 break; 4563 4564 case DIF_OP_LDLS: 4565 id = DIF_INSTR_VAR(instr); 4566 4567 if (id < DIF_VAR_OTHER_UBASE) { 4568 /* 4569 * For now, this has no meaning. 4570 */ 4571 regs[rd] = 0; 4572 break; 4573 } 4574 4575 id -= DIF_VAR_OTHER_UBASE; 4576 4577 ASSERT(id < vstate->dtvs_nlocals); 4578 ASSERT(vstate->dtvs_locals != NULL); 4579 4580 svar = vstate->dtvs_locals[id]; 4581 ASSERT(svar != NULL); 4582 v = &svar->dtsv_var; 4583 4584 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 4585 uintptr_t a = (uintptr_t)svar->dtsv_data; 4586 size_t sz = v->dtdv_type.dtdt_size; 4587 4588 sz += sizeof (uint64_t); 4589 ASSERT(svar->dtsv_size == (int)NCPU * sz); 4590 a += CPU->cpu_id * sz; 4591 4592 if (*(uint8_t *)a == UINT8_MAX) { 4593 /* 4594 * If the 0th byte is set to UINT8_MAX 4595 * then this is to be treated as a 4596 * reference to a NULL variable. 
4597 */ 4598 regs[rd] = NULL; 4599 } else { 4600 regs[rd] = a + sizeof (uint64_t); 4601 } 4602 4603 break; 4604 } 4605 4606 ASSERT(svar->dtsv_size == (int)NCPU * sizeof (uint64_t)); 4607 tmp = (uint64_t *)(uintptr_t)svar->dtsv_data; 4608 regs[rd] = tmp[CPU->cpu_id]; 4609 break; 4610 4611 case DIF_OP_STLS: 4612 id = DIF_INSTR_VAR(instr); 4613 4614 ASSERT(id >= DIF_VAR_OTHER_UBASE); 4615 id -= DIF_VAR_OTHER_UBASE; 4616 ASSERT(id < vstate->dtvs_nlocals); 4617 4618 ASSERT(vstate->dtvs_locals != NULL); 4619 svar = vstate->dtvs_locals[id]; 4620 ASSERT(svar != NULL); 4621 v = &svar->dtsv_var; 4622 4623 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 4624 uintptr_t a = (uintptr_t)svar->dtsv_data; 4625 size_t sz = v->dtdv_type.dtdt_size; 4626 4627 sz += sizeof (uint64_t); 4628 ASSERT(svar->dtsv_size == (int)NCPU * sz); 4629 a += CPU->cpu_id * sz; 4630 4631 if (regs[rd] == NULL) { 4632 *(uint8_t *)a = UINT8_MAX; 4633 break; 4634 } else { 4635 *(uint8_t *)a = 0; 4636 a += sizeof (uint64_t); 4637 } 4638 4639 dtrace_vcopy((void *)(uintptr_t)regs[rd], 4640 (void *)a, &v->dtdv_type); 4641 break; 4642 } 4643 4644 ASSERT(svar->dtsv_size == (int)NCPU * sizeof (uint64_t)); 4645 tmp = (uint64_t *)(uintptr_t)svar->dtsv_data; 4646 tmp[CPU->cpu_id] = regs[rd]; 4647 break; 4648 4649 case DIF_OP_LDTS: { 4650 dtrace_dynvar_t *dvar; 4651 dtrace_key_t *key; 4652 4653 id = DIF_INSTR_VAR(instr); 4654 ASSERT(id >= DIF_VAR_OTHER_UBASE); 4655 id -= DIF_VAR_OTHER_UBASE; 4656 v = &vstate->dtvs_tlocals[id]; 4657 4658 key = &tupregs[DIF_DTR_NREGS]; 4659 key[0].dttk_value = (uint64_t)id; 4660 key[0].dttk_size = 0; 4661 DTRACE_TLS_THRKEY(key[1].dttk_value); 4662 key[1].dttk_size = 0; 4663 4664 dvar = dtrace_dynvar(dstate, 2, key, 4665 sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC); 4666 4667 if (dvar == NULL) { 4668 regs[rd] = 0; 4669 break; 4670 } 4671 4672 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 4673 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data; 4674 } else { 4675 regs[rd] = *((uint64_t 
*)dvar->dtdv_data); 4676 } 4677 4678 break; 4679 } 4680 4681 case DIF_OP_STTS: { 4682 dtrace_dynvar_t *dvar; 4683 dtrace_key_t *key; 4684 4685 id = DIF_INSTR_VAR(instr); 4686 ASSERT(id >= DIF_VAR_OTHER_UBASE); 4687 id -= DIF_VAR_OTHER_UBASE; 4688 4689 key = &tupregs[DIF_DTR_NREGS]; 4690 key[0].dttk_value = (uint64_t)id; 4691 key[0].dttk_size = 0; 4692 DTRACE_TLS_THRKEY(key[1].dttk_value); 4693 key[1].dttk_size = 0; 4694 v = &vstate->dtvs_tlocals[id]; 4695 4696 dvar = dtrace_dynvar(dstate, 2, key, 4697 v->dtdv_type.dtdt_size > sizeof (uint64_t) ? 4698 v->dtdv_type.dtdt_size : sizeof (uint64_t), 4699 regs[rd] ? DTRACE_DYNVAR_ALLOC : 4700 DTRACE_DYNVAR_DEALLOC); 4701 4702 /* 4703 * Given that we're storing to thread-local data, 4704 * we need to flush our predicate cache. 4705 */ 4706#if !defined(__APPLE__) 4707 curthread->t_predcache = NULL; 4708#else 4709 dtrace_set_thread_predcache(current_thread(), 0); 4710#endif /* __APPLE__ */ 4711 4712 4713 if (dvar == NULL) 4714 break; 4715 4716 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 4717 dtrace_vcopy((void *)(uintptr_t)regs[rd], 4718 dvar->dtdv_data, &v->dtdv_type); 4719 } else { 4720 *((uint64_t *)dvar->dtdv_data) = regs[rd]; 4721 } 4722 4723 break; 4724 } 4725 4726 case DIF_OP_SRA: 4727 regs[rd] = (int64_t)regs[r1] >> regs[r2]; 4728 break; 4729 4730 case DIF_OP_CALL: 4731 dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd, 4732 regs, tupregs, ttop, mstate, state); 4733 break; 4734 4735 case DIF_OP_PUSHTR: 4736 if (ttop == DIF_DTR_NREGS) { 4737 *flags |= CPU_DTRACE_TUPOFLOW; 4738 break; 4739 } 4740 4741 if (r1 == DIF_TYPE_STRING) { 4742 /* 4743 * If this is a string type and the size is 0, 4744 * we'll use the system-wide default string 4745 * size. Note that we are _not_ looking at 4746 * the value of the DTRACEOPT_STRSIZE option; 4747 * had this been set, we would expect to have 4748 * a non-zero size value in the "pushtr". 4749 */ 4750 tupregs[ttop].dttk_size = 4751 dtrace_strlen((char *)(uintptr_t)regs[rd], 4752 regs[r2] ? 
regs[r2] : 4753 dtrace_strsize_default) + 1; 4754 } else { 4755 tupregs[ttop].dttk_size = regs[r2]; 4756 } 4757 4758 tupregs[ttop++].dttk_value = regs[rd]; 4759 break; 4760 4761 case DIF_OP_PUSHTV: 4762 if (ttop == DIF_DTR_NREGS) { 4763 *flags |= CPU_DTRACE_TUPOFLOW; 4764 break; 4765 } 4766 4767 tupregs[ttop].dttk_value = regs[rd]; 4768 tupregs[ttop++].dttk_size = 0; 4769 break; 4770 4771 case DIF_OP_POPTS: 4772 if (ttop != 0) 4773 ttop--; 4774 break; 4775 4776 case DIF_OP_FLUSHTS: 4777 ttop = 0; 4778 break; 4779 4780 case DIF_OP_LDGAA: 4781 case DIF_OP_LDTAA: { 4782 dtrace_dynvar_t *dvar; 4783 dtrace_key_t *key = tupregs; 4784 uint_t nkeys = ttop; 4785 4786 id = DIF_INSTR_VAR(instr); 4787 ASSERT(id >= DIF_VAR_OTHER_UBASE); 4788 id -= DIF_VAR_OTHER_UBASE; 4789 4790 key[nkeys].dttk_value = (uint64_t)id; 4791 key[nkeys++].dttk_size = 0; 4792 4793 if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) { 4794 DTRACE_TLS_THRKEY(key[nkeys].dttk_value); 4795 key[nkeys++].dttk_size = 0; 4796 v = &vstate->dtvs_tlocals[id]; 4797 } else { 4798 v = &vstate->dtvs_globals[id]->dtsv_var; 4799 } 4800 4801 dvar = dtrace_dynvar(dstate, nkeys, key, 4802 v->dtdv_type.dtdt_size > sizeof (uint64_t) ? 
4803 v->dtdv_type.dtdt_size : sizeof (uint64_t), 4804 DTRACE_DYNVAR_NOALLOC); 4805 4806 if (dvar == NULL) { 4807 regs[rd] = 0; 4808 break; 4809 } 4810 4811 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 4812 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data; 4813 } else { 4814 regs[rd] = *((uint64_t *)dvar->dtdv_data); 4815 } 4816 4817 break; 4818 } 4819 4820 case DIF_OP_STGAA: 4821 case DIF_OP_STTAA: { 4822 dtrace_dynvar_t *dvar; 4823 dtrace_key_t *key = tupregs; 4824 uint_t nkeys = ttop; 4825 4826 id = DIF_INSTR_VAR(instr); 4827 ASSERT(id >= DIF_VAR_OTHER_UBASE); 4828 id -= DIF_VAR_OTHER_UBASE; 4829 4830 key[nkeys].dttk_value = (uint64_t)id; 4831 key[nkeys++].dttk_size = 0; 4832 4833 if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) { 4834 DTRACE_TLS_THRKEY(key[nkeys].dttk_value); 4835 key[nkeys++].dttk_size = 0; 4836 v = &vstate->dtvs_tlocals[id]; 4837 } else { 4838 v = &vstate->dtvs_globals[id]->dtsv_var; 4839 } 4840 4841 dvar = dtrace_dynvar(dstate, nkeys, key, 4842 v->dtdv_type.dtdt_size > sizeof (uint64_t) ? 4843 v->dtdv_type.dtdt_size : sizeof (uint64_t), 4844 regs[rd] ? 
DTRACE_DYNVAR_ALLOC : 4845 DTRACE_DYNVAR_DEALLOC); 4846 4847 if (dvar == NULL) 4848 break; 4849 4850 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { 4851 dtrace_vcopy((void *)(uintptr_t)regs[rd], 4852 dvar->dtdv_data, &v->dtdv_type); 4853 } else { 4854 *((uint64_t *)dvar->dtdv_data) = regs[rd]; 4855 } 4856 4857 break; 4858 } 4859 4860 case DIF_OP_ALLOCS: { 4861 uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8); 4862 size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1]; 4863 4864 if (mstate->dtms_scratch_ptr + size > 4865 mstate->dtms_scratch_base + 4866 mstate->dtms_scratch_size) { 4867 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4868 regs[rd] = NULL; 4869 } else { 4870 dtrace_bzero((void *) 4871 mstate->dtms_scratch_ptr, size); 4872 mstate->dtms_scratch_ptr += size; 4873 regs[rd] = ptr; 4874 } 4875 break; 4876 } 4877 4878 case DIF_OP_COPYS: 4879 if (!dtrace_canstore(regs[rd], regs[r2], 4880 mstate, vstate)) { 4881 *flags |= CPU_DTRACE_BADADDR; 4882 *illval = regs[rd]; 4883 break; 4884 } 4885 4886 dtrace_bcopy((void *)(uintptr_t)regs[r1], 4887 (void *)(uintptr_t)regs[rd], (size_t)regs[r2]); 4888 break; 4889 4890 case DIF_OP_STB: 4891 if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) { 4892 *flags |= CPU_DTRACE_BADADDR; 4893 *illval = regs[rd]; 4894 break; 4895 } 4896 *((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1]; 4897 break; 4898 4899 case DIF_OP_STH: 4900 if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) { 4901 *flags |= CPU_DTRACE_BADADDR; 4902 *illval = regs[rd]; 4903 break; 4904 } 4905 if (regs[rd] & 1) { 4906 *flags |= CPU_DTRACE_BADALIGN; 4907 *illval = regs[rd]; 4908 break; 4909 } 4910 *((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1]; 4911 break; 4912 4913 case DIF_OP_STW: 4914 if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) { 4915 *flags |= CPU_DTRACE_BADADDR; 4916 *illval = regs[rd]; 4917 break; 4918 } 4919 if (regs[rd] & 3) { 4920 *flags |= CPU_DTRACE_BADALIGN; 4921 *illval = regs[rd]; 4922 break; 4923 } 4924 *((uint32_t 
*)(uintptr_t)regs[rd]) = (uint32_t)regs[r1];
			break;

		case DIF_OP_STX:
			if (!dtrace_canstore(regs[rd], 8, mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = regs[rd];
				break;
			}
#if !defined(__APPLE__)
			if (regs[rd] & 7) {
#else
			if (regs[rd] & 3) { /* Darwin kmem_zalloc() called from dtrace_difo_init() is 4-byte aligned. */
#endif /* __APPLE__ */
				*flags |= CPU_DTRACE_BADALIGN;
				*illval = regs[rd];
				break;
			}
			*((uint64_t *)(uintptr_t)regs[rd]) = regs[r1];
			break;
		}
	}

	/*
	 * Emulation completed without raising a fault:  hand back the value
	 * last set by DIF_OP_RET.
	 */
	if (!(*flags & CPU_DTRACE_FAULT))
		return (rval);

	/*
	 * A fault was raised:  record the byte offset of the faulting
	 * instruction in the machine state so it can be reported.
	 */
	mstate->dtms_fltoffs = opc * sizeof (dif_instr_t);
	mstate->dtms_present |= DTRACE_MSTATE_FLTOFFS;

	return (0);
}

/*
 * Destructive action:  hand-format the string "dtrace: breakpoint action
 * at probe provider:module:function:name (ecb <hex>)" into a local buffer
 * and pass it to debug_enter().  Suppressed entirely when destructive
 * actions are disallowed.
 */
static void
dtrace_action_breakpoint(dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;
	dtrace_provider_t *prov = probe->dtpr_provider;
	char c[DTRACE_FULLNAMELEN + 80], *str;
	char *msg = "dtrace: breakpoint action at probe ";
	char *ecbmsg = " (ecb ";
	/* NOTE(review): this initial value of 'mask' is dead -- it is recomputed on every iteration of the hex loop below. */
	uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4));
	uintptr_t val = (uintptr_t)ecb;
	int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0;

	if (dtrace_destructive_disallow)
		return;

	/*
	 * It's impossible to be taking action on the NULL probe.
	 */
	ASSERT(probe != NULL);

	/*
	 * This is a poor man's (destitute man's?) sprintf():  we want to
	 * print the provider name, module name, function name and name of
	 * the probe, along with the hex address of the ECB with the breakpoint
	 * action -- all of which we must place in the character buffer by
	 * hand.
	 */
	while (*msg != '\0')
		c[i++] = *msg++;

	for (str = prov->dtpv_name; *str != '\0'; str++)
		c[i++] = *str;
	c[i++] = ':';

	for (str = probe->dtpr_mod; *str != '\0'; str++)
		c[i++] = *str;
	c[i++] = ':';

	for (str = probe->dtpr_func; *str != '\0'; str++)
		c[i++] = *str;
	c[i++] = ':';

	for (str = probe->dtpr_name; *str != '\0'; str++)
		c[i++] = *str;

	while (*ecbmsg != '\0')
		c[i++] = *ecbmsg++;

	/*
	 * Emit the ECB address in hex, one nibble at a time from most to
	 * least significant, skipping leading zero nibbles.
	 */
	while (shift >= 0) {
		mask = (uintptr_t)0xf << shift;

		if (val >= ((uintptr_t)1 << shift))
			c[i++] = "0123456789abcdef"[(val & mask) >> shift];
		shift -= 4;
	}

	c[i++] = ')';
	c[i] = '\0';

	debug_enter(c);
}

/*
 * Destructive action:  panic the system, identifying the probe and ECB
 * that fired.  A compare-and-swap on dtrace_panicked ensures that only one
 * thread initiates the panic.
 */
static void
dtrace_action_panic(dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;

	/*
	 * It's impossible to be taking action on the NULL probe.
	 */
	ASSERT(probe != NULL);

	if (dtrace_destructive_disallow)
		return;

	if (dtrace_panicked != NULL)
		return;

#if !defined(__APPLE__)
	if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL)
		return;
#else
	if (dtrace_casptr(&dtrace_panicked, NULL, current_thread()) != NULL)
		return;
#endif /* __APPLE__ */

	/*
	 * We won the right to panic.  (We want to be sure that only one
	 * thread calls panic() from dtrace_probe(), and that panic() is
	 * called exactly once.)
	 */
	dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
	    probe->dtpr_provider->dtpv_name, probe->dtpr_mod,
	    probe->dtpr_func, probe->dtpr_name, (void *)ecb);

#if defined(__APPLE__)
	/* Mac OS X debug feature -- can return from panic() */
	dtrace_panicked = NULL;
#endif /* __APPLE__ */
}

/*
 * Destructive action:  deliver signal 'sig' to the current process.
 * Signal numbers at or above NSIG are rejected with CPU_DTRACE_ILLOP.
 */
static void
dtrace_action_raise(uint64_t sig)
{
	if (dtrace_destructive_disallow)
		return;

	if (sig >= NSIG) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return;
	}

#if !defined(__APPLE__)
	/*
	 * raise() has a queue depth of 1 -- we ignore all subsequent
	 * invocations of the raise() action.
	 */
	if (curthread->t_dtrace_sig == 0)
		curthread->t_dtrace_sig = (uint8_t)sig;

	curthread->t_sig_check = 1;
	aston(curthread);
#else
	uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());

	/*
	 * As on the Solaris path, the queue depth is 1:  only post the
	 * signal if none is already pending from a prior raise().
	 */
	if (uthread && uthread->t_dtrace_sig == 0) {
		uthread->t_dtrace_sig = sig;
		psignal(current_proc(), (int)sig);
	}
#endif /* __APPLE__ */
}

/*
 * Destructive action:  stop the current process.  Darwin posts SIGSTOP
 * directly; the Solaris path defers the stop via t_dtrace_stop/aston().
 */
static void
dtrace_action_stop(void)
{
	if (dtrace_destructive_disallow)
		return;

#if !defined(__APPLE__)
	if (!curthread->t_dtrace_stop) {
		curthread->t_dtrace_stop = 1;
		curthread->t_sig_check = 1;
		aston(curthread);
	}
#else
	psignal(current_proc(), SIGSTOP);
#endif /* __APPLE__ */
}

/*
 * Destructive action:  busy-wait in probe context until 'val' units of
 * dtrace_gethrtime() time have elapsed, subject to the per-interval
 * dtrace_chill_max budget checked below.
 */
static void
dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val)
{
	hrtime_t now;
	volatile uint16_t *flags;
	cpu_t *cpu = CPU;

	if (dtrace_destructive_disallow)
		return;

	flags = (volatile uint16_t *)&cpu_core[cpu->cpu_id].cpuc_dtrace_flags;

	now = dtrace_gethrtime();

	if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) {
		/*
		 * We need to advance the mark to the current time.
		 */
		cpu->cpu_dtrace_chillmark = now;
		cpu->cpu_dtrace_chilled = 0;
	}

	/*
	 * Now check to see if the requested chill time would take us over
	 * the maximum amount of time allowed in the chill interval.  (Or
	 * worse, if the calculation itself induces overflow.)
	 */
	if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max ||
	    cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) {
		*flags |= CPU_DTRACE_ILLOP;
		return;
	}

	/* Spin until the requested interval has elapsed. */
	while (dtrace_gethrtime() - now < val)
		continue;

	/*
	 * Normally, we assure that the value of the variable "timestamp" does
	 * not change within an ECB.  The presence of chill() represents an
	 * exception to this rule, however.
	 */
	mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP;
	cpu->cpu_dtrace_chilled += val;
}

/*
 * Slow path for ustack()/jstack() when string space has been allocated:
 * gather the user stack (program counters and frame pointers), then invoke
 * the USTACK helper for each frame and copy the string it returns into the
 * record alongside the frames.
 */
static void
dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state,
    uint64_t *buf, uint64_t arg)
{
	int nframes = DTRACE_USTACK_NFRAMES(arg);
	int strsize = DTRACE_USTACK_STRSIZE(arg);
	uint64_t *pcs = &buf[1], *fps;
	char *str = (char *)&pcs[nframes];
	int size, offs = 0, i, j;
	uintptr_t old = mstate->dtms_scratch_ptr, saved;
	uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	char *sym;

	/*
	 * Should be taking a faster path if string space has not been
	 * allocated.
	 */
	ASSERT(strsize != 0);

	/*
	 * We will first allocate some temporary space for the frame pointers.
	 */
	fps = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
	size = (uintptr_t)fps - mstate->dtms_scratch_ptr +
	    (nframes * sizeof (uint64_t));

	if (mstate->dtms_scratch_ptr + size >
	    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
		/*
		 * Not enough room for our frame pointers -- need to indicate
		 * that we ran out of scratch space.
		 */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
		return;
	}

	mstate->dtms_scratch_ptr += size;
	saved = mstate->dtms_scratch_ptr;

	/*
	 * Now get a stack with both program counters and frame pointers.
	 */
	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	dtrace_getufpstack(buf, fps, nframes + 1);
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

	/*
	 * If that faulted, we're cooked.
	 */
	if (*flags & CPU_DTRACE_FAULT)
		goto out;

	/*
	 * Now we want to walk up the stack, calling the USTACK helper.  For
	 * each iteration, we restore the scratch pointer.
	 */
	for (i = 0; i < nframes; i++) {
		mstate->dtms_scratch_ptr = saved;

		if (offs >= strsize)
			break;

		sym = (char *)(uintptr_t)dtrace_helper(
		    DTRACE_HELPER_ACTION_USTACK,
		    mstate, state, pcs[i], fps[i]);

		/*
		 * If we faulted while running the helper, we're going to
		 * clear the fault and null out the corresponding string.
		 */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			str[offs++] = '\0';
			continue;
		}

		if (sym == NULL) {
			str[offs++] = '\0';
			continue;
		}

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);

		/*
		 * Now copy in the string that the helper returned to us.
		 */
		for (j = 0; offs + j < strsize; j++) {
			if ((str[offs + j] = sym[j]) == '\0')
				break;
		}

		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

		/* Advance past the copied string and its terminating NUL. */
		offs += j + 1;
	}

	if (offs >= strsize) {
		/*
		 * If we didn't have room for all of the strings, we don't
		 * abort processing -- this needn't be a fatal error -- but we
		 * still want to increment a counter (dts_stkstroverflows) to
		 * allow this condition to be warned about.  (If this is from
		 * a jstack() action, it is easily tuned via jstackstrsize.)
		 */
		dtrace_error(&state->dts_stkstroverflows);
	}

	/* NUL-fill any unused remainder of the string space. */
	while (offs < strsize)
		str[offs++] = '\0';

out:
	mstate->dtms_scratch_ptr = old;
}

/*
 * If you're looking for the epicenter of DTrace, you just found it.  This
 * is the function called by the provider to fire a probe -- from which all
 * subsequent probe-context DTrace activity emanates.
 */
#if !defined(__APPLE__)
void
dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
    uintptr_t arg2, uintptr_t arg3, uintptr_t arg4)
#else
static void
__dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
    uint64_t arg2, uint64_t arg3, uint64_t arg4)
#endif /* __APPLE__ */
{
	processorid_t cpuid;
	dtrace_icookie_t cookie;
	dtrace_probe_t *probe;
	dtrace_mstate_t mstate;
	dtrace_ecb_t *ecb;
	dtrace_action_t *act;
	intptr_t offs;
	size_t size;
	int vtime, onintr;
	volatile uint16_t *flags;
	hrtime_t now;

#if !defined(__APPLE__)
	/*
	 * Kick out immediately if this CPU is still being born (in which case
	 * curthread will be set to -1)
	 */
	if ((uintptr_t)curthread & 1)
		return;
#else
	/* Darwin has no corresponding being-born check. */
#endif /* __APPLE__ */

	cookie = dtrace_interrupt_disable();
	probe = dtrace_probes[id - 1];
	cpuid = CPU->cpu_id;
	onintr = CPU_ON_INTR(CPU);

#if !defined(__APPLE__)
	if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE &&
	    probe->dtpr_predcache == curthread->t_predcache) {
#else
	if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE &&
	    probe->dtpr_predcache == dtrace_get_thread_predcache(current_thread())) {
#endif /* __APPLE__ */
		/*
		 * We have hit in the predicate cache; we know that
		 * this predicate would evaluate to be false.
5316 */ 5317 dtrace_interrupt_enable(cookie); 5318 return; 5319 } 5320 5321 if (panic_quiesce) { 5322 /* 5323 * We don't trace anything if we're panicking. 5324 */ 5325 dtrace_interrupt_enable(cookie); 5326 return; 5327 } 5328 5329#if !defined(__APPLE__) 5330 now = dtrace_gethrtime(); 5331 vtime = dtrace_vtime_references != 0; 5332 5333 if (vtime && curthread->t_dtrace_start) 5334 curthread->t_dtrace_vtime += now - curthread->t_dtrace_start; 5335#else 5336 vtime = dtrace_vtime_references != 0; 5337 5338 if (vtime) 5339 { 5340 int64_t dtrace_accum_time, recent_vtime; 5341 thread_t thread = current_thread(); 5342 5343 dtrace_accum_time = dtrace_get_thread_tracing(thread); /* Time spent inside DTrace so far (nanoseconds) */ 5344 5345 if (dtrace_accum_time >= 0) { 5346 recent_vtime = dtrace_abs_to_nano(dtrace_calc_thread_recent_vtime(thread)); /* up to the moment thread vtime */ 5347 5348 recent_vtime = recent_vtime - dtrace_accum_time; /* Time without DTrace contribution */ 5349 5350 dtrace_set_thread_vtime(thread, recent_vtime); 5351 } 5352 } 5353 5354 now = dtrace_gethrtime(); /* must not precede dtrace_calc_thread_recent_vtime() call! */ 5355#endif /* __APPLE__ */ 5356 5357#if defined(__APPLE__) 5358 /* 5359 * A provider may call dtrace_probe_error() in lieu of dtrace_probe() in some circumstances. 5360 * See, e.g. fasttrap_isa.c. However the provider has no access to ECB context, so passes 5361 * NULL through "arg0" and the probe_id of the ovedrriden probe as arg1. Detect that here 5362 * and cons up a viable state (from the probe_id). 5363 */ 5364 if (dtrace_probeid_error == id && NULL == arg0) { 5365 dtrace_id_t ftp_id = (dtrace_id_t)arg1; 5366 dtrace_probe_t *ftp_probe = dtrace_probes[ftp_id - 1]; 5367 dtrace_ecb_t *ftp_ecb = ftp_probe->dtpr_ecb; 5368 5369 if (NULL != ftp_ecb) { 5370 dtrace_state_t *ftp_state = ftp_ecb->dte_state; 5371 5372 arg0 = (uint64_t)(uintptr_t)ftp_state; 5373 arg1 = ftp_ecb->dte_epid; 5374 /* 5375 * args[2-4] established by caller. 
5376 */ 5377 ftp_state->dts_arg_error_illval = -1; /* arg5 */ 5378 } 5379 } 5380#endif /* __APPLE__ */ 5381 5382 mstate.dtms_probe = probe; 5383 mstate.dtms_arg[0] = arg0; 5384 mstate.dtms_arg[1] = arg1; 5385 mstate.dtms_arg[2] = arg2; 5386 mstate.dtms_arg[3] = arg3; 5387 mstate.dtms_arg[4] = arg4; 5388 5389 flags = (volatile uint16_t *)&cpu_core[cpuid].cpuc_dtrace_flags; 5390 5391 for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) { 5392 dtrace_predicate_t *pred = ecb->dte_predicate; 5393 dtrace_state_t *state = ecb->dte_state; 5394 dtrace_buffer_t *buf = &state->dts_buffer[cpuid]; 5395 dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid]; 5396 dtrace_vstate_t *vstate = &state->dts_vstate; 5397 dtrace_provider_t *prov = probe->dtpr_provider; 5398 int committed = 0; 5399 caddr_t tomax; 5400 5401 /* 5402 * A little subtlety with the following (seemingly innocuous) 5403 * declaration of the automatic 'val': by looking at the 5404 * code, you might think that it could be declared in the 5405 * action processing loop, below. (That is, it's only used in 5406 * the action processing loop.) However, it must be declared 5407 * out of that scope because in the case of DIF expression 5408 * arguments to aggregating actions, one iteration of the 5409 * action loop will use the last iteration's value. 5410 */ 5411#ifdef lint 5412 uint64_t val = 0; 5413#else 5414 uint64_t val = 0; 5415#endif 5416 5417 mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE; 5418 *flags &= ~CPU_DTRACE_ERROR; 5419 5420 if (prov == dtrace_provider) { 5421 /* 5422 * If dtrace itself is the provider of this probe, 5423 * we're only going to continue processing the ECB if 5424 * arg0 (the dtrace_state_t) is equal to the ECB's 5425 * creating state. (This prevents disjoint consumers 5426 * from seeing one another's metaprobes.) 
5427 */ 5428 if (arg0 != (uint64_t)(uintptr_t)state) 5429 continue; 5430 } 5431 5432 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) { 5433 /* 5434 * We're not currently active. If our provider isn't 5435 * the dtrace pseudo provider, we're not interested. 5436 */ 5437 if (prov != dtrace_provider) 5438 continue; 5439 5440 /* 5441 * Now we must further check if we are in the BEGIN 5442 * probe. If we are, we will only continue processing 5443 * if we're still in WARMUP -- if one BEGIN enabling 5444 * has invoked the exit() action, we don't want to 5445 * evaluate subsequent BEGIN enablings. 5446 */ 5447 if (probe->dtpr_id == dtrace_probeid_begin && 5448 state->dts_activity != DTRACE_ACTIVITY_WARMUP) { 5449 ASSERT(state->dts_activity == 5450 DTRACE_ACTIVITY_DRAINING); 5451 continue; 5452 } 5453 } 5454 5455 if (ecb->dte_cond) { 5456 /* 5457 * If the dte_cond bits indicate that this 5458 * consumer is only allowed to see user-mode firings 5459 * of this probe, call the provider's dtps_usermode() 5460 * entry point to check that the probe was fired 5461 * while in a user context. Skip this ECB if that's 5462 * not the case. 5463 */ 5464 if ((ecb->dte_cond & DTRACE_COND_USERMODE) && 5465 prov->dtpv_pops.dtps_usermode(prov->dtpv_arg, 5466 probe->dtpr_id, probe->dtpr_arg) == 0) 5467 continue; 5468 5469 /* 5470 * This is more subtle than it looks. We have to be 5471 * absolutely certain that CRED() isn't going to 5472 * change out from under us so it's only legit to 5473 * examine that structure if we're in constrained 5474 * situations. Currently, the only times we'll this 5475 * check is if a non-super-user has enabled the 5476 * profile or syscall providers -- providers that 5477 * allow visibility of all processes. For the 5478 * profile case, the check above will ensure that 5479 * we're examining a user context. 
5480 */ 5481 if (ecb->dte_cond & DTRACE_COND_OWNER) { 5482 cred_t *cr; 5483 cred_t *s_cr = 5484 ecb->dte_state->dts_cred.dcr_cred; 5485 proc_t *proc; 5486 5487 ASSERT(s_cr != NULL); 5488 5489#if !defined(__APPLE__) 5490 if ((cr = CRED()) == NULL || 5491#else 5492 if ((cr = dtrace_CRED()) == NULL || 5493#endif /* __APPLE__ */ 5494 s_cr->cr_uid != cr->cr_uid || 5495 s_cr->cr_uid != cr->cr_ruid || 5496 s_cr->cr_uid != cr->cr_suid || 5497 s_cr->cr_gid != cr->cr_gid || 5498 s_cr->cr_gid != cr->cr_rgid || 5499 s_cr->cr_gid != cr->cr_sgid || 5500#if !defined(__APPLE__) 5501 (proc = ttoproc(curthread)) == NULL || 5502 (proc->p_flag & SNOCD)) 5503#else 5504 1) /* Darwin omits "No Core Dump" flag. */ 5505#endif /* __APPLE__ */ 5506 continue; 5507 } 5508 5509 if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) { 5510 cred_t *cr; 5511 cred_t *s_cr = 5512 ecb->dte_state->dts_cred.dcr_cred; 5513 5514 ASSERT(s_cr != NULL); 5515 5516#if !defined(__APPLE__) /* Darwin doesn't do zones. */ 5517 if ((cr = CRED()) == NULL || 5518 s_cr->cr_zone->zone_id != 5519 cr->cr_zone->zone_id) 5520 continue; 5521#endif /* __APPLE__ */ 5522 } 5523 } 5524 5525 if (now - state->dts_alive > dtrace_deadman_timeout) { 5526 /* 5527 * We seem to be dead. Unless we (a) have kernel 5528 * destructive permissions (b) have expicitly enabled 5529 * destructive actions and (c) destructive actions have 5530 * not been disabled, we're going to transition into 5531 * the KILLED state, from which no further processing 5532 * on this state will be performed. 
5533 */ 5534 if (!dtrace_priv_kernel_destructive(state) || 5535 !state->dts_cred.dcr_destructive || 5536 dtrace_destructive_disallow) { 5537 void *activity = &state->dts_activity; 5538 dtrace_activity_t current; 5539 5540 do { 5541 current = state->dts_activity; 5542 } while (dtrace_cas32(activity, current, 5543 DTRACE_ACTIVITY_KILLED) != current); 5544 5545 continue; 5546 } 5547 } 5548 5549 if ((offs = dtrace_buffer_reserve(buf, ecb->dte_needed, 5550 ecb->dte_alignment, state, &mstate)) < 0) 5551 continue; 5552 5553 tomax = buf->dtb_tomax; 5554 ASSERT(tomax != NULL); 5555 5556 if (ecb->dte_size != 0) 5557 DTRACE_STORE(uint32_t, tomax, offs, ecb->dte_epid); 5558 5559 mstate.dtms_epid = ecb->dte_epid; 5560 mstate.dtms_present |= DTRACE_MSTATE_EPID; 5561 5562 if (pred != NULL) { 5563 dtrace_difo_t *dp = pred->dtp_difo; 5564 int rval; 5565 5566 rval = dtrace_dif_emulate(dp, &mstate, vstate, state); 5567 5568 if (!(*flags & CPU_DTRACE_ERROR) && !rval) { 5569 dtrace_cacheid_t cid = probe->dtpr_predcache; 5570 5571 if (cid != DTRACE_CACHEIDNONE && !onintr) { 5572 /* 5573 * Update the predicate cache... 
5574 */ 5575 ASSERT(cid == pred->dtp_cacheid); 5576#if !defined(__APPLE__) 5577 curthread->t_predcache = cid; 5578#else 5579 dtrace_set_thread_predcache(current_thread(), cid); 5580#endif /* __APPLE__ */ 5581 } 5582 5583 continue; 5584 } 5585 } 5586 5587 for (act = ecb->dte_action; !(*flags & CPU_DTRACE_ERROR) && 5588 act != NULL; act = act->dta_next) { 5589 size_t valoffs; 5590 dtrace_difo_t *dp; 5591 dtrace_recdesc_t *rec = &act->dta_rec; 5592 5593 size = rec->dtrd_size; 5594 valoffs = offs + rec->dtrd_offset; 5595 5596 if (DTRACEACT_ISAGG(act->dta_kind)) { 5597 uint64_t v = 0xbad; 5598 dtrace_aggregation_t *agg; 5599 5600 agg = (dtrace_aggregation_t *)act; 5601 5602 if ((dp = act->dta_difo) != NULL) 5603 v = dtrace_dif_emulate(dp, 5604 &mstate, vstate, state); 5605 5606 if (*flags & CPU_DTRACE_ERROR) 5607 continue; 5608 5609 /* 5610 * Note that we always pass the expression 5611 * value from the previous iteration of the 5612 * action loop. This value will only be used 5613 * if there is an expression argument to the 5614 * aggregating action, denoted by the 5615 * dtag_hasarg field. 5616 */ 5617 dtrace_aggregate(agg, buf, 5618 offs, aggbuf, v, val); 5619 continue; 5620 } 5621 5622 switch (act->dta_kind) { 5623 case DTRACEACT_STOP: 5624 if (dtrace_priv_proc_destructive(state)) 5625 dtrace_action_stop(); 5626 continue; 5627 5628 case DTRACEACT_BREAKPOINT: 5629 if (dtrace_priv_kernel_destructive(state)) 5630 dtrace_action_breakpoint(ecb); 5631 continue; 5632 5633 case DTRACEACT_PANIC: 5634 if (dtrace_priv_kernel_destructive(state)) 5635 dtrace_action_panic(ecb); 5636 continue; 5637 5638 case DTRACEACT_STACK: 5639 if (!dtrace_priv_kernel(state)) 5640 continue; 5641 5642 dtrace_getpcstack((pc_t *)(tomax + valoffs), 5643 size / sizeof (pc_t), probe->dtpr_aframes, 5644 DTRACE_ANCHORED(probe) ? 
NULL : 5645 (uint32_t *)arg0); 5646 5647 continue; 5648 5649 case DTRACEACT_JSTACK: 5650 case DTRACEACT_USTACK: 5651 if (!dtrace_priv_proc(state)) 5652 continue; 5653 5654 /* 5655 * See comment in DIF_VAR_PID. 5656 */ 5657 if (DTRACE_ANCHORED(mstate.dtms_probe) && 5658 CPU_ON_INTR(CPU)) { 5659 int depth = DTRACE_USTACK_NFRAMES( 5660 rec->dtrd_arg) + 1; 5661 5662 dtrace_bzero((void *)(tomax + valoffs), 5663 DTRACE_USTACK_STRSIZE(rec->dtrd_arg) 5664 + depth * sizeof (uint64_t)); 5665 5666 continue; 5667 } 5668 5669 if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0 && 5670 curproc->p_dtrace_helpers != NULL) { 5671 /* 5672 * This is the slow path -- we have 5673 * allocated string space, and we're 5674 * getting the stack of a process that 5675 * has helpers. Call into a separate 5676 * routine to perform this processing. 5677 */ 5678 dtrace_action_ustack(&mstate, state, 5679 (uint64_t *)(tomax + valoffs), 5680 rec->dtrd_arg); 5681 continue; 5682 } 5683 5684 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 5685 dtrace_getupcstack((uint64_t *) 5686 (tomax + valoffs), 5687 DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + 1); 5688 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 5689 continue; 5690 5691 default: 5692 break; 5693 } 5694 5695 dp = act->dta_difo; 5696 ASSERT(dp != NULL); 5697 5698 val = dtrace_dif_emulate(dp, &mstate, vstate, state); 5699 5700 if (*flags & CPU_DTRACE_ERROR) 5701 continue; 5702 5703 switch (act->dta_kind) { 5704 case DTRACEACT_SPECULATE: 5705 ASSERT(buf == &state->dts_buffer[cpuid]); 5706 buf = dtrace_speculation_buffer(state, 5707 cpuid, val); 5708 5709 if (buf == NULL) { 5710 *flags |= CPU_DTRACE_DROP; 5711 continue; 5712 } 5713 5714 offs = dtrace_buffer_reserve(buf, 5715 ecb->dte_needed, ecb->dte_alignment, 5716 state, NULL); 5717 5718 if (offs < 0) { 5719 *flags |= CPU_DTRACE_DROP; 5720 continue; 5721 } 5722 5723 tomax = buf->dtb_tomax; 5724 ASSERT(tomax != NULL); 5725 5726 if (ecb->dte_size != 0) 5727 DTRACE_STORE(uint32_t, tomax, offs, 5728 ecb->dte_epid); 5729 
continue; 5730 5731 case DTRACEACT_CHILL: 5732 if (dtrace_priv_kernel_destructive(state)) 5733 dtrace_action_chill(&mstate, val); 5734 continue; 5735 5736 case DTRACEACT_RAISE: 5737 if (dtrace_priv_proc_destructive(state)) 5738 dtrace_action_raise(val); 5739 continue; 5740 5741 case DTRACEACT_COMMIT: 5742 ASSERT(!committed); 5743 5744 /* 5745 * We need to commit our buffer state. 5746 */ 5747 if (ecb->dte_size) 5748 buf->dtb_offset = offs + ecb->dte_size; 5749 buf = &state->dts_buffer[cpuid]; 5750 dtrace_speculation_commit(state, cpuid, val); 5751 committed = 1; 5752 continue; 5753 5754 case DTRACEACT_DISCARD: 5755 dtrace_speculation_discard(state, cpuid, val); 5756 continue; 5757 5758 case DTRACEACT_DIFEXPR: 5759 case DTRACEACT_LIBACT: 5760 case DTRACEACT_PRINTF: 5761 case DTRACEACT_PRINTA: 5762 case DTRACEACT_SYSTEM: 5763 case DTRACEACT_FREOPEN: 5764 break; 5765 5766 case DTRACEACT_SYM: 5767 case DTRACEACT_MOD: 5768 if (!dtrace_priv_kernel(state)) 5769 continue; 5770 break; 5771 5772#if !defined(__APPLE__) 5773 case DTRACEACT_USYM: 5774 case DTRACEACT_UMOD: 5775 case DTRACEACT_UADDR: { 5776 struct pid *pid = curthread->t_procp->p_pidp; 5777 5778 if (!dtrace_priv_proc(state)) 5779 continue; 5780 5781 DTRACE_STORE(uint64_t, tomax, 5782 valoffs, (uint64_t)pid->pid_id); 5783 DTRACE_STORE(uint64_t, tomax, 5784 valoffs + sizeof (uint64_t), val); 5785 5786 continue; 5787 } 5788#else 5789 case DTRACEACT_USYM: 5790 case DTRACEACT_UMOD: 5791 case DTRACEACT_UADDR: { 5792 if (!dtrace_priv_proc(state)) 5793 continue; 5794 5795 DTRACE_STORE(uint64_t, tomax, 5796 valoffs, (uint64_t)proc_selfpid()); 5797 DTRACE_STORE(uint64_t, tomax, 5798 valoffs + sizeof (uint64_t), val); 5799 5800 continue; 5801 } 5802#endif /* __APPLE__ */ 5803 5804 case DTRACEACT_EXIT: { 5805 /* 5806 * For the exit action, we are going to attempt 5807 * to atomically set our activity to be 5808 * draining. 
If this fails (either because 5809 * another CPU has beat us to the exit action, 5810 * or because our current activity is something 5811 * other than ACTIVE or WARMUP), we will 5812 * continue. This assures that the exit action 5813 * can be successfully recorded at most once 5814 * when we're in the ACTIVE state. If we're 5815 * encountering the exit() action while in 5816 * COOLDOWN, however, we want to honor the new 5817 * status code. (We know that we're the only 5818 * thread in COOLDOWN, so there is no race.) 5819 */ 5820 void *activity = &state->dts_activity; 5821 dtrace_activity_t current = state->dts_activity; 5822 5823 if (current == DTRACE_ACTIVITY_COOLDOWN) 5824 break; 5825 5826 if (current != DTRACE_ACTIVITY_WARMUP) 5827 current = DTRACE_ACTIVITY_ACTIVE; 5828 5829 if (dtrace_cas32(activity, current, 5830 DTRACE_ACTIVITY_DRAINING) != current) { 5831 *flags |= CPU_DTRACE_DROP; 5832 continue; 5833 } 5834 5835 break; 5836 } 5837 5838 default: 5839 ASSERT(0); 5840 } 5841 5842 if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) { 5843 uintptr_t end = valoffs + size; 5844 5845 /* 5846 * If this is a string, we're going to only 5847 * load until we find the zero byte -- after 5848 * which we'll store zero bytes. 
5849 */ 5850 if (dp->dtdo_rtype.dtdt_kind == 5851 DIF_TYPE_STRING) { 5852 char c = '\0' + 1; 5853 int intuple = act->dta_intuple; 5854 size_t s; 5855 5856 for (s = 0; s < size; s++) { 5857 if (c != '\0') 5858 c = dtrace_load8(val++); 5859 5860 DTRACE_STORE(uint8_t, tomax, 5861 valoffs++, c); 5862 5863 if (c == '\0' && intuple) 5864 break; 5865 } 5866 5867 continue; 5868 } 5869 5870 while (valoffs < end) { 5871 DTRACE_STORE(uint8_t, tomax, valoffs++, 5872 dtrace_load8(val++)); 5873 } 5874 5875 continue; 5876 } 5877 5878 switch (size) { 5879 case 0: 5880 break; 5881 5882 case sizeof (uint8_t): 5883 DTRACE_STORE(uint8_t, tomax, valoffs, val); 5884 break; 5885 case sizeof (uint16_t): 5886 DTRACE_STORE(uint16_t, tomax, valoffs, val); 5887 break; 5888 case sizeof (uint32_t): 5889 DTRACE_STORE(uint32_t, tomax, valoffs, val); 5890 break; 5891 case sizeof (uint64_t): 5892 DTRACE_STORE(uint64_t, tomax, valoffs, val); 5893 break; 5894 default: 5895 /* 5896 * Any other size should have been returned by 5897 * reference, not by value. 5898 */ 5899 ASSERT(0); 5900 break; 5901 } 5902 } 5903 5904 if (*flags & CPU_DTRACE_DROP) 5905 continue; 5906 5907 if (*flags & CPU_DTRACE_FAULT) { 5908 int ndx; 5909 dtrace_action_t *err; 5910 5911 buf->dtb_errors++; 5912 5913 if (probe->dtpr_id == dtrace_probeid_error) { 5914 /* 5915 * There's nothing we can do -- we had an 5916 * error on the error probe. We bump an 5917 * error counter to at least indicate that 5918 * this condition happened. 5919 */ 5920 dtrace_error(&state->dts_dblerrors); 5921 continue; 5922 } 5923 5924 if (vtime) { 5925 /* 5926 * Before recursing on dtrace_probe(), we 5927 * need to explicitly clear out our start 5928 * time to prevent it from being accumulated 5929 * into t_dtrace_vtime. 5930 */ 5931#if !defined(__APPLE__) 5932 curthread->t_dtrace_start = 0; 5933#else 5934 /* Set the sign bit on t_dtrace_tracing to suspend accumulation to it. 
*/ 5935 dtrace_set_thread_tracing(current_thread(), 5936 (1ULL<<63) | dtrace_get_thread_tracing(current_thread())); 5937#endif /* __APPLE__ */ 5938 } 5939 5940 /* 5941 * Iterate over the actions to figure out which action 5942 * we were processing when we experienced the error. 5943 * Note that act points _past_ the faulting action; if 5944 * act is ecb->dte_action, the fault was in the 5945 * predicate, if it's ecb->dte_action->dta_next it's 5946 * in action #1, and so on. 5947 */ 5948 for (err = ecb->dte_action, ndx = 0; 5949 err != act; err = err->dta_next, ndx++) 5950 continue; 5951 5952 dtrace_probe_error(state, ecb->dte_epid, ndx, 5953 (mstate.dtms_present & DTRACE_MSTATE_FLTOFFS) ? 5954 mstate.dtms_fltoffs : -1, DTRACE_FLAGS2FLT(*flags), 5955 cpu_core[cpuid].cpuc_dtrace_illval); 5956 5957 continue; 5958 } 5959 5960 if (!committed) 5961 buf->dtb_offset = offs + ecb->dte_size; 5962 } 5963 5964#if !defined(__APPLE__) 5965 if (vtime) 5966 curthread->t_dtrace_start = dtrace_gethrtime(); 5967#else 5968 if (vtime) { 5969 thread_t thread = current_thread(); 5970 int64_t t = dtrace_get_thread_tracing(thread); 5971 5972 if (t >= 0) { 5973 /* Usual case, accumulate time spent here into t_dtrace_tracing */ 5974 dtrace_set_thread_tracing(thread, t + (dtrace_gethrtime() - now)); 5975 } else { 5976 /* Return from error recursion. No accumulation, just clear the sign bit on t_dtrace_tracing. 
			 */
			dtrace_set_thread_tracing(thread, (~(1ULL<<63)) & t);
		}
	}
#endif /* __APPLE__ */

	dtrace_interrupt_enable(cookie);
}

#if defined(__APPLE__)
/*
 * Don't allow a thread to re-enter dtrace_probe().
 *
 * This wrapper guards the real probe-firing routine, __dtrace_probe(), with
 * a per-thread "re-entering" flag.  The error probe (dtrace_probeid_error)
 * is exempt from the check -- it is fired from within probe processing
 * itself (see dtrace_probe_error() above) and must always be allowed
 * through.  Any other probe that fires while this thread is already inside
 * __dtrace_probe() is silently dropped.
 */
void
dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
	uint64_t arg2, uint64_t arg3, uint64_t arg4)
{
	thread_t thread = current_thread();

	if (id == dtrace_probeid_error) {
		__dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
		dtrace_getfp(); /* Defeat tail-call optimization of __dtrace_probe() */
	} else if (!dtrace_get_thread_reentering(thread)) {
		dtrace_set_thread_reentering(thread, TRUE);
		__dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
		dtrace_set_thread_reentering(thread, FALSE);
	}
}
#endif /* __APPLE__ */

/*
 * DTrace Probe Hashing Functions
 *
 * The functions in this section (and indeed, the functions in remaining
 * sections) are not _called_ from probe context.  (Any exceptions to this are
 * marked with a "Note:".)  Rather, they are called from elsewhere in the
 * DTrace framework to look-up probes in, add probes to and remove probes from
 * the DTrace probe hashes.  (Each probe is hashed by each element of the
 * probe tuple -- allowing for fast lookups, regardless of what was
 * specified.)
 */

/*
 * Hash a NUL-terminated string.  This is the classic shift-by-4/fold
 * string hash (PJW/ELF-style): each character is folded into the running
 * value, with the high nibble XORed back in to keep the result well mixed.
 */
static uint_t
dtrace_hash_str(char *p)
{
	unsigned int g;
	uint_t hval = 0;

	while (*p) {
		hval = (hval << 4) + *p++;
		if ((g = (hval & 0xf0000000)) != 0)
			hval ^= g >> 24;
		hval &= ~g;
	}
	return (hval);
}

/*
 * Create a probe hash keyed by a string member of dtrace_probe_t.  The
 * three offsets identify, within a dtrace_probe_t, the key string and the
 * next/prev chain pointers that this hash may use to link probes with
 * identical keys.  The table starts at a single bucket (dth_size is kept a
 * power of two so dth_mask can be used for indexing) and grows via
 * dtrace_hash_resize().
 */
static dtrace_hash_t *
dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs)
{
	dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP);

	hash->dth_stroffs = stroffs;
	hash->dth_nextoffs = nextoffs;
	hash->dth_prevoffs = prevoffs;

	hash->dth_size = 1;
	hash->dth_mask = hash->dth_size - 1;

	hash->dth_tab = kmem_zalloc(hash->dth_size *
	    sizeof (dtrace_hashbucket_t *), KM_SLEEP);

	return (hash);
}

#if !defined(__APPLE__) /* Quiet compiler warning */
/*
 * Destroy a hash created by dtrace_hash_create().  All buckets must already
 * be empty (asserted under DEBUG).
 */
static void
dtrace_hash_destroy(dtrace_hash_t *hash)
{
#ifdef DEBUG
	int i;

	for (i = 0; i < hash->dth_size; i++)
		ASSERT(hash->dth_tab[i] == NULL);
#endif

	kmem_free(hash->dth_tab,
	    hash->dth_size * sizeof (dtrace_hashbucket_t *));
	kmem_free(hash, sizeof (dtrace_hash_t));
}
#endif /* __APPLE__ */

/*
 * Double the size of the hash table and redistribute every existing bucket
 * into the new table.  Buckets (not individual probes) are rehashed; each
 * bucket's key is taken from its chain head.
 */
static void
dtrace_hash_resize(dtrace_hash_t *hash)
{
	int size = hash->dth_size, i, ndx;
	int new_size = hash->dth_size << 1;
	int new_mask = new_size - 1;
	dtrace_hashbucket_t **new_tab, *bucket, *next;

	/* new_size must remain a power of two for mask-based indexing. */
	ASSERT((new_size & new_mask) == 0);

	new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP);

	for (i = 0; i < size; i++) {
		for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) {
			dtrace_probe_t *probe = bucket->dthb_chain;

			ASSERT(probe != NULL);
			ndx = DTRACE_HASHSTR(hash, probe) & new_mask;

			next = bucket->dthb_next;
			bucket->dthb_next = new_tab[ndx];
			new_tab[ndx] = bucket;
		}
	}

	kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *));
	hash->dth_tab = new_tab;
	hash->dth_size = new_size;
	hash->dth_mask = new_mask;
}

/*
 * Add a probe to the hash.  Probes whose key string compares equal share a
 * single bucket and are linked together (at the chain head) through the
 * next/prev pointers identified at hash-creation time.  If a new bucket is
 * needed and the bucket count has grown past twice the table size, the
 * table is resized first.
 */
static void
dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new)
{
	int hashval = DTRACE_HASHSTR(hash, new);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
	dtrace_probe_t **nextp, **prevp;

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new))
			goto add;
	}

	if ((hash->dth_nbuckets >> 1) > hash->dth_size) {
		/* Too many buckets for the table: grow, then retry the add. */
		dtrace_hash_resize(hash);
		dtrace_hash_add(hash, new);
		return;
	}

	bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP);
	bucket->dthb_next = hash->dth_tab[ndx];
	hash->dth_tab[ndx] = bucket;
	hash->dth_nbuckets++;

add:
	/* Link the new probe at the head of the bucket's chain. */
	nextp = DTRACE_HASHNEXT(hash, new);
	ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL);
	*nextp = bucket->dthb_chain;

	if (bucket->dthb_chain != NULL) {
		prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain);
		ASSERT(*prevp == NULL);
		*prevp = new;
	}

	bucket->dthb_chain = new;
	bucket->dthb_len++;
}

/*
 * Look up a probe whose key string matches that of the given template.
 * Returns the head of the matching chain, or NULL if no probe with this
 * key is present.
 */
static dtrace_probe_t *
dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template)
{
	int hashval = DTRACE_HASHSTR(hash, template);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
			return (bucket->dthb_chain);
	}

	return (NULL);
}

/*
 * Return the number of probes sharing the template's key (the chain length
 * of the matching bucket); used by dtrace_match() to pick the most
 * selective hash.
 */
static int
dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template)
{
	int hashval = DTRACE_HASHSTR(hash, template);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash,
bucket->dthb_chain, template)) 6159 return (bucket->dthb_len); 6160 } 6161 6162 return (NULL); 6163} 6164 6165static void 6166dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe) 6167{ 6168 int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask; 6169 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; 6170 6171 dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe); 6172 dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe); 6173 6174 /* 6175 * Find the bucket that we're removing this probe from. 6176 */ 6177 for (; bucket != NULL; bucket = bucket->dthb_next) { 6178 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe)) 6179 break; 6180 } 6181 6182 ASSERT(bucket != NULL); 6183 6184 if (*prevp == NULL) { 6185 if (*nextp == NULL) { 6186 /* 6187 * The removed probe was the only probe on this 6188 * bucket; we need to remove the bucket. 6189 */ 6190 dtrace_hashbucket_t *b = hash->dth_tab[ndx]; 6191 6192 ASSERT(bucket->dthb_chain == probe); 6193 ASSERT(b != NULL); 6194 6195 if (b == bucket) { 6196 hash->dth_tab[ndx] = bucket->dthb_next; 6197 } else { 6198 while (b->dthb_next != bucket) 6199 b = b->dthb_next; 6200 b->dthb_next = bucket->dthb_next; 6201 } 6202 6203 ASSERT(hash->dth_nbuckets > 0); 6204 hash->dth_nbuckets--; 6205 kmem_free(bucket, sizeof (dtrace_hashbucket_t)); 6206 return; 6207 } 6208 6209 bucket->dthb_chain = *nextp; 6210 } else { 6211 *(DTRACE_HASHNEXT(hash, *prevp)) = *nextp; 6212 } 6213 6214 if (*nextp != NULL) 6215 *(DTRACE_HASHPREV(hash, *nextp)) = *prevp; 6216} 6217 6218/* 6219 * DTrace Utility Functions 6220 * 6221 * These are random utility functions that are _not_ called from probe context. 6222 */ 6223static int 6224dtrace_badattr(const dtrace_attribute_t *a) 6225{ 6226 return (a->dtat_name > DTRACE_STABILITY_MAX || 6227 a->dtat_data > DTRACE_STABILITY_MAX || 6228 a->dtat_class > DTRACE_CLASS_MAX); 6229} 6230 6231/* 6232 * Return a duplicate copy of a string. If the specified string is NULL, 6233 * this function returns a zero-length string. 
 */
static char *
dtrace_strdup(const char *str)
{
	/* KM_SLEEP allocation: never returns NULL; caller frees. */
	char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP);

	if (str != NULL)
		(void) strcpy(new, str);

	return (new);
}

#define	DTRACE_ISALPHA(c)	\
	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))

/*
 * Returns non-zero if the string is not a valid provider/probe name
 * component.  A valid name starts with a letter, '-', '_' or '.', and its
 * remaining characters may additionally be digits or '`'.  NULL and the
 * empty string are treated as acceptable (return 0).
 */
static int
dtrace_badname(const char *s)
{
	char c;

	if (s == NULL || (c = *s++) == '\0')
		return (0);

	if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.')
		return (1);

	while ((c = *s++) != '\0') {
		if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') &&
		    c != '-' && c != '_' && c != '.' && c != '`')
			return (1);
	}

	return (0);
}

/*
 * Derive the DTrace privilege flags (DTRACE_PRIV_*) for a credential,
 * along with the uid and zoneid that matching will be performed against.
 * *uidp and *zoneidp are only written in the non-PRIV_ALL case.
 */
static void
dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp)
{
	uint32_t priv;

	if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
		/*
		 * For DTRACE_PRIV_ALL, the uid and zoneid don't matter.
		 */
		priv = DTRACE_PRIV_ALL;
	} else {
		*uidp = crgetuid(cr);
		*zoneidp = crgetzoneid(cr);

		/* Map each held privilege onto its DTRACE_PRIV_* flag(s). */
		priv = 0;
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE))
			priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER;
		else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE))
			priv |= DTRACE_PRIV_USER;
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE))
			priv |= DTRACE_PRIV_PROC;
		if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
			priv |= DTRACE_PRIV_OWNER;
		if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
			priv |= DTRACE_PRIV_ZONEOWNER;
	}

	*privp = priv;
}

#ifdef DTRACE_ERRDEBUG
/*
 * Record an internal DTrace error message in the error hash for debugging.
 * The hash is keyed by the message pointer and probed linearly
 * (open addressing); a repeated message just bumps its count.  Panics if
 * the fixed-size hash fills up.
 */
static void
dtrace_errdebug(const char *str)
{
	int hval = dtrace_hash_str((char *)str) % DTRACE_ERRHASHSZ;
	int occupied = 0;

	lck_mtx_lock(&dtrace_errlock);
	dtrace_errlast = str;
#if !defined(__APPLE__)
	dtrace_errthread = curthread;
#else
	dtrace_errthread = current_thread();
#endif /* __APPLE__ */

	while (occupied++ < DTRACE_ERRHASHSZ) {
		if (dtrace_errhash[hval].dter_msg == str) {
			dtrace_errhash[hval].dter_count++;
			goto out;
		}

		if (dtrace_errhash[hval].dter_msg != NULL) {
			/* Slot taken by a different message: probe onward. */
			hval = (hval + 1) % DTRACE_ERRHASHSZ;
			continue;
		}

		dtrace_errhash[hval].dter_msg = str;
		dtrace_errhash[hval].dter_count = 1;
		goto out;
	}

	panic("dtrace: undersized error hash");
out:
	lck_mtx_unlock(&dtrace_errlock);
}
#endif

/*
 * DTrace Matching Functions
 *
 * These functions are used to match groups of probes, given some elements of
 * a probe tuple, or some globbed expressions for elements of a probe tuple.
 */

/*
 * Decide whether the given privilege flags/uid/zoneid (as produced by
 * dtrace_cred2priv()) permit matching the specified probe.  Returns 1 if
 * the probe may be matched, 0 if it must be hidden from this consumer.
 */
static int
dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid,
    zoneid_t zoneid)
{
	if (priv != DTRACE_PRIV_ALL) {
		/* Privilege flags the probe's provider was registered with. */
		uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags;
		uint32_t match = priv & ppriv;

		/*
		 * No PRIV_DTRACE_* privileges...
		 */
		if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER |
		    DTRACE_PRIV_KERNEL)) == 0)
			return (0);

		/*
		 * No matching bits, but there were bits to match...
		 */
		if (match == 0 && ppriv != 0)
			return (0);

		/*
		 * Need to have permissions to the process, but don't...
		 */
		if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 &&
		    uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) {
			return (0);
		}

		/*
		 * Need to be in the same zone unless we possess the
		 * privilege to examine all zones.
		 */
		if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 &&
		    zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) {
			return (0);
		}
	}

	return (1);
}

/*
 * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which
 * consists of input pattern strings and an ops-vector to evaluate them.
 * This function returns >0 for match, 0 for no match, and <0 for error.
 */
static int
dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp,
    uint32_t priv, uid_t uid, zoneid_t zoneid)
{
	dtrace_provider_t *pvp = prp->dtpr_provider;
	int rv;

	/* Probes of an invalidated provider never match. */
	if (pvp->dtpv_defunct)
		return (0);

	/*
	 * Each tuple element is checked with the matcher chosen by
	 * dtrace_probekey(); the first non-positive result (no match, or
	 * negative error) is returned immediately.
	 */
	if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0)
		return (rv);

	if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0)
		return (rv);

	if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0)
		return (rv);

	if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0)
		return (rv);

	/* All elements matched; finally apply the privilege check. */
	if (dtrace_match_priv(prp, priv, uid, zoneid) == 0)
		return (0);

	return (rv);
}

/*
 * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN)
 * interface for matching a glob pattern 'p' to an input string 's'.  Unlike
 * libc's version, the kernel version only applies to 8-bit ASCII strings.
 * In addition, all of the recursion cases except for '*' matching have been
 * unwound.  For '*', we still implement recursive evaluation, but a depth
 * counter is maintained and matching is aborted if we recurse too deep.
 * The function returns 0 if no match, >0 if match, and <0 if recursion error.
 */
static int
dtrace_match_glob(const char *s, const char *p, int depth)
{
	const char *olds;
	char s1, c;
	int gs;

	if (depth > DTRACE_PROBEKEY_MAXDEPTH)
		return (-1);

	if (s == NULL)
		s = "";	/* treat NULL as empty string */

top:
	olds = s;
	s1 = *s++;

	if (p == NULL)
		return (0);

	if ((c = *p++) == '\0')
		return (s1 == '\0');

	/*
	 * NOTE: the case ordering below is deliberate -- '\\' falls through
	 * to the literal-comparison default, which falls through to '?'.
	 */
	switch (c) {
	case '[': {
		int ok = 0, notflag = 0;
		char lc = '\0';

		if (s1 == '\0')
			return (0);

		if (*p == '!') {
			notflag = 1;
			p++;
		}

		if ((c = *p++) == '\0')
			return (0);

		do {
			/* A '-' between two characters denotes a range. */
			if (c == '-' && lc != '\0' && *p != ']') {
				if ((c = *p++) == '\0')
					return (0);
				if (c == '\\' && (c = *p++) == '\0')
					return (0);

				if (notflag) {
					if (s1 < lc || s1 > c)
						ok++;
					else
						return (0);
				} else if (lc <= s1 && s1 <= c)
					ok++;

			} else if (c == '\\' && (c = *p++) == '\0')
				return (0);

			lc = c; /* save left-hand 'c' for next iteration */

			if (notflag) {
				if (s1 != c)
					ok++;
				else
					return (0);
			} else if (s1 == c)
				ok++;

			if ((c = *p++) == '\0')
				return (0);

		} while (c != ']');

		if (ok)
			goto top;

		return (0);
	}

	case '\\':
		/* Escaped character: match it literally via the default case. */
		if ((c = *p++) == '\0')
			return (0);
		/*FALLTHRU*/

	default:
		if (c != s1)
			return (0);
		/*FALLTHRU*/

	case '?':
		/* '?' (or a matched literal) consumes one input character. */
		if (s1 != '\0')
			goto top;
		return (0);

	case '*':
		while (*p == '*')
			p++; /* consecutive *'s are identical to a single one */

		if (*p == '\0')
			return (1);

		/* Try every suffix of the input against the rest of the pattern. */
		for (s = olds; *s != '\0'; s++) {
			if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0)
				return (gs);
		}

		return (0);
	}
}

/*ARGSUSED*/
static int
dtrace_match_string(const char *s, const
    char *p, int depth)
{
	/* Exact (non-glob) comparison; a NULL input string never matches. */
	return (s != NULL && strcmp(s, p) == 0);
}

/*ARGSUSED*/
static int
dtrace_match_nul(const char *s, const char *p, int depth)
{
#pragma unused(s,p,depth)
	return (1); /* always match the empty pattern */
}

/*ARGSUSED*/
static int
dtrace_match_nonzero(const char *s, const char *p, int depth)
{
#pragma unused(p,depth)
	/* Matches any non-NULL, non-empty string (anchored probes only). */
	return (s != NULL && s[0] != '\0');
}

/*
 * Match all probes that satisfy the given pre-compiled key and privilege
 * context, invoking the callback for each.  Iteration stops early when the
 * callback returns anything other than DTRACE_MATCH_NEXT.  Returns the
 * number of probes that matched.
 */
static int
dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
    zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg)
{
	dtrace_probe_t template, *probe;
	dtrace_hash_t *hash = NULL;
	int len, best = INT_MAX, nmatched = 0;
	dtrace_id_t i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * If the probe ID is specified in the key, just lookup by ID and
	 * invoke the match callback once if a matching probe is found.
	 */
	if (pkp->dtpk_id != DTRACE_IDNONE) {
		if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
		    dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
			(void) (*matched)(probe, arg);
			nmatched++;
		}
		return (nmatched);
	}

	template.dtpr_mod = (char *)pkp->dtpk_mod;
	template.dtpr_func = (char *)pkp->dtpk_func;
	template.dtpr_name = (char *)pkp->dtpk_name;

	/*
	 * We want to find the most distinct of the module name, function
	 * name, and name.  So for each one that is not a glob pattern or
	 * empty string, we perform a lookup in the corresponding hash and
	 * use the hash table with the fewest collisions to do our search.
	 */
	if (pkp->dtpk_mmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) {
		best = len;
		hash = dtrace_bymod;
	}

	if (pkp->dtpk_fmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) {
		best = len;
		hash = dtrace_byfunc;
	}

	if (pkp->dtpk_nmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) {
		best = len;
		hash = dtrace_byname;
	}

	/*
	 * If we did not select a hash table, iterate over every probe and
	 * invoke our callback for each one that matches our input probe key.
	 */
	if (hash == NULL) {
		for (i = 0; i < dtrace_nprobes; i++) {
			if ((probe = dtrace_probes[i]) == NULL ||
			    dtrace_match_probe(probe, pkp, priv, uid,
			    zoneid) <= 0)
				continue;

			nmatched++;

			/* Any value but DTRACE_MATCH_NEXT ends the walk. */
			if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT)
				break;
		}

		return (nmatched);
	}

	/*
	 * If we selected a hash table, iterate over each probe of the same key
	 * name and invoke the callback for every probe that matches the other
	 * attributes of our input probe key.
	 */
	for (probe = dtrace_hash_lookup(hash, &template); probe != NULL;
	    probe = *(DTRACE_HASHNEXT(hash, probe))) {

		if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0)
			continue;

		nmatched++;

		if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT)
			break;
	}

	return (nmatched);
}

/*
 * Return the function pointer dtrace_probecmp() should use to compare the
 * specified pattern with a string.  For NULL or empty patterns, we select
 * dtrace_match_nul().  For glob pattern strings, we use dtrace_match_glob().
 * For non-empty non-glob strings, we use dtrace_match_string().
6655 */ 6656static dtrace_probekey_f * 6657dtrace_probekey_func(const char *p) 6658{ 6659 char c; 6660 6661 if (p == NULL || *p == '\0') 6662 return (&dtrace_match_nul); 6663 6664 while ((c = *p++) != '\0') { 6665 if (c == '[' || c == '?' || c == '*' || c == '\\') 6666 return (&dtrace_match_glob); 6667 } 6668 6669 return (&dtrace_match_string); 6670} 6671 6672/* 6673 * Build a probe comparison key for use with dtrace_match_probe() from the 6674 * given probe description. By convention, a null key only matches anchored 6675 * probes: if each field is the empty string, reset dtpk_fmatch to 6676 * dtrace_match_nonzero(). 6677 */ 6678static void 6679dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp) 6680{ 6681 pkp->dtpk_prov = pdp->dtpd_provider; 6682 pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider); 6683 6684 pkp->dtpk_mod = pdp->dtpd_mod; 6685 pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod); 6686 6687 pkp->dtpk_func = pdp->dtpd_func; 6688 pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func); 6689 6690 pkp->dtpk_name = pdp->dtpd_name; 6691 pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name); 6692 6693 pkp->dtpk_id = pdp->dtpd_id; 6694 6695 if (pkp->dtpk_id == DTRACE_IDNONE && 6696 pkp->dtpk_pmatch == &dtrace_match_nul && 6697 pkp->dtpk_mmatch == &dtrace_match_nul && 6698 pkp->dtpk_fmatch == &dtrace_match_nul && 6699 pkp->dtpk_nmatch == &dtrace_match_nul) 6700 pkp->dtpk_fmatch = &dtrace_match_nonzero; 6701} 6702 6703/* 6704 * DTrace Provider-to-Framework API Functions 6705 * 6706 * These functions implement much of the Provider-to-Framework API, as 6707 * described in <sys/dtrace.h>. The parts of the API not in this section are 6708 * the functions in the API for probe management (found below), and 6709 * dtrace_probe() itself (found above). 6710 */ 6711 6712/* 6713 * Register the calling provider with the DTrace framework. This should 6714 * generally be called by DTrace providers in their attach(9E) entry point. 
 */
int
dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
    cred_t *cr, const dtrace_pops_t *pops, void *arg, dtrace_provider_id_t *idp)
{
	dtrace_provider_t *provider;

	/* Validate arguments before allocating anything. */
	if (name == NULL || pap == NULL || pops == NULL || idp == NULL) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "arguments", name ? name : "<NULL>");
		return (EINVAL);
	}

	if (name[0] == '\0' || dtrace_badname(name)) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider name", name);
		return (EINVAL);
	}

	/*
	 * At least one of dtps_provide/dtps_provide_module is required;
	 * dtps_resume and dtps_suspend must be supplied together or not
	 * at all.
	 */
	if ((pops->dtps_provide == NULL && pops->dtps_provide_module == NULL) ||
	    pops->dtps_enable == NULL || pops->dtps_disable == NULL ||
	    pops->dtps_destroy == NULL ||
	    ((pops->dtps_resume == NULL) != (pops->dtps_suspend == NULL))) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider ops", name);
		return (EINVAL);
	}

	if (dtrace_badattr(&pap->dtpa_provider) ||
	    dtrace_badattr(&pap->dtpa_mod) ||
	    dtrace_badattr(&pap->dtpa_func) ||
	    dtrace_badattr(&pap->dtpa_name) ||
	    dtrace_badattr(&pap->dtpa_args)) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider attributes", name);
		return (EINVAL);
	}

	if (priv & ~DTRACE_PRIV_ALL) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "privilege attributes", name);
		return (EINVAL);
	}

	if ((priv & DTRACE_PRIV_KERNEL) &&
	    (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) &&
	    pops->dtps_usermode == NULL) {
		cmn_err(CE_WARN, "failed to register provider '%s': need "
		    "dtps_usermode() op for given privilege attributes", name);
		return (EINVAL);
	}

	provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP);
	provider->dtpv_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(provider->dtpv_name, name);

	provider->dtpv_attr = *pap;
	provider->dtpv_priv.dtpp_flags = priv;
	if (cr != NULL) {
		provider->dtpv_priv.dtpp_uid = crgetuid(cr);
		provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr);
	}
	provider->dtpv_pops = *pops;

	/*
	 * Fill any absent optional ops with dtrace_nullop so callers of the
	 * ops-vector never need NULL checks.
	 */
	if (pops->dtps_provide == NULL) {
		ASSERT(pops->dtps_provide_module != NULL);
		provider->dtpv_pops.dtps_provide =
		    (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop;
	}

	if (pops->dtps_provide_module == NULL) {
		ASSERT(pops->dtps_provide != NULL);
		provider->dtpv_pops.dtps_provide_module =
		    (void (*)(void *, struct modctl *))dtrace_nullop;
	}

	if (pops->dtps_suspend == NULL) {
		ASSERT(pops->dtps_resume == NULL);
		provider->dtpv_pops.dtps_suspend =
		    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
		provider->dtpv_pops.dtps_resume =
		    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
	}

	provider->dtpv_arg = arg;
	*idp = (dtrace_provider_id_t)provider;

	if (pops == &dtrace_provider_ops) {
		/* DTrace registering itself: locks are already held. */
		lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);
		lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
		ASSERT(dtrace_anon.dta_enabling == NULL);

		/*
		 * We make sure that the DTrace provider is at the head of
		 * the provider chain.
		 */
		provider->dtpv_next = dtrace_provider;
		dtrace_provider = provider;
		return (0);
	}

	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	/*
	 * If there is at least one provider registered, we'll add this
	 * provider after the first provider.
	 */
	if (dtrace_provider != NULL) {
		provider->dtpv_next = dtrace_provider->dtpv_next;
		dtrace_provider->dtpv_next = provider;
	} else {
		dtrace_provider = provider;
	}

	if (dtrace_retained != NULL) {
		/* Retained enablings may now match this provider's probes. */
		dtrace_enabling_provide(provider);

		/*
		 * Now we need to call dtrace_enabling_matchall() -- which
		 * will acquire cpu_lock and dtrace_lock.  We therefore need
		 * to drop all of our locks before calling into it...
		 */
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&dtrace_provider_lock);
		dtrace_enabling_matchall();

		return (0);
	}

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	return (0);
}

/*
 * Unregister the specified provider from the DTrace framework.  This should
 * generally be called by DTrace providers in their detach(9E) entry point.
 */
int
dtrace_unregister(dtrace_provider_id_t id)
{
	dtrace_provider_t *old = (dtrace_provider_t *)id;
	dtrace_provider_t *prev = NULL;
	int i, self = 0;
	dtrace_probe_t *probe, *first = NULL;

	if (old->dtpv_pops.dtps_enable ==
	    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop) {
		/*
		 * If DTrace itself is the provider, we're called with locks
		 * already held.
		 */
		ASSERT(old == dtrace_provider);
		ASSERT(dtrace_devi != NULL);
		lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);
		lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

		self = 1;

		if (dtrace_provider->dtpv_next != NULL) {
			/*
			 * There's another provider here; return failure.
6879 */ 6880 return (EBUSY); 6881 } 6882 } else { 6883 lck_mtx_lock(&dtrace_provider_lock); 6884 lck_mtx_lock(&mod_lock); 6885 lck_mtx_lock(&dtrace_lock); 6886 } 6887 6888 /* 6889 * If anyone has /dev/dtrace open, or if there are anonymous enabled 6890 * probes, we refuse to let providers slither away, unless this 6891 * provider has already been explicitly invalidated. 6892 */ 6893 if (!old->dtpv_defunct && 6894 (dtrace_opens || (dtrace_anon.dta_state != NULL && 6895 dtrace_anon.dta_state->dts_necbs > 0))) { 6896 if (!self) { 6897 lck_mtx_unlock(&dtrace_lock); 6898 lck_mtx_unlock(&mod_lock); 6899 lck_mtx_unlock(&dtrace_provider_lock); 6900 } 6901 return (EBUSY); 6902 } 6903 6904 /* 6905 * Attempt to destroy the probes associated with this provider. 6906 */ 6907 for (i = 0; i < dtrace_nprobes; i++) { 6908 if ((probe = dtrace_probes[i]) == NULL) 6909 continue; 6910 6911 if (probe->dtpr_provider != old) 6912 continue; 6913 6914 if (probe->dtpr_ecb == NULL) 6915 continue; 6916 6917 /* 6918 * We have at least one ECB; we can't remove this provider. 6919 */ 6920 if (!self) { 6921 lck_mtx_unlock(&dtrace_lock); 6922 lck_mtx_unlock(&mod_lock); 6923 lck_mtx_unlock(&dtrace_provider_lock); 6924 } 6925 return (EBUSY); 6926 } 6927 6928 /* 6929 * All of the probes for this provider are disabled; we can safely 6930 * remove all of them from their hash chains and from the probe array. 
6931 */ 6932 for (i = 0; i < dtrace_nprobes; i++) { 6933 if ((probe = dtrace_probes[i]) == NULL) 6934 continue; 6935 6936 if (probe->dtpr_provider != old) 6937 continue; 6938 6939 dtrace_probes[i] = NULL; 6940 6941 dtrace_hash_remove(dtrace_bymod, probe); 6942 dtrace_hash_remove(dtrace_byfunc, probe); 6943 dtrace_hash_remove(dtrace_byname, probe); 6944 6945 if (first == NULL) { 6946 first = probe; 6947 probe->dtpr_nextmod = NULL; 6948 } else { 6949 probe->dtpr_nextmod = first; 6950 first = probe; 6951 } 6952 } 6953 6954 /* 6955 * The provider's probes have been removed from the hash chains and 6956 * from the probe array. Now issue a dtrace_sync() to be sure that 6957 * everyone has cleared out from any probe array processing. 6958 */ 6959 dtrace_sync(); 6960 6961 for (probe = first; probe != NULL; probe = first) { 6962 first = probe->dtpr_nextmod; 6963 6964 old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id, 6965 probe->dtpr_arg); 6966 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); 6967 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); 6968 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); 6969 vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1); 6970#if !defined(__APPLE__) 6971 kmem_free(probe, sizeof (dtrace_probe_t)); 6972#else 6973 zfree(dtrace_probe_t_zone, probe); 6974#endif 6975 } 6976 6977 if ((prev = dtrace_provider) == old) { 6978 ASSERT(self || dtrace_devi == NULL); 6979 ASSERT(old->dtpv_next == NULL || dtrace_devi == NULL); 6980 dtrace_provider = old->dtpv_next; 6981 } else { 6982 while (prev != NULL && prev->dtpv_next != old) 6983 prev = prev->dtpv_next; 6984 6985 if (prev == NULL) { 6986 panic("attempt to unregister non-existent " 6987 "dtrace provider %p\n", (void *)id); 6988 } 6989 6990 prev->dtpv_next = old->dtpv_next; 6991 } 6992 6993 if (!self) { 6994 lck_mtx_unlock(&dtrace_lock); 6995 lck_mtx_unlock(&mod_lock); 6996 lck_mtx_unlock(&dtrace_provider_lock); 6997 } 6998 6999 kmem_free(old->dtpv_name, 
strlen(old->dtpv_name) + 1); 7000 kmem_free(old, sizeof (dtrace_provider_t)); 7001 7002 return (0); 7003} 7004 7005/* 7006 * Invalidate the specified provider. All subsequent probe lookups for the 7007 * specified provider will fail, but its probes will not be removed. 7008 */ 7009void 7010dtrace_invalidate(dtrace_provider_id_t id) 7011{ 7012 dtrace_provider_t *pvp = (dtrace_provider_t *)id; 7013 7014 ASSERT(pvp->dtpv_pops.dtps_enable != 7015 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); 7016 7017 lck_mtx_lock(&dtrace_provider_lock); 7018 lck_mtx_lock(&dtrace_lock); 7019 7020 pvp->dtpv_defunct = 1; 7021 7022 lck_mtx_unlock(&dtrace_lock); 7023 lck_mtx_unlock(&dtrace_provider_lock); 7024} 7025 7026/* 7027 * Indicate whether or not DTrace has attached. 7028 */ 7029int 7030dtrace_attached(void) 7031{ 7032 /* 7033 * dtrace_provider will be non-NULL iff the DTrace driver has 7034 * attached. (It's non-NULL because DTrace is always itself a 7035 * provider.) 7036 */ 7037 return (dtrace_provider != NULL); 7038} 7039 7040/* 7041 * Remove all the unenabled probes for the given provider. This function is 7042 * not unlike dtrace_unregister(), except that it doesn't remove the provider 7043 * -- just as many of its associated probes as it can. 7044 */ 7045int 7046dtrace_condense(dtrace_provider_id_t id) 7047{ 7048 dtrace_provider_t *prov = (dtrace_provider_t *)id; 7049 int i; 7050 dtrace_probe_t *probe; 7051 7052 /* 7053 * Make sure this isn't the dtrace provider itself. 7054 */ 7055 ASSERT(prov->dtpv_pops.dtps_enable != 7056 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); 7057 7058 lck_mtx_lock(&dtrace_provider_lock); 7059 lck_mtx_lock(&dtrace_lock); 7060 7061 /* 7062 * Attempt to destroy the probes associated with this provider. 
7063 */ 7064 for (i = 0; i < dtrace_nprobes; i++) { 7065 if ((probe = dtrace_probes[i]) == NULL) 7066 continue; 7067 7068 if (probe->dtpr_provider != prov) 7069 continue; 7070 7071 if (probe->dtpr_ecb != NULL) 7072 continue; 7073 7074 dtrace_probes[i] = NULL; 7075 7076 dtrace_hash_remove(dtrace_bymod, probe); 7077 dtrace_hash_remove(dtrace_byfunc, probe); 7078 dtrace_hash_remove(dtrace_byname, probe); 7079 7080 prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1, 7081 probe->dtpr_arg); 7082 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); 7083 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); 7084 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); 7085#if !defined(__APPLE__) 7086 kmem_free(probe, sizeof (dtrace_probe_t)); 7087#else 7088 zfree(dtrace_probe_t_zone, probe); 7089#endif 7090 vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1); 7091 } 7092 7093 lck_mtx_unlock(&dtrace_lock); 7094 lck_mtx_unlock(&dtrace_provider_lock); 7095 7096 return (0); 7097} 7098 7099/* 7100 * DTrace Probe Management Functions 7101 * 7102 * The functions in this section perform the DTrace probe management, 7103 * including functions to create probes, look-up probes, and call into the 7104 * providers to request that probes be provided. Some of these functions are 7105 * in the Provider-to-Framework API; these functions can be identified by the 7106 * fact that they are not declared "static". 7107 */ 7108 7109/* 7110 * Create a probe with the specified module name, function name, and name. 
7111 */ 7112dtrace_id_t 7113dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, 7114 const char *func, const char *name, int aframes, void *arg) 7115{ 7116 dtrace_probe_t *probe, **probes; 7117 dtrace_provider_t *provider = (dtrace_provider_t *)prov; 7118 dtrace_id_t id; 7119 7120 if (provider == dtrace_provider) { 7121 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 7122 } else { 7123 lck_mtx_lock(&dtrace_lock); 7124 } 7125 7126 id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1, 7127 VM_BESTFIT | VM_SLEEP); 7128#if !defined(__APPLE__) 7129 probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP); 7130#else 7131 probe = zalloc(dtrace_probe_t_zone); 7132 bzero(probe, sizeof (dtrace_probe_t)); 7133#endif 7134 7135 probe->dtpr_id = id; 7136 probe->dtpr_gen = dtrace_probegen++; 7137 probe->dtpr_mod = dtrace_strdup(mod); 7138 probe->dtpr_func = dtrace_strdup(func); 7139 probe->dtpr_name = dtrace_strdup(name); 7140 probe->dtpr_arg = arg; 7141 probe->dtpr_aframes = aframes; 7142 probe->dtpr_provider = provider; 7143 7144 dtrace_hash_add(dtrace_bymod, probe); 7145 dtrace_hash_add(dtrace_byfunc, probe); 7146 dtrace_hash_add(dtrace_byname, probe); 7147 7148 if (id - 1 >= dtrace_nprobes) { 7149 size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *); 7150 size_t nsize = osize << 1; 7151 7152 if (nsize == 0) { 7153 ASSERT(osize == 0); 7154 ASSERT(dtrace_probes == NULL); 7155 nsize = sizeof (dtrace_probe_t *); 7156 } 7157 7158 probes = kmem_zalloc(nsize, KM_SLEEP); 7159 7160 if (dtrace_probes == NULL) { 7161 ASSERT(osize == 0); 7162 dtrace_probes = probes; 7163 dtrace_nprobes = 1; 7164 } else { 7165 dtrace_probe_t **oprobes = dtrace_probes; 7166 7167 bcopy(oprobes, probes, osize); 7168 dtrace_membar_producer(); 7169 dtrace_probes = probes; 7170 7171 dtrace_sync(); 7172 7173 /* 7174 * All CPUs are now seeing the new probes array; we can 7175 * safely free the old array. 
7176 */ 7177 kmem_free(oprobes, osize); 7178 dtrace_nprobes <<= 1; 7179 } 7180 7181 ASSERT(id - 1 < dtrace_nprobes); 7182 } 7183 7184 ASSERT(dtrace_probes[id - 1] == NULL); 7185 dtrace_probes[id - 1] = probe; 7186 7187 if (provider != dtrace_provider) 7188 lck_mtx_unlock(&dtrace_lock); 7189 7190 return (id); 7191} 7192 7193static dtrace_probe_t * 7194dtrace_probe_lookup_id(dtrace_id_t id) 7195{ 7196 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 7197 7198 if (id == 0 || id > dtrace_nprobes) 7199 return (NULL); 7200 7201 return (dtrace_probes[id - 1]); 7202} 7203 7204static int 7205dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg) 7206{ 7207 *((dtrace_id_t *)arg) = probe->dtpr_id; 7208 7209 return (DTRACE_MATCH_DONE); 7210} 7211 7212/* 7213 * Look up a probe based on provider and one or more of module name, function 7214 * name and probe name. 7215 */ 7216dtrace_id_t 7217dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod, 7218 const char *func, const char *name) 7219{ 7220 dtrace_probekey_t pkey; 7221 dtrace_id_t id; 7222 int match; 7223 7224 pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name; 7225 pkey.dtpk_pmatch = &dtrace_match_string; 7226 pkey.dtpk_mod = mod; 7227 pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul; 7228 pkey.dtpk_func = func; 7229 pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul; 7230 pkey.dtpk_name = name; 7231 pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul; 7232 pkey.dtpk_id = DTRACE_IDNONE; 7233 7234 lck_mtx_lock(&dtrace_lock); 7235 match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0, 7236 dtrace_probe_lookup_match, &id); 7237 lck_mtx_unlock(&dtrace_lock); 7238 7239 ASSERT(match == 1 || match == 0); 7240 return (match ? id : 0); 7241} 7242 7243/* 7244 * Returns the probe argument associated with the specified probe. 
7245 */ 7246void * 7247dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid) 7248{ 7249 dtrace_probe_t *probe; 7250 void *rval = NULL; 7251 7252 lck_mtx_lock(&dtrace_lock); 7253 7254 if ((probe = dtrace_probe_lookup_id(pid)) != NULL && 7255 probe->dtpr_provider == (dtrace_provider_t *)id) 7256 rval = probe->dtpr_arg; 7257 7258 lck_mtx_unlock(&dtrace_lock); 7259 7260 return (rval); 7261} 7262 7263/* 7264 * Copy a probe into a probe description. 7265 */ 7266static void 7267dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp) 7268{ 7269 bzero(pdp, sizeof (dtrace_probedesc_t)); 7270 pdp->dtpd_id = prp->dtpr_id; 7271 7272 (void) strlcpy(pdp->dtpd_provider, 7273 prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN); 7274 7275 (void) strlcpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN); 7276 (void) strlcpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN); 7277 (void) strlcpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN); 7278} 7279 7280/* 7281 * Called to indicate that a probe -- or probes -- should be provided by a 7282 * specfied provider. If the specified description is NULL, the provider will 7283 * be told to provide all of its probes. (This is done whenever a new 7284 * consumer comes along, or whenever a retained enabling is to be matched.) If 7285 * the specified description is non-NULL, the provider is given the 7286 * opportunity to dynamically provide the specified probe, allowing providers 7287 * to support the creation of probes on-the-fly. (So-called _autocreated_ 7288 * probes.) If the provider is NULL, the operations will be applied to all 7289 * providers; if the provider is non-NULL the operations will only be applied 7290 * to the specified provider. The dtrace_provider_lock must be held, and the 7291 * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation 7292 * will need to grab the dtrace_lock when it reenters the framework through 7293 * dtrace_probe_lookup(), dtrace_probe_create(), etc. 
7294 */ 7295static void 7296dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv) 7297{ 7298 struct modctl *ctl; 7299 int all = 0; 7300 7301 lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); 7302 7303 if (prv == NULL) { 7304 all = 1; 7305 prv = dtrace_provider; 7306 } 7307 7308 do { 7309 /* 7310 * First, call the blanket provide operation. 7311 */ 7312 prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc); 7313 7314#if !defined(__APPLE__) 7315 /* 7316 * Now call the per-module provide operation. We will grab 7317 * mod_lock to prevent the list from being modified. Note 7318 * that this also prevents the mod_busy bits from changing. 7319 * (mod_busy can only be changed with mod_lock held.) 7320 */ 7321 lck_mtx_lock(&mod_lock); 7322 7323 ctl = &modules; 7324 do { 7325 if (ctl->mod_busy || ctl->mod_mp == NULL) 7326 continue; 7327 7328 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); 7329 7330 } while ((ctl = ctl->mod_next) != &modules); 7331 7332 lck_mtx_unlock(&mod_lock); 7333#else 7334#if 0 /* FIXME: Workaround for PR_4643546 */ 7335 simple_lock(&kmod_lock); 7336 7337 kmod_info_t *ktl = kmod; 7338 while (ktl) { 7339 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ktl); 7340 ktl = ktl->next; 7341 } 7342 7343 simple_unlock(&kmod_lock); 7344#else 7345 /* 7346 * Don't bother to iterate over the kmod list. At present only fbt 7347 * offers a provide_module in its dtpv_pops, and then it ignores the 7348 * module anyway. 7349 */ 7350 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, NULL); 7351#endif 7352#endif /* __APPLE__ */ 7353 } while (all && (prv = prv->dtpv_next) != NULL); 7354} 7355 7356/* 7357 * Iterate over each probe, and call the Framework-to-Provider API function 7358 * denoted by offs. 
7359 */ 7360static void 7361dtrace_probe_foreach(uintptr_t offs) 7362{ 7363 dtrace_provider_t *prov; 7364 void (*func)(void *, dtrace_id_t, void *); 7365 dtrace_probe_t *probe; 7366 dtrace_icookie_t cookie; 7367 int i; 7368 7369 /* 7370 * We disable interrupts to walk through the probe array. This is 7371 * safe -- the dtrace_sync() in dtrace_unregister() assures that we 7372 * won't see stale data. 7373 */ 7374 cookie = dtrace_interrupt_disable(); 7375 7376 for (i = 0; i < dtrace_nprobes; i++) { 7377 if ((probe = dtrace_probes[i]) == NULL) 7378 continue; 7379 7380 if (probe->dtpr_ecb == NULL) { 7381 /* 7382 * This probe isn't enabled -- don't call the function. 7383 */ 7384 continue; 7385 } 7386 7387 prov = probe->dtpr_provider; 7388 func = *((void(**)(void *, dtrace_id_t, void *)) 7389 ((uintptr_t)&prov->dtpv_pops + offs)); 7390 7391 func(prov->dtpv_arg, i + 1, probe->dtpr_arg); 7392 } 7393 7394 dtrace_interrupt_enable(cookie); 7395} 7396 7397static int 7398dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab) 7399{ 7400 dtrace_probekey_t pkey; 7401 uint32_t priv; 7402 uid_t uid; 7403 zoneid_t zoneid; 7404 7405 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 7406 7407 dtrace_ecb_create_cache = NULL; 7408 7409 if (desc == NULL) { 7410 /* 7411 * If we're passed a NULL description, we're being asked to 7412 * create an ECB with a NULL probe. 
7413 */ 7414 (void) dtrace_ecb_create_enable(NULL, enab); 7415 return (0); 7416 } 7417 7418 dtrace_probekey(desc, &pkey); 7419 dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred, 7420 &priv, &uid, &zoneid); 7421 7422 return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable, 7423 enab)); 7424} 7425 7426/* 7427 * DTrace Helper Provider Functions 7428 */ 7429static void 7430dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr) 7431{ 7432 attr->dtat_name = DOF_ATTR_NAME(dofattr); 7433 attr->dtat_data = DOF_ATTR_DATA(dofattr); 7434 attr->dtat_class = DOF_ATTR_CLASS(dofattr); 7435} 7436 7437static void 7438dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov, 7439 const dof_provider_t *dofprov, char *strtab) 7440{ 7441 hprov->dthpv_provname = strtab + dofprov->dofpv_name; 7442 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider, 7443 dofprov->dofpv_provattr); 7444 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod, 7445 dofprov->dofpv_modattr); 7446 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func, 7447 dofprov->dofpv_funcattr); 7448 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name, 7449 dofprov->dofpv_nameattr); 7450 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args, 7451 dofprov->dofpv_argsattr); 7452} 7453 7454static void 7455dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) 7456{ 7457 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; 7458 dof_hdr_t *dof = (dof_hdr_t *)daddr; 7459 dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec; 7460 dof_provider_t *provider; 7461 dof_probe_t *probe; 7462 uint32_t *off, *enoff; 7463 uint8_t *arg; 7464 char *strtab; 7465 uint_t i, nprobes; 7466 dtrace_helper_provdesc_t dhpv; 7467 dtrace_helper_probedesc_t dhpb; 7468 dtrace_meta_t *meta = dtrace_meta_pid; 7469 dtrace_mops_t *mops = &meta->dtm_mops; 7470 void *parg; 7471 7472 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset); 7473 str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + 
7474 provider->dofpv_strtab * dof->dofh_secsize); 7475 prb_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + 7476 provider->dofpv_probes * dof->dofh_secsize); 7477 arg_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + 7478 provider->dofpv_prargs * dof->dofh_secsize); 7479 off_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + 7480 provider->dofpv_proffs * dof->dofh_secsize); 7481 7482 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); 7483 off = (uint32_t *)(uintptr_t)(daddr + off_sec->dofs_offset); 7484 arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset); 7485 enoff = NULL; 7486 7487 /* 7488 * See dtrace_helper_provider_validate(). 7489 */ 7490 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && 7491 provider->dofpv_prenoffs != DOF_SECT_NONE) { 7492 enoff_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + 7493 provider->dofpv_prenoffs * dof->dofh_secsize); 7494 enoff = (uint32_t *)(uintptr_t)(daddr + enoff_sec->dofs_offset); 7495 } 7496 7497 nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize; 7498 7499 /* 7500 * Create the provider. 7501 */ 7502 dtrace_dofprov2hprov(&dhpv, provider, strtab); 7503 7504 if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL) 7505 return; 7506 7507 meta->dtm_count++; 7508 7509 /* 7510 * Create the probes. 
7511 */ 7512 for (i = 0; i < nprobes; i++) { 7513 probe = (dof_probe_t *)(uintptr_t)(daddr + 7514 prb_sec->dofs_offset + i * prb_sec->dofs_entsize); 7515 7516 dhpb.dthpb_mod = dhp->dofhp_mod; 7517 dhpb.dthpb_func = strtab + probe->dofpr_func; 7518 dhpb.dthpb_name = strtab + probe->dofpr_name; 7519#if defined(__APPLE__) 7520 dhpb.dthpb_base = dhp->dofhp_addr; 7521#else 7522 dhpb.dthpb_base = probe->dofpr_addr; 7523#endif 7524 dhpb.dthpb_offs = off + probe->dofpr_offidx; 7525 dhpb.dthpb_noffs = probe->dofpr_noffs; 7526 if (enoff != NULL) { 7527 dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx; 7528 dhpb.dthpb_nenoffs = probe->dofpr_nenoffs; 7529 } else { 7530 dhpb.dthpb_enoffs = NULL; 7531 dhpb.dthpb_nenoffs = 0; 7532 } 7533 dhpb.dthpb_args = arg + probe->dofpr_argidx; 7534 dhpb.dthpb_nargc = probe->dofpr_nargc; 7535 dhpb.dthpb_xargc = probe->dofpr_xargc; 7536 dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv; 7537 dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv; 7538 7539 mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb); 7540 } 7541} 7542 7543static void 7544dtrace_helper_provide(dof_helper_t *dhp, pid_t pid) 7545{ 7546 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; 7547 dof_hdr_t *dof = (dof_hdr_t *)daddr; 7548 int i; 7549 7550 lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); 7551 7552 for (i = 0; i < dof->dofh_secnum; i++) { 7553 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + 7554 dof->dofh_secoff + i * dof->dofh_secsize); 7555 7556 if (sec->dofs_type != DOF_SECT_PROVIDER) 7557 continue; 7558 7559 dtrace_helper_provide_one(dhp, sec, pid); 7560 } 7561 7562 /* 7563 * We may have just created probes, so we must now rematch against 7564 * any retained enablings. Note that this call will acquire both 7565 * cpu_lock and dtrace_lock; the fact that we are holding 7566 * dtrace_meta_lock now is what defines the ordering with respect to 7567 * these three locks. 
7568 */ 7569 dtrace_enabling_matchall(); 7570} 7571 7572static void 7573dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) 7574{ 7575 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; 7576 dof_hdr_t *dof = (dof_hdr_t *)daddr; 7577 dof_sec_t *str_sec; 7578 dof_provider_t *provider; 7579 char *strtab; 7580 dtrace_helper_provdesc_t dhpv; 7581 dtrace_meta_t *meta = dtrace_meta_pid; 7582 dtrace_mops_t *mops = &meta->dtm_mops; 7583 7584 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset); 7585 str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + 7586 provider->dofpv_strtab * dof->dofh_secsize); 7587 7588 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); 7589 7590 /* 7591 * Create the provider. 7592 */ 7593 dtrace_dofprov2hprov(&dhpv, provider, strtab); 7594 7595 mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid); 7596 7597 meta->dtm_count--; 7598} 7599 7600static void 7601dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid) 7602{ 7603 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; 7604 dof_hdr_t *dof = (dof_hdr_t *)daddr; 7605 int i; 7606 7607 lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); 7608 7609 for (i = 0; i < dof->dofh_secnum; i++) { 7610 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + 7611 dof->dofh_secoff + i * dof->dofh_secsize); 7612 7613 if (sec->dofs_type != DOF_SECT_PROVIDER) 7614 continue; 7615 7616 dtrace_helper_provider_remove_one(dhp, sec, pid); 7617 } 7618} 7619 7620/* 7621 * DTrace Meta Provider-to-Framework API Functions 7622 * 7623 * These functions implement the Meta Provider-to-Framework API, as described 7624 * in <sys/dtrace.h>. 7625 */ 7626int 7627dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, 7628 dtrace_meta_provider_id_t *idp) 7629{ 7630 dtrace_meta_t *meta; 7631 dtrace_helpers_t *help, *next; 7632 int i; 7633 7634 *idp = DTRACE_METAPROVNONE; 7635 7636 /* 7637 * We strictly don't need the name, but we hold onto it for 7638 * debuggability. 
All hail error queues! 7639 */ 7640 if (name == NULL) { 7641 cmn_err(CE_WARN, "failed to register meta-provider: " 7642 "invalid name"); 7643 return (EINVAL); 7644 } 7645 7646 if (mops == NULL || 7647 mops->dtms_create_probe == NULL || 7648 mops->dtms_provide_pid == NULL || 7649 mops->dtms_remove_pid == NULL) { 7650 cmn_err(CE_WARN, "failed to register meta-register %s: " 7651 "invalid ops", name); 7652 return (EINVAL); 7653 } 7654 7655 meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP); 7656 meta->dtm_mops = *mops; 7657 meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP); 7658 (void) strcpy(meta->dtm_name, name); 7659 meta->dtm_arg = arg; 7660 7661 lck_mtx_lock(&dtrace_meta_lock); 7662 lck_mtx_lock(&dtrace_lock); 7663 7664 if (dtrace_meta_pid != NULL) { 7665 lck_mtx_unlock(&dtrace_lock); 7666 lck_mtx_unlock(&dtrace_meta_lock); 7667 cmn_err(CE_WARN, "failed to register meta-register %s: " 7668 "user-land meta-provider exists", name); 7669 kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1); 7670 kmem_free(meta, sizeof (dtrace_meta_t)); 7671 return (EINVAL); 7672 } 7673 7674 dtrace_meta_pid = meta; 7675 *idp = (dtrace_meta_provider_id_t)meta; 7676 7677 /* 7678 * If there are providers and probes ready to go, pass them 7679 * off to the new meta provider now. 
7680 */ 7681 7682 help = dtrace_deferred_pid; 7683 dtrace_deferred_pid = NULL; 7684 7685 lck_mtx_unlock(&dtrace_lock); 7686 7687 while (help != NULL) { 7688 for (i = 0; i < help->dthps_nprovs; i++) { 7689 dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, 7690 help->dthps_pid); 7691 } 7692 7693 next = help->dthps_next; 7694 help->dthps_next = NULL; 7695 help->dthps_prev = NULL; 7696 help->dthps_deferred = 0; 7697 help = next; 7698 } 7699 7700 lck_mtx_unlock(&dtrace_meta_lock); 7701 7702 return (0); 7703} 7704 7705int 7706dtrace_meta_unregister(dtrace_meta_provider_id_t id) 7707{ 7708 dtrace_meta_t **pp, *old = (dtrace_meta_t *)id; 7709 7710 lck_mtx_lock(&dtrace_meta_lock); 7711 lck_mtx_lock(&dtrace_lock); 7712 7713 if (old == dtrace_meta_pid) { 7714 pp = &dtrace_meta_pid; 7715 } else { 7716 panic("attempt to unregister non-existent " 7717 "dtrace meta-provider %p\n", (void *)old); 7718 } 7719 7720 if (old->dtm_count != 0) { 7721 lck_mtx_unlock(&dtrace_lock); 7722 lck_mtx_unlock(&dtrace_meta_lock); 7723 return (EBUSY); 7724 } 7725 7726 *pp = NULL; 7727 7728 lck_mtx_unlock(&dtrace_lock); 7729 lck_mtx_unlock(&dtrace_meta_lock); 7730 7731 kmem_free(old->dtm_name, strlen(old->dtm_name) + 1); 7732 kmem_free(old, sizeof (dtrace_meta_t)); 7733 7734 return (0); 7735} 7736 7737 7738/* 7739 * DTrace DIF Object Functions 7740 */ 7741static int 7742dtrace_difo_err(uint_t pc, const char *format, ...) 7743{ 7744 if (dtrace_err_verbose) { 7745 va_list alist; 7746 7747 (void) uprintf("dtrace DIF object error: [%u]: ", pc); 7748 va_start(alist, format); 7749 (void) vuprintf(format, alist); 7750 va_end(alist); 7751 } 7752 7753#ifdef DTRACE_ERRDEBUG 7754 dtrace_errdebug(format); 7755#endif 7756 return (1); 7757} 7758 7759/* 7760 * Validate a DTrace DIF object by checking the IR instructions. The following 7761 * rules are currently enforced by dtrace_difo_validate(): 7762 * 7763 * 1. Each instruction must have a valid opcode 7764 * 2. 
Each register, string, variable, or subroutine reference must be valid 7765 * 3. No instruction can modify register %r0 (must be zero) 7766 * 4. All instruction reserved bits must be set to zero 7767 * 5. The last instruction must be a "ret" instruction 7768 * 6. All branch targets must reference a valid instruction _after_ the branch 7769 */ 7770static int 7771dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, 7772 cred_t *cr) 7773{ 7774 int err = 0, i; 7775 int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err; 7776 int kcheck; 7777 uint_t pc; 7778 7779 kcheck = cr == NULL || 7780 PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE) == 0; 7781 7782 dp->dtdo_destructive = 0; 7783 7784 for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) { 7785 dif_instr_t instr = dp->dtdo_buf[pc]; 7786 7787 uint_t r1 = DIF_INSTR_R1(instr); 7788 uint_t r2 = DIF_INSTR_R2(instr); 7789 uint_t rd = DIF_INSTR_RD(instr); 7790 uint_t rs = DIF_INSTR_RS(instr); 7791 uint_t label = DIF_INSTR_LABEL(instr); 7792 uint_t v = DIF_INSTR_VAR(instr); 7793 uint_t subr = DIF_INSTR_SUBR(instr); 7794 uint_t type = DIF_INSTR_TYPE(instr); 7795 uint_t op = DIF_INSTR_OP(instr); 7796 7797 switch (op) { 7798 case DIF_OP_OR: 7799 case DIF_OP_XOR: 7800 case DIF_OP_AND: 7801 case DIF_OP_SLL: 7802 case DIF_OP_SRL: 7803 case DIF_OP_SRA: 7804 case DIF_OP_SUB: 7805 case DIF_OP_ADD: 7806 case DIF_OP_MUL: 7807 case DIF_OP_SDIV: 7808 case DIF_OP_UDIV: 7809 case DIF_OP_SREM: 7810 case DIF_OP_UREM: 7811 case DIF_OP_COPYS: 7812 if (r1 >= nregs) 7813 err += efunc(pc, "invalid register %u\n", r1); 7814 if (r2 >= nregs) 7815 err += efunc(pc, "invalid register %u\n", r2); 7816 if (rd >= nregs) 7817 err += efunc(pc, "invalid register %u\n", rd); 7818 if (rd == 0) 7819 err += efunc(pc, "cannot write to %r0\n"); 7820 break; 7821 case DIF_OP_NOT: 7822 case DIF_OP_MOV: 7823 case DIF_OP_ALLOCS: 7824 if (r1 >= nregs) 7825 err += efunc(pc, "invalid register %u\n", r1); 7826 if (r2 != 0) 7827 err += 
efunc(pc, "non-zero reserved bits\n"); 7828 if (rd >= nregs) 7829 err += efunc(pc, "invalid register %u\n", rd); 7830 if (rd == 0) 7831 err += efunc(pc, "cannot write to %r0\n"); 7832 break; 7833 case DIF_OP_LDSB: 7834 case DIF_OP_LDSH: 7835 case DIF_OP_LDSW: 7836 case DIF_OP_LDUB: 7837 case DIF_OP_LDUH: 7838 case DIF_OP_LDUW: 7839 case DIF_OP_LDX: 7840 if (r1 >= nregs) 7841 err += efunc(pc, "invalid register %u\n", r1); 7842 if (r2 != 0) 7843 err += efunc(pc, "non-zero reserved bits\n"); 7844 if (rd >= nregs) 7845 err += efunc(pc, "invalid register %u\n", rd); 7846 if (rd == 0) 7847 err += efunc(pc, "cannot write to %r0\n"); 7848 if (kcheck) 7849 dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op + 7850 DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd); 7851 break; 7852 case DIF_OP_RLDSB: 7853 case DIF_OP_RLDSH: 7854 case DIF_OP_RLDSW: 7855 case DIF_OP_RLDUB: 7856 case DIF_OP_RLDUH: 7857 case DIF_OP_RLDUW: 7858 case DIF_OP_RLDX: 7859 if (r1 >= nregs) 7860 err += efunc(pc, "invalid register %u\n", r1); 7861 if (r2 != 0) 7862 err += efunc(pc, "non-zero reserved bits\n"); 7863 if (rd >= nregs) 7864 err += efunc(pc, "invalid register %u\n", rd); 7865 if (rd == 0) 7866 err += efunc(pc, "cannot write to %r0\n"); 7867 break; 7868 case DIF_OP_ULDSB: 7869 case DIF_OP_ULDSH: 7870 case DIF_OP_ULDSW: 7871 case DIF_OP_ULDUB: 7872 case DIF_OP_ULDUH: 7873 case DIF_OP_ULDUW: 7874 case DIF_OP_ULDX: 7875 if (r1 >= nregs) 7876 err += efunc(pc, "invalid register %u\n", r1); 7877 if (r2 != 0) 7878 err += efunc(pc, "non-zero reserved bits\n"); 7879 if (rd >= nregs) 7880 err += efunc(pc, "invalid register %u\n", rd); 7881 if (rd == 0) 7882 err += efunc(pc, "cannot write to %r0\n"); 7883 break; 7884 case DIF_OP_STB: 7885 case DIF_OP_STH: 7886 case DIF_OP_STW: 7887 case DIF_OP_STX: 7888 if (r1 >= nregs) 7889 err += efunc(pc, "invalid register %u\n", r1); 7890 if (r2 != 0) 7891 err += efunc(pc, "non-zero reserved bits\n"); 7892 if (rd >= nregs) 7893 err += efunc(pc, "invalid register %u\n", rd); 7894 if (rd == 0) 
7895 err += efunc(pc, "cannot write to 0 address\n"); 7896 break; 7897 case DIF_OP_CMP: 7898 case DIF_OP_SCMP: 7899 if (r1 >= nregs) 7900 err += efunc(pc, "invalid register %u\n", r1); 7901 if (r2 >= nregs) 7902 err += efunc(pc, "invalid register %u\n", r2); 7903 if (rd != 0) 7904 err += efunc(pc, "non-zero reserved bits\n"); 7905 break; 7906 case DIF_OP_TST: 7907 if (r1 >= nregs) 7908 err += efunc(pc, "invalid register %u\n", r1); 7909 if (r2 != 0 || rd != 0) 7910 err += efunc(pc, "non-zero reserved bits\n"); 7911 break; 7912 case DIF_OP_BA: 7913 case DIF_OP_BE: 7914 case DIF_OP_BNE: 7915 case DIF_OP_BG: 7916 case DIF_OP_BGU: 7917 case DIF_OP_BGE: 7918 case DIF_OP_BGEU: 7919 case DIF_OP_BL: 7920 case DIF_OP_BLU: 7921 case DIF_OP_BLE: 7922 case DIF_OP_BLEU: 7923 if (label >= dp->dtdo_len) { 7924 err += efunc(pc, "invalid branch target %u\n", 7925 label); 7926 } 7927 if (label <= pc) { 7928 err += efunc(pc, "backward branch to %u\n", 7929 label); 7930 } 7931 break; 7932 case DIF_OP_RET: 7933 if (r1 != 0 || r2 != 0) 7934 err += efunc(pc, "non-zero reserved bits\n"); 7935 if (rd >= nregs) 7936 err += efunc(pc, "invalid register %u\n", rd); 7937 break; 7938 case DIF_OP_NOP: 7939 case DIF_OP_POPTS: 7940 case DIF_OP_FLUSHTS: 7941 if (r1 != 0 || r2 != 0 || rd != 0) 7942 err += efunc(pc, "non-zero reserved bits\n"); 7943 break; 7944 case DIF_OP_SETX: 7945 if (DIF_INSTR_INTEGER(instr) >= dp->dtdo_intlen) { 7946 err += efunc(pc, "invalid integer ref %u\n", 7947 DIF_INSTR_INTEGER(instr)); 7948 } 7949 if (rd >= nregs) 7950 err += efunc(pc, "invalid register %u\n", rd); 7951 if (rd == 0) 7952 err += efunc(pc, "cannot write to %r0\n"); 7953 break; 7954 case DIF_OP_SETS: 7955 if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) { 7956 err += efunc(pc, "invalid string ref %u\n", 7957 DIF_INSTR_STRING(instr)); 7958 } 7959 if (rd >= nregs) 7960 err += efunc(pc, "invalid register %u\n", rd); 7961 if (rd == 0) 7962 err += efunc(pc, "cannot write to %r0\n"); 7963 break; 7964 case 
DIF_OP_LDGA: 7965 case DIF_OP_LDTA: 7966 if (r1 > DIF_VAR_ARRAY_MAX) 7967 err += efunc(pc, "invalid array %u\n", r1); 7968 if (r2 >= nregs) 7969 err += efunc(pc, "invalid register %u\n", r2); 7970 if (rd >= nregs) 7971 err += efunc(pc, "invalid register %u\n", rd); 7972 if (rd == 0) 7973 err += efunc(pc, "cannot write to %r0\n"); 7974 break; 7975 case DIF_OP_LDGS: 7976 case DIF_OP_LDTS: 7977 case DIF_OP_LDLS: 7978 case DIF_OP_LDGAA: 7979 case DIF_OP_LDTAA: 7980 if (v < DIF_VAR_OTHER_MIN || v > DIF_VAR_OTHER_MAX) 7981 err += efunc(pc, "invalid variable %u\n", v); 7982 if (rd >= nregs) 7983 err += efunc(pc, "invalid register %u\n", rd); 7984 if (rd == 0) 7985 err += efunc(pc, "cannot write to %r0\n"); 7986 break; 7987 case DIF_OP_STGS: 7988 case DIF_OP_STTS: 7989 case DIF_OP_STLS: 7990 case DIF_OP_STGAA: 7991 case DIF_OP_STTAA: 7992 if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX) 7993 err += efunc(pc, "invalid variable %u\n", v); 7994 if (rs >= nregs) 7995 err += efunc(pc, "invalid register %u\n", rd); 7996 break; 7997 case DIF_OP_CALL: 7998 if (subr > DIF_SUBR_MAX) 7999 err += efunc(pc, "invalid subr %u\n", subr); 8000 if (rd >= nregs) 8001 err += efunc(pc, "invalid register %u\n", rd); 8002 if (rd == 0) 8003 err += efunc(pc, "cannot write to %r0\n"); 8004 8005 if (subr == DIF_SUBR_COPYOUT || 8006 subr == DIF_SUBR_COPYOUTSTR) { 8007 dp->dtdo_destructive = 1; 8008 } 8009 break; 8010 case DIF_OP_PUSHTR: 8011 if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF) 8012 err += efunc(pc, "invalid ref type %u\n", type); 8013 if (r2 >= nregs) 8014 err += efunc(pc, "invalid register %u\n", r2); 8015 if (rs >= nregs) 8016 err += efunc(pc, "invalid register %u\n", rs); 8017 break; 8018 case DIF_OP_PUSHTV: 8019 if (type != DIF_TYPE_CTF) 8020 err += efunc(pc, "invalid val type %u\n", type); 8021 if (r2 >= nregs) 8022 err += efunc(pc, "invalid register %u\n", r2); 8023 if (rs >= nregs) 8024 err += efunc(pc, "invalid register %u\n", rs); 8025 break; 8026 default: 8027 err += 
efunc(pc, "invalid opcode %u\n", 8028 DIF_INSTR_OP(instr)); 8029 } 8030 } 8031 8032 if (dp->dtdo_len != 0 && 8033 DIF_INSTR_OP(dp->dtdo_buf[dp->dtdo_len - 1]) != DIF_OP_RET) { 8034 err += efunc(dp->dtdo_len - 1, 8035 "expected 'ret' as last DIF instruction\n"); 8036 } 8037 8038 if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) { 8039 /* 8040 * If we're not returning by reference, the size must be either 8041 * 0 or the size of one of the base types. 8042 */ 8043 switch (dp->dtdo_rtype.dtdt_size) { 8044 case 0: 8045 case sizeof (uint8_t): 8046 case sizeof (uint16_t): 8047 case sizeof (uint32_t): 8048 case sizeof (uint64_t): 8049 break; 8050 8051 default: 8052 err += efunc(dp->dtdo_len - 1, "bad return size"); 8053 } 8054 } 8055 8056 for (i = 0; i < dp->dtdo_varlen && err == 0; i++) { 8057 dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL; 8058 dtrace_diftype_t *vt, *et; 8059 uint_t id, ndx; 8060 8061 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL && 8062 v->dtdv_scope != DIFV_SCOPE_THREAD && 8063 v->dtdv_scope != DIFV_SCOPE_LOCAL) { 8064 err += efunc(i, "unrecognized variable scope %d\n", 8065 v->dtdv_scope); 8066 break; 8067 } 8068 8069 if (v->dtdv_kind != DIFV_KIND_ARRAY && 8070 v->dtdv_kind != DIFV_KIND_SCALAR) { 8071 err += efunc(i, "unrecognized variable type %d\n", 8072 v->dtdv_kind); 8073 break; 8074 } 8075 8076 if ((id = v->dtdv_id) > DIF_VARIABLE_MAX) { 8077 err += efunc(i, "%d exceeds variable id limit\n", id); 8078 break; 8079 } 8080 8081 if (id < DIF_VAR_OTHER_UBASE) 8082 continue; 8083 8084 /* 8085 * For user-defined variables, we need to check that this 8086 * definition is identical to any previous definition that we 8087 * encountered. 
8088 */ 8089 ndx = id - DIF_VAR_OTHER_UBASE; 8090 8091 switch (v->dtdv_scope) { 8092 case DIFV_SCOPE_GLOBAL: 8093 if (ndx < vstate->dtvs_nglobals) { 8094 dtrace_statvar_t *svar; 8095 8096 if ((svar = vstate->dtvs_globals[ndx]) != NULL) 8097 existing = &svar->dtsv_var; 8098 } 8099 8100 break; 8101 8102 case DIFV_SCOPE_THREAD: 8103 if (ndx < vstate->dtvs_ntlocals) 8104 existing = &vstate->dtvs_tlocals[ndx]; 8105 break; 8106 8107 case DIFV_SCOPE_LOCAL: 8108 if (ndx < vstate->dtvs_nlocals) { 8109 dtrace_statvar_t *svar; 8110 8111 if ((svar = vstate->dtvs_locals[ndx]) != NULL) 8112 existing = &svar->dtsv_var; 8113 } 8114 8115 break; 8116 } 8117 8118 vt = &v->dtdv_type; 8119 8120 if (vt->dtdt_flags & DIF_TF_BYREF) { 8121 if (vt->dtdt_size == 0) { 8122 err += efunc(i, "zero-sized variable\n"); 8123 break; 8124 } 8125 8126 if (v->dtdv_scope == DIFV_SCOPE_GLOBAL && 8127 vt->dtdt_size > dtrace_global_maxsize) { 8128 err += efunc(i, "oversized by-ref global\n"); 8129 break; 8130 } 8131 } 8132 8133 if (existing == NULL || existing->dtdv_id == 0) 8134 continue; 8135 8136 ASSERT(existing->dtdv_id == v->dtdv_id); 8137 ASSERT(existing->dtdv_scope == v->dtdv_scope); 8138 8139 if (existing->dtdv_kind != v->dtdv_kind) 8140 err += efunc(i, "%d changed variable kind\n", id); 8141 8142 et = &existing->dtdv_type; 8143 8144 if (vt->dtdt_flags != et->dtdt_flags) { 8145 err += efunc(i, "%d changed variable type flags\n", id); 8146 break; 8147 } 8148 8149 if (vt->dtdt_size != 0 && vt->dtdt_size != et->dtdt_size) { 8150 err += efunc(i, "%d changed variable type size\n", id); 8151 break; 8152 } 8153 } 8154 8155 return (err); 8156} 8157 8158/* 8159 * Validate a DTrace DIF object that it is to be used as a helper. Helpers 8160 * are much more constrained than normal DIFOs. Specifically, they may 8161 * not: 8162 * 8163 * 1. Make calls to subroutines other than copyin(), copyinstr() or 8164 * miscellaneous string routines 8165 * 2. 
Access DTrace variables other than the args[] array, and the
 *	curthread, pid, ppid, tid, execname, zonename, uid and gid variables.
 *   3. Have thread-local variables.
 *   4. Have dynamic variables.
 */
static int
dtrace_difo_validate_helper(dtrace_difo_t *dp)
{
	int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
	int err = 0;
	uint_t pc;

	for (pc = 0; pc < dp->dtdo_len; pc++) {
		dif_instr_t instr = dp->dtdo_buf[pc];

		uint_t v = DIF_INSTR_VAR(instr);
		uint_t subr = DIF_INSTR_SUBR(instr);
		uint_t op = DIF_INSTR_OP(instr);

		switch (op) {
		/*
		 * Arithmetic, load/store, comparison, branch, tuple-stack
		 * and local-scope variable operations are unconditionally
		 * permitted in helper context.
		 */
		case DIF_OP_OR:
		case DIF_OP_XOR:
		case DIF_OP_AND:
		case DIF_OP_SLL:
		case DIF_OP_SRL:
		case DIF_OP_SRA:
		case DIF_OP_SUB:
		case DIF_OP_ADD:
		case DIF_OP_MUL:
		case DIF_OP_SDIV:
		case DIF_OP_UDIV:
		case DIF_OP_SREM:
		case DIF_OP_UREM:
		case DIF_OP_COPYS:
		case DIF_OP_NOT:
		case DIF_OP_MOV:
		case DIF_OP_RLDSB:
		case DIF_OP_RLDSH:
		case DIF_OP_RLDSW:
		case DIF_OP_RLDUB:
		case DIF_OP_RLDUH:
		case DIF_OP_RLDUW:
		case DIF_OP_RLDX:
		case DIF_OP_ULDSB:
		case DIF_OP_ULDSH:
		case DIF_OP_ULDSW:
		case DIF_OP_ULDUB:
		case DIF_OP_ULDUH:
		case DIF_OP_ULDUW:
		case DIF_OP_ULDX:
		case DIF_OP_STB:
		case DIF_OP_STH:
		case DIF_OP_STW:
		case DIF_OP_STX:
		case DIF_OP_ALLOCS:
		case DIF_OP_CMP:
		case DIF_OP_SCMP:
		case DIF_OP_TST:
		case DIF_OP_BA:
		case DIF_OP_BE:
		case DIF_OP_BNE:
		case DIF_OP_BG:
		case DIF_OP_BGU:
		case DIF_OP_BGE:
		case DIF_OP_BGEU:
		case DIF_OP_BL:
		case DIF_OP_BLU:
		case DIF_OP_BLE:
		case DIF_OP_BLEU:
		case DIF_OP_RET:
		case DIF_OP_NOP:
		case DIF_OP_POPTS:
		case DIF_OP_FLUSHTS:
		case DIF_OP_SETX:
		case DIF_OP_SETS:
		case DIF_OP_LDGA:
		case DIF_OP_LDLS:
		case DIF_OP_STGS:
		case DIF_OP_STLS:
		case DIF_OP_PUSHTR:
		case DIF_OP_PUSHTV:
			break;

		/*
		 * Global variable loads are restricted to user-defined
		 * variables, args[0..9], and a small set of benign
		 * built-in variables (constraint 2 above).
		 */
		case DIF_OP_LDGS:
			if (v >= DIF_VAR_OTHER_UBASE)
				break;

			if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9)
				break;

			if (v == DIF_VAR_CURTHREAD || v == DIF_VAR_PID ||
			    v == DIF_VAR_PPID || v == DIF_VAR_TID ||
			    v == DIF_VAR_EXECNAME || v == DIF_VAR_ZONENAME ||
			    v == DIF_VAR_UID || v == DIF_VAR_GID)
				break;

			err += efunc(pc, "illegal variable %u\n", v);
			break;

		/*
		 * Constraints 3 and 4: no thread-local or dynamic
		 * variable access in helpers.
		 */
		case DIF_OP_LDTA:
		case DIF_OP_LDTS:
		case DIF_OP_LDGAA:
		case DIF_OP_LDTAA:
			err += efunc(pc, "illegal dynamic variable load\n");
			break;

		case DIF_OP_STTS:
		case DIF_OP_STGAA:
		case DIF_OP_STTAA:
			err += efunc(pc, "illegal dynamic variable store\n");
			break;

		/*
		 * Constraint 1: only copyin-style and string subroutines
		 * (plus the Apple-specific CHUD subroutine) may be called.
		 */
		case DIF_OP_CALL:
			if (subr == DIF_SUBR_ALLOCA ||
			    subr == DIF_SUBR_BCOPY ||
			    subr == DIF_SUBR_COPYIN ||
			    subr == DIF_SUBR_COPYINTO ||
			    subr == DIF_SUBR_COPYINSTR ||
			    subr == DIF_SUBR_INDEX ||
			    subr == DIF_SUBR_LLTOSTR ||
			    subr == DIF_SUBR_RINDEX ||
			    subr == DIF_SUBR_STRCHR ||
			    subr == DIF_SUBR_STRJOIN ||
			    subr == DIF_SUBR_STRRCHR ||
			    subr == DIF_SUBR_STRSTR ||
			    subr == DIF_SUBR_CHUD)
				break;

			err += efunc(pc, "invalid subr %u\n", subr);
			break;

		default:
			err += efunc(pc, "invalid opcode %u\n",
			    DIF_INSTR_OP(instr));
		}
	}

	return (err);
}

/*
 * Returns 1 if the expression in the DIF object can be cached on a per-thread
 * basis; 0 if not.
8308 */ 8309static int 8310dtrace_difo_cacheable(dtrace_difo_t *dp) 8311{ 8312 int i; 8313 8314 if (dp == NULL) 8315 return (0); 8316 8317 for (i = 0; i < dp->dtdo_varlen; i++) { 8318 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 8319 8320 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL) 8321 continue; 8322 8323 switch (v->dtdv_id) { 8324 case DIF_VAR_CURTHREAD: 8325 case DIF_VAR_PID: 8326 case DIF_VAR_TID: 8327 case DIF_VAR_EXECNAME: 8328 case DIF_VAR_ZONENAME: 8329 break; 8330 8331 default: 8332 return (0); 8333 } 8334 } 8335 8336 /* 8337 * This DIF object may be cacheable. Now we need to look for any 8338 * array loading instructions, any memory loading instructions, or 8339 * any stores to thread-local variables. 8340 */ 8341 for (i = 0; i < dp->dtdo_len; i++) { 8342 uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]); 8343 8344 if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) || 8345 (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) || 8346 (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) || 8347 op == DIF_OP_LDGA || op == DIF_OP_STTS) 8348 return (0); 8349 } 8350 8351 return (1); 8352} 8353 8354static void 8355dtrace_difo_hold(dtrace_difo_t *dp) 8356{ 8357 int i; 8358 8359 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 8360 8361 dp->dtdo_refcnt++; 8362 ASSERT(dp->dtdo_refcnt != 0); 8363 8364 /* 8365 * We need to check this DIF object for references to the variable 8366 * DIF_VAR_VTIMESTAMP. 8367 */ 8368 for (i = 0; i < dp->dtdo_varlen; i++) { 8369 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 8370 8371 if (v->dtdv_id != DIF_VAR_VTIMESTAMP) 8372 continue; 8373 8374 if (dtrace_vtime_references++ == 0) 8375 dtrace_vtime_enable(); 8376 } 8377} 8378 8379/* 8380 * This routine calculates the dynamic variable chunksize for a given DIF 8381 * object. The calculation is not fool-proof, and can probably be tricked by 8382 * malicious DIF -- but it works for all compiler-generated DIF. 
Because this
 * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail
 * if a dynamic variable size exceeds the chunksize.
 */
static void
dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
	uint64_t sval;
	dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
	const dif_instr_t *text = dp->dtdo_buf;
	uint_t pc, srd = 0;
	uint_t ttop = 0;
	size_t size, ksize;
	uint_t id, i;

	/*
	 * Walk the DIF text, symbolically tracking the tuple stack (ttop)
	 * and the most recent SETX destination register/value (srd/sval),
	 * in order to bound the size of each dynamic variable allocation.
	 *
	 * NOTE(review): sval is read only when srd != 0, which can only be
	 * set together with sval by a preceding DIF_OP_SETX -- confirm that
	 * no path reads sval before any SETX executes.
	 */
	for (pc = 0; pc < dp->dtdo_len; pc++) {
		dif_instr_t instr = text[pc];
		uint_t op = DIF_INSTR_OP(instr);
		uint_t rd = DIF_INSTR_RD(instr);
		uint_t r1 = DIF_INSTR_R1(instr);
		uint_t nkeys = 0;
		uchar_t scope;

		dtrace_key_t *key = tupregs;

		switch (op) {
		case DIF_OP_SETX:
			sval = dp->dtdo_inttab[DIF_INSTR_INTEGER(instr)];
			srd = rd;
			continue;

		case DIF_OP_STTS:
			/* Thread-local store: two implicit keys. */
			key = &tupregs[DIF_DTR_NREGS];
			key[0].dttk_size = 0;
			key[1].dttk_size = 0;
			nkeys = 2;
			scope = DIFV_SCOPE_THREAD;
			break;

		case DIF_OP_STGAA:
		case DIF_OP_STTAA:
			nkeys = ttop;

			if (DIF_INSTR_OP(instr) == DIF_OP_STTAA)
				key[nkeys++].dttk_size = 0;

			key[nkeys++].dttk_size = 0;

			if (op == DIF_OP_STTAA) {
				scope = DIFV_SCOPE_THREAD;
			} else {
				scope = DIFV_SCOPE_GLOBAL;
			}

			break;

		case DIF_OP_PUSHTR:
			if (ttop == DIF_DTR_NREGS)
				return;

			if ((srd == 0 || sval == 0) && r1 == DIF_TYPE_STRING) {
				/*
				 * If the register for the size of the "pushtr"
				 * is %r0 (or the value is 0) and the type is
				 * a string, we'll use the system-wide default
				 * string size.
				 */
				tupregs[ttop++].dttk_size =
				    dtrace_strsize_default;
			} else {
				if (srd == 0)
					return;

				tupregs[ttop++].dttk_size = sval;
			}

			break;

		case DIF_OP_PUSHTV:
			if (ttop == DIF_DTR_NREGS)
				return;

			tupregs[ttop++].dttk_size = 0;
			break;

		case DIF_OP_FLUSHTS:
			ttop = 0;
			break;

		case DIF_OP_POPTS:
			if (ttop != 0)
				ttop--;
			break;
		}

		/* Any instruction other than SETX invalidates srd/sval. */
		sval = 0;
		srd = 0;

		if (nkeys == 0)
			continue;

		/*
		 * We have a dynamic variable allocation; calculate its size.
		 */
		for (ksize = 0, i = 0; i < nkeys; i++)
			ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));

		size = sizeof (dtrace_dynvar_t);
		size += sizeof (dtrace_key_t) * (nkeys - 1);
		size += ksize;

		/*
		 * Now we need to determine the size of the stored data.
		 */
		id = DIF_INSTR_VAR(instr);

		for (i = 0; i < dp->dtdo_varlen; i++) {
			dtrace_difv_t *v = &dp->dtdo_vartab[i];

			if (v->dtdv_id == id && v->dtdv_scope == scope) {
				size += v->dtdv_type.dtdt_size;
				break;
			}
		}

		if (i == dp->dtdo_varlen)
			return;

		/*
		 * We have the size.  If this is larger than the chunk size
		 * for our dynamic variable state, reset the chunk size.
		 */
		size = P2ROUNDUP(size, sizeof (uint64_t));

		if (size > vstate->dtvs_dynvars.dtds_chunksize)
			vstate->dtvs_dynvars.dtds_chunksize = size;
	}
}

/*
 * Bind a DIF object to the given vstate: grow the per-scope variable
 * tables as needed, allocate backing storage for static variables, and
 * take a reference on the DIFO.  The caller must hold dtrace_lock.
 */
static void
dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
	int i, oldsvars, osz, nsz, otlocals, ntlocals;
	uint_t id;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0);

	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];
		dtrace_statvar_t *svar, ***svarp;
		size_t dsize = 0;
		uint8_t scope = v->dtdv_scope;
		int *np;

		if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
			continue;

		id -= DIF_VAR_OTHER_UBASE;

		switch (scope) {
		case DIFV_SCOPE_THREAD:
			/* Double the tlocals table until id fits. */
			while (id >= (otlocals = vstate->dtvs_ntlocals)) {
				dtrace_difv_t *tlocals;

				if ((ntlocals = (otlocals << 1)) == 0)
					ntlocals = 1;

				osz = otlocals * sizeof (dtrace_difv_t);
				nsz = ntlocals * sizeof (dtrace_difv_t);

				tlocals = kmem_zalloc(nsz, KM_SLEEP);

				if (osz != 0) {
					bcopy(vstate->dtvs_tlocals,
					    tlocals, osz);
					kmem_free(vstate->dtvs_tlocals, osz);
				}

				vstate->dtvs_tlocals = tlocals;
				vstate->dtvs_ntlocals = ntlocals;
			}

			vstate->dtvs_tlocals[id] = *v;
			continue;

		case DIFV_SCOPE_LOCAL:
			np = &vstate->dtvs_nlocals;
			svarp = &vstate->dtvs_locals;

			/* Clause-locals get one slot per CPU. */
			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
				dsize = (int)NCPU * (v->dtdv_type.dtdt_size +
				    sizeof (uint64_t));
			else
				dsize = (int)NCPU * sizeof (uint64_t);

			break;

		case DIFV_SCOPE_GLOBAL:
			np = &vstate->dtvs_nglobals;
			svarp = &vstate->dtvs_globals;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
				dsize = v->dtdv_type.dtdt_size +
				    sizeof (uint64_t);

			break;

		default:
			ASSERT(0);
		}

		/* Double the statics table until id fits. */
		while (id >=
(oldsvars = *np)) { 8595 dtrace_statvar_t **statics; 8596 int newsvars, oldsize, newsize; 8597 8598 if ((newsvars = (oldsvars << 1)) == 0) 8599 newsvars = 1; 8600 8601 oldsize = oldsvars * sizeof (dtrace_statvar_t *); 8602 newsize = newsvars * sizeof (dtrace_statvar_t *); 8603 8604 statics = kmem_zalloc(newsize, KM_SLEEP); 8605 8606 if (oldsize != 0) { 8607 bcopy(*svarp, statics, oldsize); 8608 kmem_free(*svarp, oldsize); 8609 } 8610 8611 *svarp = statics; 8612 *np = newsvars; 8613 } 8614 8615 if ((svar = (*svarp)[id]) == NULL) { 8616 svar = kmem_zalloc(sizeof (dtrace_statvar_t), KM_SLEEP); 8617 svar->dtsv_var = *v; 8618 8619 if ((svar->dtsv_size = dsize) != 0) { 8620 svar->dtsv_data = (uint64_t)(uintptr_t) 8621 kmem_zalloc(dsize, KM_SLEEP); 8622 } 8623 8624 (*svarp)[id] = svar; 8625 } 8626 8627 svar->dtsv_refcnt++; 8628 } 8629 8630 dtrace_difo_chunksize(dp, vstate); 8631 dtrace_difo_hold(dp); 8632} 8633 8634static dtrace_difo_t * 8635dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 8636{ 8637 dtrace_difo_t *new; 8638 size_t sz; 8639 8640 ASSERT(dp->dtdo_buf != NULL); 8641 ASSERT(dp->dtdo_refcnt != 0); 8642 8643 new = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP); 8644 8645 ASSERT(dp->dtdo_buf != NULL); 8646 sz = dp->dtdo_len * sizeof (dif_instr_t); 8647 new->dtdo_buf = kmem_alloc(sz, KM_SLEEP); 8648 bcopy(dp->dtdo_buf, new->dtdo_buf, sz); 8649 new->dtdo_len = dp->dtdo_len; 8650 8651 if (dp->dtdo_strtab != NULL) { 8652 ASSERT(dp->dtdo_strlen != 0); 8653 new->dtdo_strtab = kmem_alloc(dp->dtdo_strlen, KM_SLEEP); 8654 bcopy(dp->dtdo_strtab, new->dtdo_strtab, dp->dtdo_strlen); 8655 new->dtdo_strlen = dp->dtdo_strlen; 8656 } 8657 8658 if (dp->dtdo_inttab != NULL) { 8659 ASSERT(dp->dtdo_intlen != 0); 8660 sz = dp->dtdo_intlen * sizeof (uint64_t); 8661 new->dtdo_inttab = kmem_alloc(sz, KM_SLEEP); 8662 bcopy(dp->dtdo_inttab, new->dtdo_inttab, sz); 8663 new->dtdo_intlen = dp->dtdo_intlen; 8664 } 8665 8666 if (dp->dtdo_vartab != NULL) { 8667 
ASSERT(dp->dtdo_varlen != 0); 8668 sz = dp->dtdo_varlen * sizeof (dtrace_difv_t); 8669 new->dtdo_vartab = kmem_alloc(sz, KM_SLEEP); 8670 bcopy(dp->dtdo_vartab, new->dtdo_vartab, sz); 8671 new->dtdo_varlen = dp->dtdo_varlen; 8672 } 8673 8674 dtrace_difo_init(new, vstate); 8675 return (new); 8676} 8677 8678static void 8679dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 8680{ 8681 int i; 8682 8683 ASSERT(dp->dtdo_refcnt == 0); 8684 8685 for (i = 0; i < dp->dtdo_varlen; i++) { 8686 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 8687 dtrace_statvar_t *svar, **svarp; 8688 uint_t id; 8689 uint8_t scope = v->dtdv_scope; 8690 int *np; 8691 8692 switch (scope) { 8693 case DIFV_SCOPE_THREAD: 8694 continue; 8695 8696 case DIFV_SCOPE_LOCAL: 8697 np = &vstate->dtvs_nlocals; 8698 svarp = vstate->dtvs_locals; 8699 break; 8700 8701 case DIFV_SCOPE_GLOBAL: 8702 np = &vstate->dtvs_nglobals; 8703 svarp = vstate->dtvs_globals; 8704 break; 8705 8706 default: 8707 ASSERT(0); 8708 } 8709 8710 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE) 8711 continue; 8712 8713 id -= DIF_VAR_OTHER_UBASE; 8714 ASSERT(id < *np); 8715 8716 svar = svarp[id]; 8717 ASSERT(svar != NULL); 8718 ASSERT(svar->dtsv_refcnt > 0); 8719 8720 if (--svar->dtsv_refcnt > 0) 8721 continue; 8722 8723 if (svar->dtsv_size != 0) { 8724 ASSERT(svar->dtsv_data != NULL); 8725 kmem_free((void *)(uintptr_t)svar->dtsv_data, 8726 svar->dtsv_size); 8727 } 8728 8729 kmem_free(svar, sizeof (dtrace_statvar_t)); 8730 svarp[id] = NULL; 8731 } 8732 8733 kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t)); 8734 kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t)); 8735 kmem_free(dp->dtdo_strtab, dp->dtdo_strlen); 8736 kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t)); 8737 8738 kmem_free(dp, sizeof (dtrace_difo_t)); 8739} 8740 8741static void 8742dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 8743{ 8744 int i; 8745 8746 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 8747 
ASSERT(dp->dtdo_refcnt != 0);

	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];

		if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
			continue;

		ASSERT(dtrace_vtime_references > 0);
		if (--dtrace_vtime_references == 0)
			dtrace_vtime_disable();
	}

	if (--dp->dtdo_refcnt == 0)
		dtrace_difo_destroy(dp, vstate);
}

/*
 * DTrace Format Functions
 */

/*
 * Intern a copy of the given format string into the state's format table
 * and return its 1-based format index; returns 0 on failure (table full).
 */
static uint16_t
dtrace_format_add(dtrace_state_t *state, char *str)
{
	char *fmt, **new;
	uint16_t ndx, len = strlen(str) + 1;

	fmt = kmem_zalloc(len, KM_SLEEP);
	bcopy(str, fmt, len);

	/* Reuse a previously-vacated slot if one exists. */
	for (ndx = 0; ndx < state->dts_nformats; ndx++) {
		if (state->dts_formats[ndx] == NULL) {
			state->dts_formats[ndx] = fmt;
			return (ndx + 1);
		}
	}

	if (state->dts_nformats == USHRT_MAX) {
		/*
		 * This is only likely if a denial-of-service attack is being
		 * attempted.  As such, it's okay to fail silently here.
		 */
		kmem_free(fmt, len);
		return (0);
	}

	/*
	 * For simplicity, we always resize the formats array to be exactly the
	 * number of formats.
	 */
	ndx = state->dts_nformats++;
	new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP);

	if (state->dts_formats != NULL) {
		ASSERT(ndx != 0);
		bcopy(state->dts_formats, new, ndx * sizeof (char *));
		kmem_free(state->dts_formats, ndx * sizeof (char *));
	}

	state->dts_formats = new;
	state->dts_formats[ndx] = fmt;

	return (ndx + 1);
}

/*
 * Remove the format with the given 1-based index, leaving its table slot
 * vacant for reuse by dtrace_format_add().
 */
static void
dtrace_format_remove(dtrace_state_t *state, uint16_t format)
{
	char *fmt;

	ASSERT(state->dts_formats != NULL);
	ASSERT(format <= state->dts_nformats);
	ASSERT(state->dts_formats[format - 1] != NULL);

	fmt = state->dts_formats[format - 1];
	kmem_free(fmt, strlen(fmt) + 1);
	state->dts_formats[format - 1] = NULL;
}

/*
 * Free every interned format string and the format table itself.
 */
static void
dtrace_format_destroy(dtrace_state_t *state)
{
	int i;

	if (state->dts_nformats == 0) {
		ASSERT(state->dts_formats == NULL);
		return;
	}

	ASSERT(state->dts_formats != NULL);

	for (i = 0; i < state->dts_nformats; i++) {
		char *fmt = state->dts_formats[i];

		if (fmt == NULL)
			continue;

		kmem_free(fmt, strlen(fmt) + 1);
	}

	kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *));
	state->dts_nformats = 0;
	state->dts_formats = NULL;
}

/*
 * DTrace Predicate Functions
 */

/*
 * Create a predicate around the given DIFO (whose reference the caller has
 * already taken), assigning it a predicate-cache ID when the DIFO is
 * cacheable.  The caller must hold dtrace_lock.
 */
static dtrace_predicate_t *
dtrace_predicate_create(dtrace_difo_t *dp)
{
	dtrace_predicate_t *pred;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dp->dtdo_refcnt != 0);

	pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP);
	pred->dtp_difo = dp;
	pred->dtp_refcnt = 1;

	if (!dtrace_difo_cacheable(dp))
		return (pred);

	if (dtrace_predcache_id == DTRACE_CACHEIDNONE) {
		/*
		 * This is only theoretically possible -- we have had 2^32
		 * cacheable predicates on this machine.
We cannot allow any 8873 * more predicates to become cacheable: as unlikely as it is, 8874 * there may be a thread caching a (now stale) predicate cache 8875 * ID. (N.B.: the temptation is being successfully resisted to 8876 * have this cmn_err() "Holy shit -- we executed this code!") 8877 */ 8878 return (pred); 8879 } 8880 8881 pred->dtp_cacheid = dtrace_predcache_id++; 8882 8883 return (pred); 8884} 8885 8886static void 8887dtrace_predicate_hold(dtrace_predicate_t *pred) 8888{ 8889 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 8890 ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0); 8891 ASSERT(pred->dtp_refcnt > 0); 8892 8893 pred->dtp_refcnt++; 8894} 8895 8896static void 8897dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate) 8898{ 8899 dtrace_difo_t *dp = pred->dtp_difo; 8900 8901 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 8902 ASSERT(dp != NULL && dp->dtdo_refcnt != 0); 8903 ASSERT(pred->dtp_refcnt > 0); 8904 8905 if (--pred->dtp_refcnt == 0) { 8906 dtrace_difo_release(pred->dtp_difo, vstate); 8907 kmem_free(pred, sizeof (dtrace_predicate_t)); 8908 } 8909} 8910 8911/* 8912 * DTrace Action Description Functions 8913 */ 8914static dtrace_actdesc_t * 8915dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple, 8916 uint64_t uarg, uint64_t arg) 8917{ 8918 dtrace_actdesc_t *act; 8919 8920/* ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL && 8921 arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA));*/ 8922 8923 act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP); 8924 act->dtad_kind = kind; 8925 act->dtad_ntuple = ntuple; 8926 act->dtad_uarg = uarg; 8927 act->dtad_arg = arg; 8928 act->dtad_refcnt = 1; 8929 8930 return (act); 8931} 8932 8933static void 8934dtrace_actdesc_hold(dtrace_actdesc_t *act) 8935{ 8936 ASSERT(act->dtad_refcnt >= 1); 8937 act->dtad_refcnt++; 8938} 8939 8940static void 8941dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate) 8942{ 8943 
dtrace_actkind_t kind = act->dtad_kind;
	dtrace_difo_t *dp;

	ASSERT(act->dtad_refcnt >= 1);

	if (--act->dtad_refcnt != 0)
		return;

	if ((dp = act->dtad_difo) != NULL)
		dtrace_difo_release(dp, vstate);

	if (DTRACEACT_ISPRINTFLIKE(kind)) {
		/* For printf-like actions, dtad_arg holds the format string. */
		char *str = (char *)(uintptr_t)act->dtad_arg;

/*		ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) ||
		(str == NULL && act->dtad_kind == DTRACEACT_PRINTA));*/

		if (str != NULL)
			kmem_free(str, strlen(str) + 1);
	}

	kmem_free(act, sizeof (dtrace_actdesc_t));
}

/*
 * DTrace ECB Functions
 */

/*
 * Allocate a new enabling control block (ECB) for the given probe within
 * the given consumer state, assigning it the next EPID and growing the
 * state's EPID-indexed ECB array as needed.  The caller must hold
 * dtrace_lock.
 */
static dtrace_ecb_t *
dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe)
{
	dtrace_ecb_t *ecb;
	dtrace_epid_t epid;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP);
	ecb->dte_predicate = NULL;
	ecb->dte_probe = probe;

	/*
	 * The default size is the size of the default action: recording
	 * the epid.
	 */
	ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t);
	ecb->dte_alignment = sizeof (dtrace_epid_t);

	epid = state->dts_epid++;

	if (epid - 1 >= state->dts_necbs) {
		dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs;
		int necbs = state->dts_necbs << 1;

		ASSERT(epid == state->dts_necbs + 1);

		if (necbs == 0) {
			ASSERT(oecbs == NULL);
			necbs = 1;
		}

		ecbs = kmem_zalloc(necbs * sizeof (*ecbs), KM_SLEEP);

		if (oecbs != NULL)
			bcopy(oecbs, ecbs, state->dts_necbs * sizeof (*ecbs));

		/*
		 * Publish the new array before the new count so that a
		 * concurrent reader never indexes past a visible array.
		 */
		dtrace_membar_producer();
		state->dts_ecbs = ecbs;

		if (oecbs != NULL) {
			/*
			 * If this state is active, we must dtrace_sync()
			 * before we can free the old dts_ecbs array: we're
			 * coming in hot, and there may be active ring
			 * buffer processing (which indexes into the dts_ecbs
			 * array) on another CPU.
			 */
			if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
				dtrace_sync();

			kmem_free(oecbs, state->dts_necbs * sizeof (*ecbs));
		}

		dtrace_membar_producer();
		state->dts_necbs = necbs;
	}

	ecb->dte_state = state;

	ASSERT(state->dts_ecbs[epid - 1] == NULL);
	dtrace_membar_producer();
	state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb;

	return (ecb);
}

/*
 * Hook the given ECB onto its probe's ECB chain, enabling the probe via
 * its provider if this is the first ECB.  The caller must hold cpu_lock
 * and dtrace_lock.
 */
static void
dtrace_ecb_enable(dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;

	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(ecb->dte_next == NULL);

	if (probe == NULL) {
		/*
		 * This is the NULL probe -- there's nothing to do.
		 */
		return;
	}

	if (probe->dtpr_ecb == NULL) {
		dtrace_provider_t *prov = probe->dtpr_provider;

		/*
		 * We're the first ECB on this probe.
 */
		probe->dtpr_ecb = probe->dtpr_ecb_last = ecb;

		if (ecb->dte_predicate != NULL)
			probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid;

		prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
		    probe->dtpr_id, probe->dtpr_arg);
	} else {
		/*
		 * This probe is already active.  Swing the last pointer to
		 * point to the new ECB, and issue a dtrace_sync() to assure
		 * that all CPUs have seen the change.
		 */
		ASSERT(probe->dtpr_ecb_last != NULL);
		probe->dtpr_ecb_last->dte_next = ecb;
		probe->dtpr_ecb_last = ecb;

		/*
		 * With multiple ECBs, a single cached predicate ID is no
		 * longer valid for the probe.
		 */
		probe->dtpr_predcache = 0;

		dtrace_sync();
	}
}

/*
 * Lay out the record offsets for every action on the given ECB, computing
 * the ECB's record size (dte_size), required buffer space (dte_needed)
 * and alignment (dte_alignment).
 */
static void
dtrace_ecb_resize(dtrace_ecb_t *ecb)
{
	uint32_t maxalign = sizeof (dtrace_epid_t);
	uint32_t align = sizeof (uint8_t), offs, diff;
	dtrace_action_t *act;
	int wastuple = 0;
	uint32_t aggbase = UINT32_MAX;
	dtrace_state_t *state = ecb->dte_state;

	/*
	 * If we record anything, we always record the epid.  (And we always
	 * record it first.)
	 */
	offs = sizeof (dtrace_epid_t);
	ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t);

	for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
		dtrace_recdesc_t *rec = &act->dta_rec;

		if ((align = rec->dtrd_alignment) > maxalign)
			maxalign = align;

		if (!wastuple && act->dta_intuple) {
			/*
			 * This is the first record in a tuple.  Align the
			 * offset to be at offset 4 in an 8-byte aligned
			 * block.
			 */
			diff = offs + sizeof (dtrace_aggid_t);

			if ((diff = (diff & (sizeof (uint64_t) - 1))))
				offs += sizeof (uint64_t) - diff;

			aggbase = offs - sizeof (dtrace_aggid_t);
			ASSERT(!(aggbase & (sizeof (uint64_t) - 1)));
		}

		/*LINTED*/
		if (rec->dtrd_size != 0 && (diff = (offs & (align - 1)))) {
			/*
			 * The current offset is not properly aligned; align it.
			 */
			offs += align - diff;
		}

		rec->dtrd_offset = offs;

		if (offs + rec->dtrd_size > ecb->dte_needed) {
			ecb->dte_needed = offs + rec->dtrd_size;

			if (ecb->dte_needed > state->dts_needed)
				state->dts_needed = ecb->dte_needed;
		}

		if (DTRACEACT_ISAGG(act->dta_kind)) {
			dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
			dtrace_action_t *first = agg->dtag_first, *prev;

			ASSERT(rec->dtrd_size != 0 && first != NULL);
			ASSERT(wastuple);
			ASSERT(aggbase != UINT32_MAX);

			agg->dtag_base = aggbase;

			/*
			 * Walk back over any immediately preceding
			 * aggregations so that offs resumes after the last
			 * non-aggregating record.
			 */
			while ((prev = first->dta_prev) != NULL &&
			    DTRACEACT_ISAGG(prev->dta_kind)) {
				agg = (dtrace_aggregation_t *)prev;
				first = agg->dtag_first;
			}

			if (prev != NULL) {
				offs = prev->dta_rec.dtrd_offset +
				    prev->dta_rec.dtrd_size;
			} else {
				offs = sizeof (dtrace_epid_t);
			}
			wastuple = 0;
		} else {
			if (!act->dta_intuple)
				ecb->dte_size = offs + rec->dtrd_size;

			offs += rec->dtrd_size;
		}

		wastuple = act->dta_intuple;
	}

	if ((act = ecb->dte_action) != NULL &&
	    !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) &&
	    ecb->dte_size == sizeof (dtrace_epid_t)) {
		/*
		 * If the size is still sizeof (dtrace_epid_t), then all
		 * actions store no data; set the size to 0.
		 */
		ecb->dte_alignment = maxalign;
		ecb->dte_size = 0;

		/*
		 * If the needed space is still sizeof (dtrace_epid_t), then
		 * all actions need no additional space; set the needed
		 * size to 0.
		 */
		if (ecb->dte_needed == sizeof (dtrace_epid_t))
			ecb->dte_needed = 0;

		return;
	}

	/*
	 * Set our alignment, and make sure that the dte_size and dte_needed
	 * are aligned to the size of an EPID.
9193 */ 9194 ecb->dte_alignment = maxalign; 9195 ecb->dte_size = (ecb->dte_size + (sizeof (dtrace_epid_t) - 1)) & 9196 ~(sizeof (dtrace_epid_t) - 1); 9197 ecb->dte_needed = (ecb->dte_needed + (sizeof (dtrace_epid_t) - 1)) & 9198 ~(sizeof (dtrace_epid_t) - 1); 9199 ASSERT(ecb->dte_size <= ecb->dte_needed); 9200} 9201 9202static dtrace_action_t * 9203dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) 9204{ 9205 dtrace_aggregation_t *agg; 9206 size_t size = sizeof (uint64_t); 9207 int ntuple = desc->dtad_ntuple; 9208 dtrace_action_t *act; 9209 dtrace_recdesc_t *frec; 9210 dtrace_aggid_t aggid; 9211 dtrace_state_t *state = ecb->dte_state; 9212 9213 agg = kmem_zalloc(sizeof (dtrace_aggregation_t), KM_SLEEP); 9214 agg->dtag_ecb = ecb; 9215 9216 ASSERT(DTRACEACT_ISAGG(desc->dtad_kind)); 9217 9218 switch (desc->dtad_kind) { 9219 case DTRACEAGG_MIN: 9220 agg->dtag_initial = UINT64_MAX; 9221 agg->dtag_aggregate = dtrace_aggregate_min; 9222 break; 9223 9224 case DTRACEAGG_MAX: 9225 agg->dtag_aggregate = dtrace_aggregate_max; 9226 break; 9227 9228 case DTRACEAGG_COUNT: 9229 agg->dtag_aggregate = dtrace_aggregate_count; 9230 break; 9231 9232 case DTRACEAGG_QUANTIZE: 9233 agg->dtag_aggregate = dtrace_aggregate_quantize; 9234 size = (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) * 9235 sizeof (uint64_t); 9236 break; 9237 9238 case DTRACEAGG_LQUANTIZE: { 9239 uint16_t step = DTRACE_LQUANTIZE_STEP(desc->dtad_arg); 9240 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(desc->dtad_arg); 9241 9242 agg->dtag_initial = desc->dtad_arg; 9243 agg->dtag_aggregate = dtrace_aggregate_lquantize; 9244 9245 if (step == 0 || levels == 0) 9246 goto err; 9247 9248 size = levels * sizeof (uint64_t) + 3 * sizeof (uint64_t); 9249 break; 9250 } 9251 9252 case DTRACEAGG_AVG: 9253 agg->dtag_aggregate = dtrace_aggregate_avg; 9254 size = sizeof (uint64_t) * 2; 9255 break; 9256 9257 case DTRACEAGG_SUM: 9258 agg->dtag_aggregate = dtrace_aggregate_sum; 9259 break; 9260 9261 default: 9262 goto err; 
	}

	agg->dtag_action.dta_rec.dtrd_size = size;

	if (ntuple == 0)
		goto err;

	/*
	 * We must make sure that we have enough actions for the n-tuple.
	 */
	for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) {
		if (DTRACEACT_ISAGG(act->dta_kind))
			break;

		if (--ntuple == 0) {
			/*
			 * This is the action with which our n-tuple begins.
			 */
			agg->dtag_first = act;
			goto success;
		}
	}

	/*
	 * This n-tuple is short by ntuple elements.  Return failure.
	 */
	ASSERT(ntuple != 0);
err:
	kmem_free(agg, sizeof (dtrace_aggregation_t));
	return (NULL);

success:
	/*
	 * If the last action in the tuple has a size of zero, it's actually
	 * an expression argument for the aggregating action.
	 */
	ASSERT(ecb->dte_action_last != NULL);
	act = ecb->dte_action_last;

	if (act->dta_kind == DTRACEACT_DIFEXPR) {
		ASSERT(act->dta_difo != NULL);

		if (act->dta_difo->dtdo_rtype.dtdt_size == 0)
			agg->dtag_hasarg = 1;
	}

	/*
	 * We need to allocate an id for this aggregation.
	 */
	aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1,
	    VM_BESTFIT | VM_SLEEP);

	if (aggid - 1 >= state->dts_naggregations) {
		/*
		 * The ID is beyond the current aggregation array; grow the
		 * array by doubling (starting from one).
		 */
		dtrace_aggregation_t **oaggs = state->dts_aggregations;
		dtrace_aggregation_t **aggs;
		int naggs = state->dts_naggregations << 1;
		int onaggs = state->dts_naggregations;

		ASSERT(aggid == state->dts_naggregations + 1);

		if (naggs == 0) {
			ASSERT(oaggs == NULL);
			naggs = 1;
		}

		aggs = kmem_zalloc(naggs * sizeof (*aggs), KM_SLEEP);

		if (oaggs != NULL) {
			bcopy(oaggs, aggs, onaggs * sizeof (*aggs));
			kmem_free(oaggs, onaggs * sizeof (*aggs));
		}

		state->dts_aggregations = aggs;
		state->dts_naggregations = naggs;
	}

	ASSERT(state->dts_aggregations[aggid - 1] == NULL);
	state->dts_aggregations[(agg->dtag_id = aggid) - 1] = agg;

	/*
	 * The first record of the tuple must be at least as aligned as the
	 * aggregation ID that precedes it.
	 */
	frec = &agg->dtag_first->dta_rec;
	if (frec->dtrd_alignment < sizeof (dtrace_aggid_t))
		frec->dtrd_alignment = sizeof (dtrace_aggid_t);

	/* Mark every action in the n-tuple as belonging to a tuple. */
	for (act = agg->dtag_first; act != NULL; act = act->dta_next) {
		ASSERT(!act->dta_intuple);
		act->dta_intuple = 1;
	}

	return (&agg->dtag_action);
}

/*
 * Tear down an aggregation action:  return its ID to the arena, clear
 * its slot in the state's aggregation array, and free it.
 */
static void
dtrace_ecb_aggregation_destroy(dtrace_ecb_t *ecb, dtrace_action_t *act)
{
	dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
	dtrace_state_t *state = ecb->dte_state;
	dtrace_aggid_t aggid = agg->dtag_id;

	ASSERT(DTRACEACT_ISAGG(act->dta_kind));
	vmem_free(state->dts_aggid_arena, (void *)(uintptr_t)aggid, 1);

	ASSERT(state->dts_aggregations[aggid - 1] == agg);
	state->dts_aggregations[aggid - 1] = NULL;

	kmem_free(agg, sizeof (dtrace_aggregation_t));
}

/*
 * Validate the action description and append the corresponding action
 * (with its record size, alignment, argument and format) to the ECB's
 * action list.  Returns 0 on success or EINVAL on any validation
 * failure.
 */
static int
dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
{
	dtrace_action_t *action, *last;
	dtrace_difo_t *dp = desc->dtad_difo;
	uint32_t size = 0, align =
	    sizeof (uint8_t), mask;
	uint16_t format = 0;
	dtrace_recdesc_t *rec;
	dtrace_state_t *state = ecb->dte_state;
	dtrace_optval_t *opt = state->dts_options, nframes, strsize;
	uint64_t arg = desc->dtad_arg;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1);

	if (DTRACEACT_ISAGG(desc->dtad_kind)) {
		/*
		 * If this is an aggregating action, there must be neither
		 * a speculate nor a commit on the action chain.
		 */
		dtrace_action_t *act;

		for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
			if (act->dta_kind == DTRACEACT_COMMIT)
				return (EINVAL);

			if (act->dta_kind == DTRACEACT_SPECULATE)
				return (EINVAL);
		}

		action = dtrace_ecb_aggregation_create(ecb, desc);

		if (action == NULL)
			return (EINVAL);
	} else {
		if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) ||
		    (desc->dtad_kind == DTRACEACT_DIFEXPR &&
		    dp != NULL && dp->dtdo_destructive)) {
			state->dts_destructive = 1;
		}

		switch (desc->dtad_kind) {
		case DTRACEACT_PRINTF:
		case DTRACEACT_PRINTA:
		case DTRACEACT_SYSTEM:
		case DTRACEACT_FREOPEN:
			/*
			 * We know that our arg is a string -- turn it into a
			 * format.
			 *
			 * NOTE(review): arg is a uint64_t; comparing it to
			 * NULL relies on NULL being 0 here -- an explicit
			 * (arg == 0) would be type-correct.
			 */
			if (arg == NULL) {
				ASSERT(desc->dtad_kind == DTRACEACT_PRINTA);
				format = 0;
			} else {
				ASSERT(arg != NULL);
				/* ASSERT(arg > KERNELBASE); */
				format = dtrace_format_add(state,
				    (char *)(uintptr_t)arg);
			}

			/*FALLTHROUGH*/
		case DTRACEACT_LIBACT:
		case DTRACEACT_DIFEXPR:
			if (dp == NULL)
				return (EINVAL);

			if ((size = dp->dtdo_rtype.dtdt_size) != 0)
				break;

			if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) {
				if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
					return (EINVAL);

				size = opt[DTRACEOPT_STRSIZE];
			}

			break;

		case DTRACEACT_STACK:
			if ((nframes = arg) == 0) {
				nframes = opt[DTRACEOPT_STACKFRAMES];
				ASSERT(nframes > 0);
				arg = nframes;
			}

			size = nframes * sizeof (pc_t);
			break;

		case DTRACEACT_JSTACK:
			if ((strsize = DTRACE_USTACK_STRSIZE(arg)) == 0)
				strsize = opt[DTRACEOPT_JSTACKSTRSIZE];

			if ((nframes = DTRACE_USTACK_NFRAMES(arg)) == 0)
				nframes = opt[DTRACEOPT_JSTACKFRAMES];

			arg = DTRACE_USTACK_ARG(nframes, strsize);

			/*FALLTHROUGH*/
		case DTRACEACT_USTACK:
			if (desc->dtad_kind != DTRACEACT_JSTACK &&
			    (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) {
				strsize = DTRACE_USTACK_STRSIZE(arg);
				nframes = opt[DTRACEOPT_USTACKFRAMES];
				ASSERT(nframes > 0);
				arg = DTRACE_USTACK_ARG(nframes, strsize);
			}

			/*
			 * Save a slot for the pid.
			 */
			size = (nframes + 1) * sizeof (uint64_t);
			size += DTRACE_USTACK_STRSIZE(arg);
			size = P2ROUNDUP(size, (uint32_t)(sizeof (uintptr_t)));

			break;

		case DTRACEACT_SYM:
		case DTRACEACT_MOD:
			if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) !=
			    sizeof (uint64_t)) ||
			    (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
				return (EINVAL);
			break;

		case DTRACEACT_USYM:
		case DTRACEACT_UMOD:
		case DTRACEACT_UADDR:
			if (dp == NULL ||
			    (dp->dtdo_rtype.dtdt_size != sizeof (uint64_t)) ||
			    (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
				return (EINVAL);

			/*
			 * We have a slot for the pid, plus a slot for the
			 * argument.  To keep things simple (aligned with
			 * bitness-neutral sizing), we store each as a 64-bit
			 * quantity.
			 */
			size = 2 * sizeof (uint64_t);
			break;

		case DTRACEACT_STOP:
		case DTRACEACT_BREAKPOINT:
		case DTRACEACT_PANIC:
			break;

		case DTRACEACT_CHILL:
		case DTRACEACT_DISCARD:
		case DTRACEACT_RAISE:
			if (dp == NULL)
				return (EINVAL);
			break;

		case DTRACEACT_EXIT:
			if (dp == NULL ||
			    (size = dp->dtdo_rtype.dtdt_size) != sizeof (int) ||
			    (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
				return (EINVAL);
			break;

		case DTRACEACT_SPECULATE:
			/* A speculate must be the first action on the ECB. */
			if (ecb->dte_size > sizeof (dtrace_epid_t))
				return (EINVAL);

			if (dp == NULL)
				return (EINVAL);

			state->dts_speculates = 1;
			break;

		case DTRACEACT_COMMIT: {
			dtrace_action_t *act = ecb->dte_action;

			/* Only one commit is permitted per ECB. */
			for (; act != NULL; act = act->dta_next) {
				if (act->dta_kind == DTRACEACT_COMMIT)
					return (EINVAL);
			}

			if (dp == NULL)
				return (EINVAL);
			break;
		}

		default:
			return (EINVAL);
		}

		if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) {
			/*
			 * If this is a data-storing action or a speculate,
			 * we must be sure that there isn't a commit on the
			 * action chain.
			 */
			dtrace_action_t *act = ecb->dte_action;

			for (; act != NULL; act = act->dta_next) {
				if (act->dta_kind == DTRACEACT_COMMIT)
					return (EINVAL);
			}
		}

		action = kmem_zalloc(sizeof (dtrace_action_t), KM_SLEEP);
		action->dta_rec.dtrd_size = size;
	}

	action->dta_refcnt = 1;
	rec = &action->dta_rec;
	size = rec->dtrd_size;

	/*
	 * Derive the record alignment from the record size:  the largest
	 * power of two (up to 8) that divides the size.
	 */
	for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) {
		if (!(size & mask)) {
			align = mask + 1;
			break;
		}
	}

	action->dta_kind = desc->dtad_kind;

	if ((action->dta_difo = dp) != NULL)
		dtrace_difo_hold(dp);

	rec->dtrd_action = action->dta_kind;
	rec->dtrd_arg = arg;
	rec->dtrd_uarg = desc->dtad_uarg;
	rec->dtrd_alignment = (uint16_t)align;
	rec->dtrd_format = format;

	/* Link the new action onto the tail of the ECB's action list. */
	if ((last = ecb->dte_action_last) != NULL) {
		ASSERT(ecb->dte_action != NULL);
		action->dta_prev = last;
		last->dta_next = action;
	} else {
		ASSERT(ecb->dte_action == NULL);
		ecb->dte_action = action;
	}

	ecb->dte_action_last = action;

	return (0);
}

/*
 * Release the ECB's action list.  If the list is shared (refcnt > 1,
 * i.e. it was cached by dtrace_ecb_create()), just drop the reference;
 * otherwise free every action along with its format and DIF object.
 */
static void
dtrace_ecb_action_remove(dtrace_ecb_t *ecb)
{
	dtrace_action_t *act = ecb->dte_action, *next;
	dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate;
	dtrace_difo_t *dp;
	uint16_t format;

	if (act != NULL && act->dta_refcnt > 1) {
		ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1);
		act->dta_refcnt--;
	} else {
		for (; act != NULL; act = next) {
			next = act->dta_next;
			ASSERT(next != NULL || act == ecb->dte_action_last);
			ASSERT(act->dta_refcnt == 1);

			if ((format = act->dta_rec.dtrd_format) != 0)
				dtrace_format_remove(ecb->dte_state, format);

			if ((dp = act->dta_difo) != NULL)
				dtrace_difo_release(dp, vstate);

			if
			    (DTRACEACT_ISAGG(act->dta_kind)) {
				dtrace_ecb_aggregation_destroy(ecb, act);
			} else {
				kmem_free(act, sizeof (dtrace_action_t));
			}
		}
	}

	ecb->dte_action = NULL;
	ecb->dte_action_last = NULL;
	ecb->dte_size = sizeof (dtrace_epid_t);
}

static void
dtrace_ecb_disable(dtrace_ecb_t *ecb)
{
	/*
	 * We disable the ECB by removing it from its probe.
	 */
	dtrace_ecb_t *pecb, *prev = NULL;
	dtrace_probe_t *probe = ecb->dte_probe;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (probe == NULL) {
		/*
		 * This is the NULL probe; there is nothing to disable.
		 */
		return;
	}

	/* Find the ECB on the probe's list, remembering its predecessor. */
	for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) {
		if (pecb == ecb)
			break;
		prev = pecb;
	}

	ASSERT(pecb != NULL);

	if (prev == NULL) {
		probe->dtpr_ecb = ecb->dte_next;
	} else {
		prev->dte_next = ecb->dte_next;
	}

	if (ecb == probe->dtpr_ecb_last) {
		ASSERT(ecb->dte_next == NULL);
		probe->dtpr_ecb_last = prev;
	}

	/*
	 * The ECB has been disconnected from the probe; now sync to assure
	 * that all CPUs have seen the change before returning.
	 */
	dtrace_sync();

	if (probe->dtpr_ecb == NULL) {
		/*
		 * That was the last ECB on the probe; clear the predicate
		 * cache ID for the probe, disable it and sync one more time
		 * to assure that we'll never hit it again.
		 */
		dtrace_provider_t *prov = probe->dtpr_provider;

		ASSERT(ecb->dte_next == NULL);
		ASSERT(probe->dtpr_ecb_last == NULL);
		probe->dtpr_predcache = DTRACE_CACHEIDNONE;
		prov->dtpv_pops.dtps_disable(prov->dtpv_arg,
		    probe->dtpr_id, probe->dtpr_arg);
		dtrace_sync();
	} else {
		/*
		 * There is at least one ECB remaining on the probe.  If there
		 * is _exactly_ one, set the probe's predicate cache ID to be
		 * the predicate cache ID of the remaining ECB.
		 */
		ASSERT(probe->dtpr_ecb_last != NULL);
		ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE);

		if (probe->dtpr_ecb == probe->dtpr_ecb_last) {
			dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate;

			ASSERT(probe->dtpr_ecb->dte_next == NULL);

			if (p != NULL)
				probe->dtpr_predcache = p->dtp_cacheid;
		}

		ecb->dte_next = NULL;
	}
}

/*
 * Free a disabled ECB:  release its predicate and actions, and clear
 * its EPID slot in the state.  The ECB must already be off its probe.
 */
static void
dtrace_ecb_destroy(dtrace_ecb_t *ecb)
{
	dtrace_state_t *state = ecb->dte_state;
	dtrace_vstate_t *vstate = &state->dts_vstate;
	dtrace_predicate_t *pred;
	dtrace_epid_t epid = ecb->dte_epid;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(ecb->dte_next == NULL);
	ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb);

	if ((pred = ecb->dte_predicate) != NULL)
		dtrace_predicate_release(pred, vstate);

	dtrace_ecb_action_remove(ecb);

	ASSERT(state->dts_ecbs[epid - 1] == ecb);
	state->dts_ecbs[epid - 1] = NULL;

	kmem_free(ecb, sizeof (dtrace_ecb_t));
}

/*
 * Build an ECB on the given probe from the enabling's current ECB
 * description, applying any implicit privilege-based predicate bits.
 * Returns the new ECB, or NULL (with dten_error set) if an action
 * could not be added.
 */
static dtrace_ecb_t *
dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe,
    dtrace_enabling_t *enab)
{
	dtrace_ecb_t *ecb;
	dtrace_predicate_t *pred;
	dtrace_actdesc_t *act;
	dtrace_provider_t *prov;
	dtrace_ecbdesc_t *desc = enab->dten_current;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(state != NULL);

	ecb = dtrace_ecb_add(state, probe);
	ecb->dte_uarg = desc->dted_uarg;

	if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) {
		dtrace_predicate_hold(pred);
		ecb->dte_predicate = pred;
	}

	if (probe != NULL) {
		/*
		 * If the provider shows more leg than the consumer is old
		 * enough to see, we need to enable the appropriate implicit
		 * predicate bits to prevent the ecb from activating at
		 * revealing times.
		 *
		 * Providers specifying DTRACE_PRIV_USER at register time
		 * are stating that they need the /proc-style privilege
		 * model to be enforced, and this is what DTRACE_COND_OWNER
		 * and DTRACE_COND_ZONEOWNER will then do at probe time.
		 */
		prov = probe->dtpr_provider;
		if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) &&
		    (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
			ecb->dte_cond |= DTRACE_COND_OWNER;

		if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) &&
		    (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
			ecb->dte_cond |= DTRACE_COND_ZONEOWNER;

		/*
		 * If the provider shows us kernel innards and the user
		 * is lacking sufficient privilege, enable the
		 * DTRACE_COND_USERMODE implicit predicate.
		 */
		if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) &&
		    (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL))
			ecb->dte_cond |= DTRACE_COND_USERMODE;
	}

	if (dtrace_ecb_create_cache != NULL) {
		/*
		 * If we have a cached ecb, we'll use its action list instead
		 * of creating our own (saving both time and space).  The
		 * shared list is reference-counted via dta_refcnt.
		 */
		dtrace_ecb_t *cached = dtrace_ecb_create_cache;
		dtrace_action_t *act_if = cached->dte_action;

		if (act_if != NULL) {
			ASSERT(act_if->dta_refcnt > 0);
			act_if->dta_refcnt++;
			ecb->dte_action = act_if;
			ecb->dte_action_last = cached->dte_action_last;
			ecb->dte_needed = cached->dte_needed;
			ecb->dte_size = cached->dte_size;
			ecb->dte_alignment = cached->dte_alignment;
		}

		return (ecb);
	}

	for (act = desc->dted_action; act != NULL; act = act->dtad_next) {
		if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) {
			dtrace_ecb_destroy(ecb);
			return (NULL);
		}
	}

	dtrace_ecb_resize(ecb);

	/* Cache this ECB so subsequent creations can share its actions. */
	return (dtrace_ecb_create_cache = ecb);
}

/*
 * Probe-matching callback:  create and enable an ECB on each matching
 * probe for the given enabling.  Returns DTRACE_MATCH_NEXT to continue
 * matching or DTRACE_MATCH_DONE on ECB creation failure.
 */
static int
dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg)
{
	dtrace_ecb_t *ecb;
	dtrace_enabling_t *enab = arg;
	dtrace_state_t *state = enab->dten_vstate->dtvs_state;

	ASSERT(state != NULL);

	if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) {
		/*
		 * This probe was created in a generation for which this
		 * enabling has previously created ECBs; we don't want to
		 * enable it again, so just kick out.
		 */
		return (DTRACE_MATCH_NEXT);
	}

	if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
		return (DTRACE_MATCH_DONE);

	dtrace_ecb_enable(ecb);
	return (DTRACE_MATCH_NEXT);
}

/*
 * Look up the ECB for the given EPID; returns NULL if the ID is zero,
 * out of range, or unallocated.  Called with dtrace_lock held.
 */
static dtrace_ecb_t *
dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id)
{
	dtrace_ecb_t *ecb;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (id == 0 || id > state->dts_necbs)
		return (NULL);

	ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL);
	ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id);

	return (state->dts_ecbs[id - 1]);
}

/*
 * Look up the aggregation for the given aggregation ID; returns NULL
 * if the ID is zero, out of range, or unallocated.  Called with
 * dtrace_lock held.
 */
static dtrace_aggregation_t *
dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id)
{
	dtrace_aggregation_t *agg;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (id == 0 || id > state->dts_naggregations)
		return (NULL);

	ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL);
	ASSERT((agg = state->dts_aggregations[id - 1]) == NULL ||
	    agg->dtag_id == id);

	return (state->dts_aggregations[id - 1]);
}

/*
 * DTrace Buffer Functions
 *
 * The following functions manipulate DTrace buffers.  Most of these functions
 * are called in the context of establishing or processing consumer state;
 * exceptions are explicitly noted.
 */

/*
 * Note:  called from cross call context.  This function switches the two
 * buffers on a given CPU.  The atomicity of this operation is assured by
 * disabling interrupts while the actual switch takes place; the disabling of
 * interrupts serializes the execution with any execution of dtrace_probe() on
 * the same CPU.
 */
static void
dtrace_buffer_switch(dtrace_buffer_t *buf)
{
	caddr_t tomax = buf->dtb_tomax;
	caddr_t xamot = buf->dtb_xamot;
	dtrace_icookie_t cookie;

	ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
	ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));

	cookie = dtrace_interrupt_disable();
	/*
	 * Swap the active and inactive buffers, preserving the outgoing
	 * buffer's statistics in the dtb_xamot_* fields and resetting the
	 * incoming buffer's bookkeeping.
	 */
	buf->dtb_tomax = xamot;
	buf->dtb_xamot = tomax;
	buf->dtb_xamot_drops = buf->dtb_drops;
	buf->dtb_xamot_offset = buf->dtb_offset;
	buf->dtb_xamot_errors = buf->dtb_errors;
	buf->dtb_xamot_flags = buf->dtb_flags;
	buf->dtb_offset = 0;
	buf->dtb_drops = 0;
	buf->dtb_errors = 0;
	buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED);
	dtrace_interrupt_enable(cookie);
}

/*
 * Note:  called from cross call context.  This function activates a buffer
 * on a CPU.  As with dtrace_buffer_switch(), the atomicity of the operation
 * is guaranteed by the disabling of interrupts.
 */
static void
dtrace_buffer_activate(dtrace_state_t *state)
{
	dtrace_buffer_t *buf;
	dtrace_icookie_t cookie = dtrace_interrupt_disable();

	buf = &state->dts_buffer[CPU->cpu_id];

	if (buf->dtb_tomax != NULL) {
		/*
		 * We might like to assert that the buffer is marked inactive,
		 * but this isn't necessarily true: the buffer for the CPU
		 * that processes the BEGIN probe has its buffer activated
		 * manually.  In this case, we take the (harmless) action of
		 * re-clearing the INACTIVE bit.
		 */
		buf->dtb_flags &= ~DTRACEBUF_INACTIVE;
	}

	dtrace_interrupt_enable(cookie);
}

/*
 * Allocate per-CPU principal (and, unless DTRACEBUF_NOSWITCH, alternate)
 * buffers of the given size for the specified CPU, or for all CPUs if
 * cpu is DTRACE_CPUALL.  On any allocation failure, every buffer
 * allocated by this call is freed and ENOMEM is returned; an
 * over-large request by an unprivileged caller returns EFBIG.
 */
static int
dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
    processorid_t cpu)
{
	cpu_t *cp;
	dtrace_buffer_t *buf;

	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (size > dtrace_nonroot_maxsize &&
	    !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE))
		return (EFBIG);

#if defined(__APPLE__)
	if (size > (sane_size / 8) / (int)NCPU) /* As in kdbg_set_nkdbufs(), roughly. */
		return (ENOMEM);
#endif /* __APPLE__ */

	cp = cpu_list;

	do {
		if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
			continue;

		buf = &bufs[cp->cpu_id];

		/*
		 * If there is already a buffer allocated for this CPU, it
		 * is only possible that this is a DR event.  In this case,
		 * the buffer size must match our specified size.
		 */
		if (buf->dtb_tomax != NULL) {
			ASSERT(buf->dtb_size == size);
			continue;
		}

		ASSERT(buf->dtb_xamot == NULL);

		if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
			goto err;

		buf->dtb_size = size;
		buf->dtb_flags = flags;
		buf->dtb_offset = 0;
		buf->dtb_drops = 0;

		/* No-switch buffers don't need an alternate buffer. */
		if (flags & DTRACEBUF_NOSWITCH)
			continue;

		if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
			goto err;
	} while ((cp = cp->cpu_next) != cpu_list);

	return (0);

err:
	/* Unwind:  free whatever we managed to allocate above. */
	cp = cpu_list;

	do {
		if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
			continue;

		buf = &bufs[cp->cpu_id];

		if (buf->dtb_xamot != NULL) {
			ASSERT(buf->dtb_tomax != NULL);
			ASSERT(buf->dtb_size == size);
			kmem_free(buf->dtb_xamot, size);
		}

		if (buf->dtb_tomax != NULL) {
			ASSERT(buf->dtb_size == size);
			kmem_free(buf->dtb_tomax, size);
		}

		buf->dtb_tomax = NULL;
		buf->dtb_xamot = NULL;
		buf->dtb_size = 0;
	} while ((cp = cp->cpu_next) != cpu_list);

	return (ENOMEM);
}

/*
 * Note:  called from probe context.  This function just increments the drop
 * count on a buffer.  It has been made a function to allow for the
 * possibility of understanding the source of mysterious drop counts.  (A
 * problem for which one may be particularly disappointed that DTrace cannot
 * be used to understand DTrace.)
 */
static void
dtrace_buffer_drop(dtrace_buffer_t *buf)
{
	buf->dtb_drops++;
}

/*
 * Note:  called from probe context.  This function is called to reserve space
 * in a buffer.  If mstate is non-NULL, sets the scratch base and size in the
 * mstate.  Returns the new offset in the buffer, or a negative value if an
 * error has occurred.
 */
static intptr_t
dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align,
    dtrace_state_t *state, dtrace_mstate_t *mstate)
{
	intptr_t offs = buf->dtb_offset, soffs;
	intptr_t woffs;
	caddr_t tomax;
	size_t total_off;

	if (buf->dtb_flags & DTRACEBUF_INACTIVE)
		return (-1);

	if ((tomax = buf->dtb_tomax) == NULL) {
		dtrace_buffer_drop(buf);
		return (-1);
	}

	if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) {
		/* Switch buffer:  pad to alignment, then check for room. */
		while (offs & (align - 1)) {
			/*
			 * Assert that our alignment is off by a number which
			 * is itself sizeof (uint32_t) aligned.
			 */
			ASSERT(!((align - (offs & (align - 1))) &
			    (sizeof (uint32_t) - 1)));
			DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
			offs += sizeof (uint32_t);
		}

		if ((soffs = offs + needed) > buf->dtb_size) {
			dtrace_buffer_drop(buf);
			return (-1);
		}

		if (mstate == NULL)
			return (offs);

		/* Scratch lives in the unused tail of the active buffer. */
		mstate->dtms_scratch_base = (uintptr_t)tomax + soffs;
		mstate->dtms_scratch_size = buf->dtb_size - soffs;
		mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;

		return (offs);
	}

	if (buf->dtb_flags & DTRACEBUF_FILL) {
		if (state->dts_activity != DTRACE_ACTIVITY_COOLDOWN &&
		    (buf->dtb_flags & DTRACEBUF_FULL))
			return (-1);
		goto out;
	}

	total_off = needed + (offs & (align - 1));

	/*
	 * For a ring buffer, life is quite a bit more complicated.  Before
	 * we can store any padding, we need to adjust our wrapping offset.
	 * (If we've never before wrapped or we're not about to, no adjustment
	 * is required.)
	 */
	if ((buf->dtb_flags & DTRACEBUF_WRAPPED) ||
	    offs + total_off > buf->dtb_size) {
		woffs = buf->dtb_xamot_offset;

		if (offs + total_off > buf->dtb_size) {
			/*
			 * We can't fit in the end of the buffer.  First, a
			 * sanity check that we can fit in the buffer at all.
			 */
			if (total_off > buf->dtb_size) {
				dtrace_buffer_drop(buf);
				return (-1);
			}

			/*
			 * We're going to be storing at the top of the buffer,
			 * so now we need to deal with the wrapped offset.  We
			 * only reset our wrapped offset to 0 if it is
			 * currently greater than the current offset.  If it
			 * is less than the current offset, it is because a
			 * previous allocation induced a wrap -- but the
			 * allocation didn't subsequently take the space due
			 * to an error or false predicate evaluation.  In this
			 * case, we'll just leave the wrapped offset alone: if
			 * the wrapped offset hasn't been advanced far enough
			 * for this allocation, it will be adjusted in the
			 * lower loop.
			 */
			if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
				if (woffs >= offs)
					woffs = 0;
			} else {
				woffs = 0;
			}

			/*
			 * Now we know that we're going to be storing to the
			 * top of the buffer and that there is room for us
			 * there.  We need to clear the buffer from the current
			 * offset to the end (there may be old gunk there).
			 */
			while (offs < buf->dtb_size)
				tomax[offs++] = 0;

			/*
			 * We need to set our offset to zero.  And because we
			 * are wrapping, we need to set the bit indicating as
			 * much.  We can also adjust our needed space back
			 * down to the space required by the ECB -- we know
			 * that the top of the buffer is aligned.
			 */
			offs = 0;
			total_off = needed;
			buf->dtb_flags |= DTRACEBUF_WRAPPED;
		} else {
			/*
			 * There is room for us in the buffer, so we simply
			 * need to check the wrapped offset.
			 */
			if (woffs < offs) {
				/*
				 * The wrapped offset is less than the offset.
				 * This can happen if we allocated buffer space
				 * that induced a wrap, but then we didn't
				 * subsequently take the space due to an error
				 * or false predicate evaluation.  This is
				 * okay; we know that _this_ allocation isn't
				 * going to induce a wrap.  We still can't
				 * reset the wrapped offset to be zero,
				 * however: the space may have been trashed in
				 * the previous failed probe attempt.  But at
				 * least the wrapped offset doesn't need to
				 * be adjusted at all...
				 */
				goto out;
			}
		}

		/*
		 * Advance the wrapped offset past every record that our
		 * reservation will overwrite.
		 */
		while (offs + total_off > woffs) {
			dtrace_epid_t epid = *(uint32_t *)(tomax + woffs);
			size_t size;

			if (epid == DTRACE_EPIDNONE) {
				size = sizeof (uint32_t);
			} else {
				ASSERT(epid <= state->dts_necbs);
				ASSERT(state->dts_ecbs[epid - 1] != NULL);

				size = state->dts_ecbs[epid - 1]->dte_size;
			}

			ASSERT(woffs + size <= buf->dtb_size);
			ASSERT(size != 0);

			if (woffs + size == buf->dtb_size) {
				/*
				 * We've reached the end of the buffer; we want
				 * to set the wrapped offset to 0 and break
				 * out.  However, if the offs is 0, then we're
				 * in a strange edge-condition:  the amount of
				 * space that we want to reserve plus the size
				 * of the record that we're overwriting is
				 * greater than the size of the buffer.  This
				 * is problematic because if we reserve the
				 * space but subsequently don't consume it (due
				 * to a failed predicate or error) the wrapped
				 * offset will be 0 -- yet the EPID at offset 0
				 * will not be committed.  This situation is
				 * relatively easy to deal with:  if we're in
				 * this case, the buffer is indistinguishable
				 * from one that hasn't wrapped; we need only
				 * finish the job by clearing the wrapped bit,
				 * explicitly setting the offset to be 0, and
				 * zero'ing out the old data in the buffer.
				 */
				if (offs == 0) {
					buf->dtb_flags &= ~DTRACEBUF_WRAPPED;
					buf->dtb_offset = 0;
					woffs = total_off;

					while (woffs < buf->dtb_size)
						tomax[woffs++] = 0;
				}

				woffs = 0;
				break;
			}

			woffs += size;
		}

		/*
		 * We have a wrapped offset.  It may be that the wrapped offset
		 * has become zero -- that's okay.
		 */
		buf->dtb_xamot_offset = woffs;
	}

out:
	/*
	 * Now we can plow the buffer with any necessary padding.
	 */
	while (offs & (align - 1)) {
		/*
		 * Assert that our alignment is off by a number which
		 * is itself sizeof (uint32_t) aligned.
		 */
		ASSERT(!((align - (offs & (align - 1))) &
		    (sizeof (uint32_t) - 1)));
		DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
		offs += sizeof (uint32_t);
	}

	if (buf->dtb_flags & DTRACEBUF_FILL) {
		if (offs + needed > buf->dtb_size - state->dts_reserve) {
			buf->dtb_flags |= DTRACEBUF_FULL;
			return (-1);
		}
	}

	if (mstate == NULL)
		return (offs);

	/*
	 * For ring buffers and fill buffers, the scratch space is always
	 * the inactive buffer.
	 */
	mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot;
	mstate->dtms_scratch_size = buf->dtb_size;
	mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;

	return (offs);
}

/*
 * Zero the non-record ("gap") regions of a wrapped ring buffer so that
 * only valid data remains.  Called with dtrace_lock held; a no-op for
 * buffers that have never wrapped.
 */
static void
dtrace_buffer_polish(dtrace_buffer_t *buf)
{
	ASSERT(buf->dtb_flags & DTRACEBUF_RING);
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (!(buf->dtb_flags & DTRACEBUF_WRAPPED))
		return;

	/*
	 * We need to polish the ring buffer.  There are three cases:
	 *
	 * - The first (and presumably most common) is that there is no gap
	 *   between the buffer offset and the wrapped offset.  In this case,
	 *   there is nothing in the buffer that isn't valid data; we can
	 *   mark the buffer as polished and return.
	 *
	 * - The second (less common than the first but still more common
	 *   than the third) is that there is a gap between the buffer offset
	 *   and the wrapped offset, and the wrapped offset is larger than the
	 *   buffer offset.  This can happen because of an alignment issue, or
	 *   can happen because of a call to dtrace_buffer_reserve() that
	 *   didn't subsequently consume the buffer space.  In this case,
	 *   we need to zero the data from the buffer offset to the wrapped
	 *   offset.
	 *
	 * - The third (and least common) is that there is a gap between the
	 *   buffer offset and the wrapped offset, but the wrapped offset is
	 *   _less_ than the buffer offset.  This can only happen because a
	 *   call to dtrace_buffer_reserve() induced a wrap, but the space
	 *   was not subsequently consumed.  In this case, we need to zero the
	 *   space from the offset to the end of the buffer _and_ from the
	 *   top of the buffer to the wrapped offset.
10327 */ 10328 if (buf->dtb_offset < buf->dtb_xamot_offset) { 10329 bzero(buf->dtb_tomax + buf->dtb_offset, 10330 buf->dtb_xamot_offset - buf->dtb_offset); 10331 } 10332 10333 if (buf->dtb_offset > buf->dtb_xamot_offset) { 10334 bzero(buf->dtb_tomax + buf->dtb_offset, 10335 buf->dtb_size - buf->dtb_offset); 10336 bzero(buf->dtb_tomax, buf->dtb_xamot_offset); 10337 } 10338} 10339 10340static void 10341dtrace_buffer_free(dtrace_buffer_t *bufs) 10342{ 10343 int i; 10344 10345 for (i = 0; i < (int)NCPU; i++) { 10346 dtrace_buffer_t *buf = &bufs[i]; 10347 10348 if (buf->dtb_tomax == NULL) { 10349 ASSERT(buf->dtb_xamot == NULL); 10350 ASSERT(buf->dtb_size == 0); 10351 continue; 10352 } 10353 10354 if (buf->dtb_xamot != NULL) { 10355 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); 10356 kmem_free(buf->dtb_xamot, buf->dtb_size); 10357 } 10358 10359 kmem_free(buf->dtb_tomax, buf->dtb_size); 10360 buf->dtb_size = 0; 10361 buf->dtb_tomax = NULL; 10362 buf->dtb_xamot = NULL; 10363 } 10364} 10365 10366/* 10367 * DTrace Enabling Functions 10368 */ 10369static dtrace_enabling_t * 10370dtrace_enabling_create(dtrace_vstate_t *vstate) 10371{ 10372 dtrace_enabling_t *enab; 10373 10374 enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP); 10375 enab->dten_vstate = vstate; 10376 10377 return (enab); 10378} 10379 10380static void 10381dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb) 10382{ 10383 dtrace_ecbdesc_t **ndesc; 10384 size_t osize, nsize; 10385 10386 /* 10387 * We can't add to enablings after we've enabled them, or after we've 10388 * retained them. 
10389 */ 10390 ASSERT(enab->dten_probegen == 0); 10391 ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); 10392 10393#if defined(__APPLE__) 10394 if (ecb == NULL) return; /* XXX protection against gcc 4.0 botch on x86 */ 10395#endif /* __APPLE__ */ 10396 10397 if (enab->dten_ndesc < enab->dten_maxdesc) { 10398 enab->dten_desc[enab->dten_ndesc++] = ecb; 10399 return; 10400 } 10401 10402 osize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *); 10403 10404 if (enab->dten_maxdesc == 0) { 10405 enab->dten_maxdesc = 1; 10406 } else { 10407 enab->dten_maxdesc <<= 1; 10408 } 10409 10410 ASSERT(enab->dten_ndesc < enab->dten_maxdesc); 10411 10412 nsize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *); 10413 ndesc = kmem_zalloc(nsize, KM_SLEEP); 10414 bcopy(enab->dten_desc, ndesc, osize); 10415 kmem_free(enab->dten_desc, osize); 10416 10417 enab->dten_desc = ndesc; 10418 enab->dten_desc[enab->dten_ndesc++] = ecb; 10419} 10420 10421static void 10422dtrace_enabling_addlike(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb, 10423 dtrace_probedesc_t *pd) 10424{ 10425 dtrace_ecbdesc_t *new; 10426 dtrace_predicate_t *pred; 10427 dtrace_actdesc_t *act; 10428 10429 /* 10430 * We're going to create a new ECB description that matches the 10431 * specified ECB in every way, but has the specified probe description. 
10432 */ 10433 new = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP); 10434 10435 if ((pred = ecb->dted_pred.dtpdd_predicate) != NULL) 10436 dtrace_predicate_hold(pred); 10437 10438 for (act = ecb->dted_action; act != NULL; act = act->dtad_next) 10439 dtrace_actdesc_hold(act); 10440 10441 new->dted_action = ecb->dted_action; 10442 new->dted_pred = ecb->dted_pred; 10443 new->dted_probe = *pd; 10444 new->dted_uarg = ecb->dted_uarg; 10445 10446 dtrace_enabling_add(enab, new); 10447} 10448 10449static void 10450dtrace_enabling_dump(dtrace_enabling_t *enab) 10451{ 10452 int i; 10453 10454 for (i = 0; i < enab->dten_ndesc; i++) { 10455 dtrace_probedesc_t *desc = &enab->dten_desc[i]->dted_probe; 10456 10457 cmn_err(CE_NOTE, "enabling probe %d (%s:%s:%s:%s)", i, 10458 desc->dtpd_provider, desc->dtpd_mod, 10459 desc->dtpd_func, desc->dtpd_name); 10460 } 10461} 10462 10463static void 10464dtrace_enabling_destroy(dtrace_enabling_t *enab) 10465{ 10466 int i; 10467 dtrace_ecbdesc_t *ep; 10468 dtrace_vstate_t *vstate = enab->dten_vstate; 10469 10470 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 10471 10472 for (i = 0; i < enab->dten_ndesc; i++) { 10473 dtrace_actdesc_t *act, *next; 10474 dtrace_predicate_t *pred; 10475 10476 ep = enab->dten_desc[i]; 10477 10478 if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) 10479 dtrace_predicate_release(pred, vstate); 10480 10481 for (act = ep->dted_action; act != NULL; act = next) { 10482 next = act->dtad_next; 10483 dtrace_actdesc_release(act, vstate); 10484 } 10485 10486 kmem_free(ep, sizeof (dtrace_ecbdesc_t)); 10487 } 10488 10489 kmem_free(enab->dten_desc, 10490 enab->dten_maxdesc * sizeof (dtrace_enabling_t *)); 10491 10492 /* 10493 * If this was a retained enabling, decrement the dts_nretained count 10494 * and take it off of the dtrace_retained list. 
10495 */ 10496 if (enab->dten_prev != NULL || enab->dten_next != NULL || 10497 dtrace_retained == enab) { 10498 ASSERT(enab->dten_vstate->dtvs_state != NULL); 10499 ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0); 10500 enab->dten_vstate->dtvs_state->dts_nretained--; 10501 } 10502 10503 if (enab->dten_prev == NULL) { 10504 if (dtrace_retained == enab) { 10505 dtrace_retained = enab->dten_next; 10506 10507 if (dtrace_retained != NULL) 10508 dtrace_retained->dten_prev = NULL; 10509 } 10510 } else { 10511 ASSERT(enab != dtrace_retained); 10512 ASSERT(dtrace_retained != NULL); 10513 enab->dten_prev->dten_next = enab->dten_next; 10514 } 10515 10516 if (enab->dten_next != NULL) { 10517 ASSERT(dtrace_retained != NULL); 10518 enab->dten_next->dten_prev = enab->dten_prev; 10519 } 10520 10521 kmem_free(enab, sizeof (dtrace_enabling_t)); 10522} 10523 10524static int 10525dtrace_enabling_retain(dtrace_enabling_t *enab) 10526{ 10527 dtrace_state_t *state; 10528 10529 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 10530 ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); 10531 ASSERT(enab->dten_vstate != NULL); 10532 10533 state = enab->dten_vstate->dtvs_state; 10534 ASSERT(state != NULL); 10535 10536 /* 10537 * We only allow each state to retain dtrace_retain_max enablings. 
10538 */ 10539 if (state->dts_nretained >= dtrace_retain_max) 10540 return (ENOSPC); 10541 10542 state->dts_nretained++; 10543 10544 if (dtrace_retained == NULL) { 10545 dtrace_retained = enab; 10546 return (0); 10547 } 10548 10549 enab->dten_next = dtrace_retained; 10550 dtrace_retained->dten_prev = enab; 10551 dtrace_retained = enab; 10552 10553 return (0); 10554} 10555 10556static int 10557dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match, 10558 dtrace_probedesc_t *create) 10559{ 10560 dtrace_enabling_t *new, *enab; 10561 int found = 0, err = ENOENT; 10562 10563 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 10564 ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN); 10565 ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN); 10566 ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN); 10567 ASSERT(strlen(match->dtpd_name) < DTRACE_NAMELEN); 10568 10569 new = dtrace_enabling_create(&state->dts_vstate); 10570 10571 /* 10572 * Iterate over all retained enablings, looking for enablings that 10573 * match the specified state. 10574 */ 10575 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { 10576 int i; 10577 10578 /* 10579 * dtvs_state can only be NULL for helper enablings -- and 10580 * helper enablings can't be retained. 10581 */ 10582 ASSERT(enab->dten_vstate->dtvs_state != NULL); 10583 10584 if (enab->dten_vstate->dtvs_state != state) 10585 continue; 10586 10587 /* 10588 * Now iterate over each probe description; we're looking for 10589 * an exact match to the specified probe description. 
10590 */ 10591 for (i = 0; i < enab->dten_ndesc; i++) { 10592 dtrace_ecbdesc_t *ep = enab->dten_desc[i]; 10593 dtrace_probedesc_t *pd = &ep->dted_probe; 10594 10595 if (strcmp(pd->dtpd_provider, match->dtpd_provider)) 10596 continue; 10597 10598 if (strcmp(pd->dtpd_mod, match->dtpd_mod)) 10599 continue; 10600 10601 if (strcmp(pd->dtpd_func, match->dtpd_func)) 10602 continue; 10603 10604 if (strcmp(pd->dtpd_name, match->dtpd_name)) 10605 continue; 10606 10607 /* 10608 * We have a winning probe! Add it to our growing 10609 * enabling. 10610 */ 10611 found = 1; 10612 dtrace_enabling_addlike(new, ep, create); 10613 } 10614 } 10615 10616 if (!found || (err = dtrace_enabling_retain(new)) != 0) { 10617 dtrace_enabling_destroy(new); 10618 return (err); 10619 } 10620 10621 return (0); 10622} 10623 10624static void 10625dtrace_enabling_retract(dtrace_state_t *state) 10626{ 10627 dtrace_enabling_t *enab, *next; 10628 10629 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 10630 10631 /* 10632 * Iterate over all retained enablings, destroy the enablings retained 10633 * for the specified state. 10634 */ 10635 for (enab = dtrace_retained; enab != NULL; enab = next) { 10636 next = enab->dten_next; 10637 10638 /* 10639 * dtvs_state can only be NULL for helper enablings -- and 10640 * helper enablings can't be retained. 
10641 */ 10642 ASSERT(enab->dten_vstate->dtvs_state != NULL); 10643 10644 if (enab->dten_vstate->dtvs_state == state) { 10645 ASSERT(state->dts_nretained > 0); 10646 dtrace_enabling_destroy(enab); 10647 } 10648 } 10649 10650 ASSERT(state->dts_nretained == 0); 10651} 10652 10653static int 10654dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) 10655{ 10656 int i = 0; 10657 int matched = 0; 10658 10659 lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); 10660 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 10661 10662 for (i = 0; i < enab->dten_ndesc; i++) { 10663 dtrace_ecbdesc_t *ep = enab->dten_desc[i]; 10664 10665 enab->dten_current = ep; 10666 enab->dten_error = 0; 10667 10668 matched += dtrace_probe_enable(&ep->dted_probe, enab); 10669 10670 if (enab->dten_error != 0) { 10671 /* 10672 * If we get an error half-way through enabling the 10673 * probes, we kick out -- perhaps with some number of 10674 * them enabled. Leaving enabled probes enabled may 10675 * be slightly confusing for user-level, but we expect 10676 * that no one will attempt to actually drive on in 10677 * the face of such errors. If this is an anonymous 10678 * enabling (indicated with a NULL nmatched pointer), 10679 * we cmn_err() a message. We aren't expecting to 10680 * get such an error -- such as it can exist at all, 10681 * it would be a result of corrupted DOF in the driver 10682 * properties. 
10683 */ 10684 if (nmatched == NULL) { 10685 cmn_err(CE_WARN, "dtrace_enabling_match() " 10686 "error on %p: %d", (void *)ep, 10687 enab->dten_error); 10688 } 10689 10690 return (enab->dten_error); 10691 } 10692 } 10693 10694 enab->dten_probegen = dtrace_probegen; 10695 if (nmatched != NULL) 10696 *nmatched = matched; 10697 10698 return (0); 10699} 10700 10701static void 10702dtrace_enabling_matchall(void) 10703{ 10704 dtrace_enabling_t *enab; 10705 10706 lck_mtx_lock(&cpu_lock); 10707 lck_mtx_lock(&dtrace_lock); 10708 10709 /* 10710 * Because we can be called after dtrace_detach() has been called, we 10711 * cannot assert that there are retained enablings. We can safely 10712 * load from dtrace_retained, however: the taskq_destroy() at the 10713 * end of dtrace_detach() will block pending our completion. 10714 */ 10715 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) 10716 (void) dtrace_enabling_match(enab, NULL); 10717 10718 lck_mtx_unlock(&dtrace_lock); 10719 lck_mtx_unlock(&cpu_lock); 10720} 10721 10722static int 10723dtrace_enabling_matchstate(dtrace_state_t *state, int *nmatched) 10724{ 10725 dtrace_enabling_t *enab; 10726 int matched, total_matched = 0, err; 10727 10728 lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); 10729 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 10730 10731 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { 10732 ASSERT(enab->dten_vstate->dtvs_state != NULL); 10733 10734 if (enab->dten_vstate->dtvs_state != state) 10735 continue; 10736 10737 if ((err = dtrace_enabling_match(enab, &matched)) != 0) 10738 return (err); 10739 10740 total_matched += matched; 10741 } 10742 10743 if (nmatched != NULL) 10744 *nmatched = total_matched; 10745 10746 return (0); 10747} 10748 10749/* 10750 * If an enabling is to be enabled without having matched probes (that is, if 10751 * dtrace_state_go() is to be called on the underlying dtrace_state_t), the 10752 * enabling must be _primed_ by creating an ECB for every 
ECB description. 10753 * This must be done to assure that we know the number of speculations, the 10754 * number of aggregations, the minimum buffer size needed, etc. before we 10755 * transition out of DTRACE_ACTIVITY_INACTIVE. To do this without actually 10756 * enabling any probes, we create ECBs for every ECB decription, but with a 10757 * NULL probe -- which is exactly what this function does. 10758 */ 10759static void 10760dtrace_enabling_prime(dtrace_state_t *state) 10761{ 10762 dtrace_enabling_t *enab; 10763 int i; 10764 10765 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { 10766 ASSERT(enab->dten_vstate->dtvs_state != NULL); 10767 10768 if (enab->dten_vstate->dtvs_state != state) 10769 continue; 10770 10771 /* 10772 * We don't want to prime an enabling more than once, lest 10773 * we allow a malicious user to induce resource exhaustion. 10774 * (The ECBs that result from priming an enabling aren't 10775 * leaked -- but they also aren't deallocated until the 10776 * consumer state is destroyed.) 10777 */ 10778 if (enab->dten_primed) 10779 continue; 10780 10781 for (i = 0; i < enab->dten_ndesc; i++) { 10782 enab->dten_current = enab->dten_desc[i]; 10783 (void) dtrace_probe_enable(NULL, enab); 10784 } 10785 10786 enab->dten_primed = 1; 10787 } 10788} 10789 10790/* 10791 * Called to indicate that probes should be provided due to retained 10792 * enablings. This is implemented in terms of dtrace_probe_provide(), but it 10793 * must take an initial lap through the enabling calling the dtps_provide() 10794 * entry point explicitly to allow for autocreated probes. 
10795 */ 10796static void 10797dtrace_enabling_provide(dtrace_provider_t *prv) 10798{ 10799 int i, all = 0; 10800 dtrace_probedesc_t desc; 10801 10802 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 10803 lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); 10804 10805 if (prv == NULL) { 10806 all = 1; 10807 prv = dtrace_provider; 10808 } 10809 10810 do { 10811 dtrace_enabling_t *enab = dtrace_retained; 10812 void *parg = prv->dtpv_arg; 10813 10814 for (; enab != NULL; enab = enab->dten_next) { 10815 for (i = 0; i < enab->dten_ndesc; i++) { 10816 desc = enab->dten_desc[i]->dted_probe; 10817 lck_mtx_unlock(&dtrace_lock); 10818 prv->dtpv_pops.dtps_provide(parg, &desc); 10819 lck_mtx_lock(&dtrace_lock); 10820 } 10821 } 10822 } while (all && (prv = prv->dtpv_next) != NULL); 10823 10824 lck_mtx_unlock(&dtrace_lock); 10825 dtrace_probe_provide(NULL, all ? NULL : prv); 10826 lck_mtx_lock(&dtrace_lock); 10827} 10828 10829/* 10830 * DTrace DOF Functions 10831 */ 10832/*ARGSUSED*/ 10833static void 10834dtrace_dof_error(dof_hdr_t *dof, const char *str) 10835{ 10836#pragma unused(dof) 10837 if (dtrace_err_verbose) 10838 cmn_err(CE_WARN, "failed to process DOF: %s", str); 10839 10840#ifdef DTRACE_ERRDEBUG 10841 dtrace_errdebug(str); 10842#endif 10843} 10844 10845/* 10846 * Create DOF out of a currently enabled state. Right now, we only create 10847 * DOF containing the run-time options -- but this could be expanded to create 10848 * complete DOF representing the enabled state. 
10849 */ 10850static dof_hdr_t * 10851dtrace_dof_create(dtrace_state_t *state) 10852{ 10853 dof_hdr_t *dof; 10854 dof_sec_t *sec; 10855 dof_optdesc_t *opt; 10856 int i, len = sizeof (dof_hdr_t) + 10857 roundup(sizeof (dof_sec_t), sizeof (uint64_t)) + 10858 sizeof (dof_optdesc_t) * DTRACEOPT_MAX; 10859 10860 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 10861 10862 dof = dt_kmem_zalloc_aligned(len, 8, KM_SLEEP); 10863 dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0; 10864 dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1; 10865 dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2; 10866 dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3; 10867 10868 dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE; 10869 dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE; 10870 dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION; 10871 dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION; 10872 dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS; 10873 dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS; 10874 10875 dof->dofh_flags = 0; 10876 dof->dofh_hdrsize = sizeof (dof_hdr_t); 10877 dof->dofh_secsize = sizeof (dof_sec_t); 10878 dof->dofh_secnum = 1; /* only DOF_SECT_OPTDESC */ 10879 dof->dofh_secoff = sizeof (dof_hdr_t); 10880 dof->dofh_loadsz = len; 10881 dof->dofh_filesz = len; 10882 dof->dofh_pad = 0; 10883 10884 /* 10885 * Fill in the option section header... 
10886 */ 10887 sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t)); 10888 sec->dofs_type = DOF_SECT_OPTDESC; 10889 sec->dofs_align = sizeof (uint64_t); 10890 sec->dofs_flags = DOF_SECF_LOAD; 10891 sec->dofs_entsize = sizeof (dof_optdesc_t); 10892 10893 opt = (dof_optdesc_t *)((uintptr_t)sec + 10894 roundup(sizeof (dof_sec_t), sizeof (uint64_t))); 10895 10896 sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof; 10897 sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX; 10898 10899 for (i = 0; i < DTRACEOPT_MAX; i++) { 10900 opt[i].dofo_option = i; 10901 opt[i].dofo_strtab = DOF_SECIDX_NONE; 10902 opt[i].dofo_value = state->dts_options[i]; 10903 } 10904 10905 return (dof); 10906} 10907 10908static dof_hdr_t * 10909#if defined(__APPLE__) 10910dtrace_dof_copyin(user_addr_t uarg, int *errp) 10911#else 10912dtrace_dof_copyin(uintptr_t uarg, int *errp) 10913#endif 10914{ 10915 dof_hdr_t hdr, *dof; 10916 10917 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); 10918 10919 /* 10920 * First, we're going to copyin() the sizeof (dof_hdr_t). 10921 */ 10922#if defined(__APPLE__) 10923 if (copyin(uarg, &hdr, sizeof (hdr)) != 0) { 10924#else 10925 if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0) { 10926#endif 10927 dtrace_dof_error(NULL, "failed to copyin DOF header"); 10928 *errp = EFAULT; 10929 return (NULL); 10930 } 10931 10932 /* 10933 * Now we'll allocate the entire DOF and copy it in -- provided 10934 * that the length isn't outrageous. 
10935 */ 10936 if (hdr.dofh_loadsz >= dtrace_dof_maxsize) { 10937 dtrace_dof_error(&hdr, "load size exceeds maximum"); 10938 *errp = E2BIG; 10939 return (NULL); 10940 } 10941 10942 if (hdr.dofh_loadsz < sizeof (hdr)) { 10943 dtrace_dof_error(&hdr, "invalid load size"); 10944 *errp = EINVAL; 10945 return (NULL); 10946 } 10947 10948 dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP); 10949 10950#if defined(__APPLE__) 10951 if (copyin(uarg, dof, hdr.dofh_loadsz) != 0) { 10952#else 10953 if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0) { 10954#endif 10955 dt_kmem_free_aligned(dof, hdr.dofh_loadsz); 10956 *errp = EFAULT; 10957 return (NULL); 10958 } 10959 10960 return (dof); 10961} 10962 10963#if defined(__APPLE__) 10964 10965static dof_hdr_t * 10966dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp) 10967{ 10968 dof_hdr_t hdr, *dof; 10969 10970 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); 10971 10972 /* 10973 * First, we're going to copyin() the sizeof (dof_hdr_t). 10974 */ 10975 if (uread(p, &hdr, sizeof(hdr), uarg) != KERN_SUCCESS) { 10976 dtrace_dof_error(NULL, "failed to copyin DOF header"); 10977 *errp = EFAULT; 10978 return (NULL); 10979 } 10980 10981 /* 10982 * Now we'll allocate the entire DOF and copy it in -- provided 10983 * that the length isn't outrageous. 
10984 */ 10985 if (hdr.dofh_loadsz >= dtrace_dof_maxsize) { 10986 dtrace_dof_error(&hdr, "load size exceeds maximum"); 10987 *errp = E2BIG; 10988 return (NULL); 10989 } 10990 10991 if (hdr.dofh_loadsz < sizeof (hdr)) { 10992 dtrace_dof_error(&hdr, "invalid load size"); 10993 *errp = EINVAL; 10994 return (NULL); 10995 } 10996 10997 dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP); 10998 10999 if (uread(p, dof, hdr.dofh_loadsz, uarg) != KERN_SUCCESS) { 11000 dt_kmem_free_aligned(dof, hdr.dofh_loadsz); 11001 *errp = EFAULT; 11002 return (NULL); 11003 } 11004 11005 return (dof); 11006} 11007 11008#endif /* __APPLE__ */ 11009 11010static dof_hdr_t * 11011dtrace_dof_property(const char *name) 11012{ 11013 uchar_t *buf; 11014 uint64_t loadsz; 11015 unsigned int len, i; 11016 dof_hdr_t *dof; 11017 11018 /* 11019 * Unfortunately, array of values in .conf files are always (and 11020 * only) interpreted to be integer arrays. We must read our DOF 11021 * as an integer array, and then squeeze it into a byte array. 
11022 */ 11023 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0, 11024 (char *)name, (int **)&buf, &len) != DDI_PROP_SUCCESS) 11025 return (NULL); 11026 11027 for (i = 0; i < len; i++) 11028 buf[i] = (uchar_t)(((int *)buf)[i]); 11029 11030 if (len < sizeof (dof_hdr_t)) { 11031 ddi_prop_free(buf); 11032 dtrace_dof_error(NULL, "truncated header"); 11033 return (NULL); 11034 } 11035 11036 if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) { 11037 ddi_prop_free(buf); 11038 dtrace_dof_error(NULL, "truncated DOF"); 11039 return (NULL); 11040 } 11041 11042 if (loadsz >= dtrace_dof_maxsize) { 11043 ddi_prop_free(buf); 11044 dtrace_dof_error(NULL, "oversized DOF"); 11045 return (NULL); 11046 } 11047 11048 dof = dt_kmem_alloc_aligned(loadsz, 8, KM_SLEEP); 11049 bcopy(buf, dof, loadsz); 11050 ddi_prop_free(buf); 11051 11052 return (dof); 11053} 11054 11055static void 11056dtrace_dof_destroy(dof_hdr_t *dof) 11057{ 11058 dt_kmem_free_aligned(dof, dof->dofh_loadsz); 11059} 11060 11061/* 11062 * Return the dof_sec_t pointer corresponding to a given section index. If the 11063 * index is not valid, dtrace_dof_error() is called and NULL is returned. If 11064 * a type other than DOF_SECT_NONE is specified, the header is checked against 11065 * this type and NULL is returned if the types do not match. 
11066 */ 11067static dof_sec_t * 11068dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i) 11069{ 11070 dof_sec_t *sec = (dof_sec_t *)(uintptr_t) 11071 ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize); 11072 11073 if (i >= dof->dofh_secnum) { 11074 dtrace_dof_error(dof, "referenced section index is invalid"); 11075 return (NULL); 11076 } 11077 11078 if (!(sec->dofs_flags & DOF_SECF_LOAD)) { 11079 dtrace_dof_error(dof, "referenced section is not loadable"); 11080 return (NULL); 11081 } 11082 11083 if (type != DOF_SECT_NONE && type != sec->dofs_type) { 11084 dtrace_dof_error(dof, "referenced section is the wrong type"); 11085 return (NULL); 11086 } 11087 11088 return (sec); 11089} 11090 11091static dtrace_probedesc_t * 11092dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) 11093{ 11094 dof_probedesc_t *probe; 11095 dof_sec_t *strtab; 11096 uintptr_t daddr = (uintptr_t)dof; 11097 uintptr_t str; 11098 size_t size; 11099 11100 if (sec->dofs_type != DOF_SECT_PROBEDESC) { 11101 dtrace_dof_error(dof, "invalid probe section"); 11102 return (NULL); 11103 } 11104 11105 if (sec->dofs_align != sizeof (dof_secidx_t)) { 11106 dtrace_dof_error(dof, "bad alignment in probe description"); 11107 return (NULL); 11108 } 11109 11110 if (sec->dofs_offset + sizeof (dof_probedesc_t) > dof->dofh_loadsz) { 11111 dtrace_dof_error(dof, "truncated probe description"); 11112 return (NULL); 11113 } 11114 11115 probe = (dof_probedesc_t *)(uintptr_t)(daddr + sec->dofs_offset); 11116 strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, probe->dofp_strtab); 11117 11118 if (strtab == NULL) 11119 return (NULL); 11120 11121 str = daddr + strtab->dofs_offset; 11122 size = strtab->dofs_size; 11123 11124 if (probe->dofp_provider >= strtab->dofs_size) { 11125 dtrace_dof_error(dof, "corrupt probe provider"); 11126 return (NULL); 11127 } 11128 11129 (void) strncpy(desc->dtpd_provider, 11130 (char *)(str + probe->dofp_provider), 11131 MIN(DTRACE_PROVNAMELEN - 1, size 
- probe->dofp_provider)); 11132 11133 if (probe->dofp_mod >= strtab->dofs_size) { 11134 dtrace_dof_error(dof, "corrupt probe module"); 11135 return (NULL); 11136 } 11137 11138 (void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod), 11139 MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod)); 11140 11141 if (probe->dofp_func >= strtab->dofs_size) { 11142 dtrace_dof_error(dof, "corrupt probe function"); 11143 return (NULL); 11144 } 11145 11146 (void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func), 11147 MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func)); 11148 11149 if (probe->dofp_name >= strtab->dofs_size) { 11150 dtrace_dof_error(dof, "corrupt probe name"); 11151 return (NULL); 11152 } 11153 11154 (void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name), 11155 MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name)); 11156 11157 return (desc); 11158} 11159 11160static dtrace_difo_t * 11161dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, 11162 cred_t *cr) 11163{ 11164 dtrace_difo_t *dp; 11165 size_t ttl = 0; 11166 dof_difohdr_t *dofd; 11167 uintptr_t daddr = (uintptr_t)dof; 11168 size_t max_size = dtrace_difo_maxsize; 11169 int i, l, n; 11170 11171 static const struct { 11172 int section; 11173 int bufoffs; 11174 int lenoffs; 11175 int entsize; 11176 int align; 11177 const char *msg; 11178 } difo[] = { 11179 { DOF_SECT_DIF, offsetof(dtrace_difo_t, dtdo_buf), 11180 offsetof(dtrace_difo_t, dtdo_len), sizeof (dif_instr_t), 11181 sizeof (dif_instr_t), "multiple DIF sections" }, 11182 11183 { DOF_SECT_INTTAB, offsetof(dtrace_difo_t, dtdo_inttab), 11184 offsetof(dtrace_difo_t, dtdo_intlen), sizeof (uint64_t), 11185 sizeof (uint64_t), "multiple integer tables" }, 11186 11187 { DOF_SECT_STRTAB, offsetof(dtrace_difo_t, dtdo_strtab), 11188 offsetof(dtrace_difo_t, dtdo_strlen), 0, 11189 sizeof (char), "multiple string tables" }, 11190 11191 { DOF_SECT_VARTAB, offsetof(dtrace_difo_t, dtdo_vartab), 11192 offsetof(dtrace_difo_t, 
dtdo_varlen), sizeof (dtrace_difv_t), 11193 sizeof (uint_t), "multiple variable tables" }, 11194 11195#if !defined(__APPLE__) 11196 { DOF_SECT_NONE, 0, 0, 0, NULL } 11197#else 11198 { DOF_SECT_NONE, 0, 0, 0, 0, NULL } 11199#endif /* __APPLE__ */ 11200 }; 11201 11202 if (sec->dofs_type != DOF_SECT_DIFOHDR) { 11203 dtrace_dof_error(dof, "invalid DIFO header section"); 11204 return (NULL); 11205 } 11206 11207 if (sec->dofs_align != sizeof (dof_secidx_t)) { 11208 dtrace_dof_error(dof, "bad alignment in DIFO header"); 11209 return (NULL); 11210 } 11211 11212 if (sec->dofs_size < sizeof (dof_difohdr_t) || 11213 sec->dofs_size % sizeof (dof_secidx_t)) { 11214 dtrace_dof_error(dof, "bad size in DIFO header"); 11215 return (NULL); 11216 } 11217 11218 dofd = (dof_difohdr_t *)(uintptr_t)(daddr + sec->dofs_offset); 11219 n = (sec->dofs_size - sizeof (*dofd)) / sizeof (dof_secidx_t) + 1; 11220 11221 dp = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP); 11222 dp->dtdo_rtype = dofd->dofd_rtype; 11223 11224 for (l = 0; l < n; l++) { 11225 dof_sec_t *subsec; 11226 void **bufp; 11227 uint32_t *lenp; 11228 11229 if ((subsec = dtrace_dof_sect(dof, DOF_SECT_NONE, 11230 dofd->dofd_links[l])) == NULL) 11231 goto err; /* invalid section link */ 11232 11233 if (ttl + subsec->dofs_size > max_size) { 11234 dtrace_dof_error(dof, "exceeds maximum size"); 11235 goto err; 11236 } 11237 11238 ttl += subsec->dofs_size; 11239 11240 for (i = 0; difo[i].section != DOF_SECT_NONE; i++) { 11241 if (subsec->dofs_type != difo[i].section) 11242 continue; 11243 11244 if (!(subsec->dofs_flags & DOF_SECF_LOAD)) { 11245 dtrace_dof_error(dof, "section not loaded"); 11246 goto err; 11247 } 11248 11249 if (subsec->dofs_align != difo[i].align) { 11250 dtrace_dof_error(dof, "bad alignment"); 11251 goto err; 11252 } 11253 11254 bufp = (void **)((uintptr_t)dp + difo[i].bufoffs); 11255 lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs); 11256 11257 if (*bufp != NULL) { 11258 dtrace_dof_error(dof, difo[i].msg); 11259 
goto err; 11260 } 11261 11262 if (difo[i].entsize != subsec->dofs_entsize) { 11263 dtrace_dof_error(dof, "entry size mismatch"); 11264 goto err; 11265 } 11266 11267 if (subsec->dofs_entsize != 0 && 11268 (subsec->dofs_size % subsec->dofs_entsize) != 0) { 11269 dtrace_dof_error(dof, "corrupt entry size"); 11270 goto err; 11271 } 11272 11273 *lenp = subsec->dofs_size; 11274 *bufp = kmem_alloc(subsec->dofs_size, KM_SLEEP); 11275 bcopy((char *)(uintptr_t)(daddr + subsec->dofs_offset), 11276 *bufp, subsec->dofs_size); 11277 11278 if (subsec->dofs_entsize != 0) 11279 *lenp /= subsec->dofs_entsize; 11280 11281 break; 11282 } 11283 11284 /* 11285 * If we encounter a loadable DIFO sub-section that is not 11286 * known to us, assume this is a broken program and fail. 11287 */ 11288 if (difo[i].section == DOF_SECT_NONE && 11289 (subsec->dofs_flags & DOF_SECF_LOAD)) { 11290 dtrace_dof_error(dof, "unrecognized DIFO subsection"); 11291 goto err; 11292 } 11293 } 11294 11295 if (dp->dtdo_buf == NULL) { 11296 /* 11297 * We can't have a DIF object without DIF text. 11298 */ 11299 dtrace_dof_error(dof, "missing DIF text"); 11300 goto err; 11301 } 11302 11303 /* 11304 * Before we validate the DIF object, run through the variable table 11305 * looking for the strings -- if any of their size are under, we'll set 11306 * their size to be the system-wide default string size. Note that 11307 * this should _not_ happen if the "strsize" option has been set -- 11308 * in this case, the compiler should have set the size to reflect the 11309 * setting of the option. 
11310 */ 11311 for (i = 0; i < dp->dtdo_varlen; i++) { 11312 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 11313 dtrace_diftype_t *t = &v->dtdv_type; 11314 11315 if (v->dtdv_id < DIF_VAR_OTHER_UBASE) 11316 continue; 11317 11318 if (t->dtdt_kind == DIF_TYPE_STRING && t->dtdt_size == 0) 11319 t->dtdt_size = dtrace_strsize_default; 11320 } 11321 11322 if (dtrace_difo_validate(dp, vstate, DIF_DIR_NREGS, cr) != 0) 11323 goto err; 11324 11325 dtrace_difo_init(dp, vstate); 11326 return (dp); 11327 11328err: 11329 kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t)); 11330 kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t)); 11331 kmem_free(dp->dtdo_strtab, dp->dtdo_strlen); 11332 kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t)); 11333 11334 kmem_free(dp, sizeof (dtrace_difo_t)); 11335 return (NULL); 11336} 11337 11338static dtrace_predicate_t * 11339dtrace_dof_predicate(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, 11340 cred_t *cr) 11341{ 11342 dtrace_difo_t *dp; 11343 11344 if ((dp = dtrace_dof_difo(dof, sec, vstate, cr)) == NULL) 11345 return (NULL); 11346 11347 return (dtrace_predicate_create(dp)); 11348} 11349 11350static dtrace_actdesc_t * 11351dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, 11352 cred_t *cr) 11353{ 11354 dtrace_actdesc_t *act, *first = NULL, *last = NULL, *next; 11355 dof_actdesc_t *desc; 11356 dof_sec_t *difosec; 11357 size_t offs; 11358 uintptr_t daddr = (uintptr_t)dof; 11359 uint64_t arg; 11360 dtrace_actkind_t kind; 11361 11362 if (sec->dofs_type != DOF_SECT_ACTDESC) { 11363 dtrace_dof_error(dof, "invalid action section"); 11364 return (NULL); 11365 } 11366 11367 if (sec->dofs_offset + sizeof (dof_actdesc_t) > dof->dofh_loadsz) { 11368 dtrace_dof_error(dof, "truncated action description"); 11369 return (NULL); 11370 } 11371 11372 if (sec->dofs_align != sizeof (uint64_t)) { 11373 dtrace_dof_error(dof, "bad alignment in action description"); 11374 return (NULL); 11375 } 
11376 11377 if (sec->dofs_size < sec->dofs_entsize) { 11378 dtrace_dof_error(dof, "section entry size exceeds total size"); 11379 return (NULL); 11380 } 11381 11382 if (sec->dofs_entsize != sizeof (dof_actdesc_t)) { 11383 dtrace_dof_error(dof, "bad entry size in action description"); 11384 return (NULL); 11385 } 11386 11387 if (sec->dofs_size / sec->dofs_entsize > dtrace_actions_max) { 11388 dtrace_dof_error(dof, "actions exceed dtrace_actions_max"); 11389 return (NULL); 11390 } 11391 11392 for (offs = 0; offs < sec->dofs_size; offs += sec->dofs_entsize) { 11393 desc = (dof_actdesc_t *)(daddr + 11394 (uintptr_t)sec->dofs_offset + offs); 11395 kind = (dtrace_actkind_t)desc->dofa_kind; 11396 11397 if (DTRACEACT_ISPRINTFLIKE(kind) && 11398 (kind != DTRACEACT_PRINTA || 11399 desc->dofa_strtab != DOF_SECIDX_NONE)) { 11400 dof_sec_t *strtab; 11401 char *str, *fmt; 11402 uint64_t i; 11403 11404 /* 11405 * printf()-like actions must have a format string. 11406 */ 11407 if ((strtab = dtrace_dof_sect(dof, 11408 DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL) 11409 goto err; 11410 11411 str = (char *)((uintptr_t)dof + 11412 (uintptr_t)strtab->dofs_offset); 11413 11414 for (i = desc->dofa_arg; i < strtab->dofs_size; i++) { 11415 if (str[i] == '\0') 11416 break; 11417 } 11418 11419 if (i >= strtab->dofs_size) { 11420 dtrace_dof_error(dof, "bogus format string"); 11421 goto err; 11422 } 11423 11424 if (i == desc->dofa_arg) { 11425 dtrace_dof_error(dof, "empty format string"); 11426 goto err; 11427 } 11428 11429 i -= desc->dofa_arg; 11430 fmt = kmem_alloc(i + 1, KM_SLEEP); 11431 bcopy(&str[desc->dofa_arg], fmt, i + 1); 11432 arg = (uint64_t)(uintptr_t)fmt; 11433 } else { 11434 if (kind == DTRACEACT_PRINTA) { 11435 ASSERT(desc->dofa_strtab == DOF_SECIDX_NONE); 11436 arg = 0; 11437 } else { 11438 arg = desc->dofa_arg; 11439 } 11440 } 11441 11442 act = dtrace_actdesc_create(kind, desc->dofa_ntuple, 11443 desc->dofa_uarg, arg); 11444 11445 if (last != NULL) { 11446 last->dtad_next = 
act;
		} else {
			first = act;
		}

		last = act;

		if (desc->dofa_difo == DOF_SECIDX_NONE)
			continue;

		/* The action carries its own DIFO; locate and build it. */
		if ((difosec = dtrace_dof_sect(dof,
		    DOF_SECT_DIFOHDR, desc->dofa_difo)) == NULL)
			goto err;

		act->dtad_difo = dtrace_dof_difo(dof, difosec, vstate, cr);

		if (act->dtad_difo == NULL)
			goto err;
	}

	ASSERT(first != NULL);
	return (first);

err:
	/* Unwind: release every action created so far. */
	for (act = first; act != NULL; act = next) {
		next = act->dtad_next;
		dtrace_actdesc_release(act, vstate);
	}

	return (NULL);
}

/*
 * Convert a DOF_SECT_ECBDESC section into a dtrace_ecbdesc_t, including its
 * probe description and (optional) predicate and action chain.  Returns
 * NULL on failure.
 */
static dtrace_ecbdesc_t *
dtrace_dof_ecbdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
    cred_t *cr)
{
	dtrace_ecbdesc_t *ep;
	dof_ecbdesc_t *ecb;
	dtrace_probedesc_t *desc;
	dtrace_predicate_t *pred = NULL;

	if (sec->dofs_size < sizeof (dof_ecbdesc_t)) {
		dtrace_dof_error(dof, "truncated ECB description");
		return (NULL);
	}

	if (sec->dofs_align != sizeof (uint64_t)) {
		dtrace_dof_error(dof, "bad alignment in ECB description");
		return (NULL);
	}

	ecb = (dof_ecbdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset);
	sec = dtrace_dof_sect(dof, DOF_SECT_PROBEDESC, ecb->dofe_probes);

	if (sec == NULL)
		return (NULL);

	ep = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);
	ep->dted_uarg = ecb->dofe_uarg;
	desc = &ep->dted_probe;

	if (dtrace_dof_probedesc(dof, sec, desc) == NULL)
		goto err;

	if (ecb->dofe_pred != DOF_SECIDX_NONE) {
		if ((sec = dtrace_dof_sect(dof,
		    DOF_SECT_DIFOHDR, ecb->dofe_pred)) == NULL)
			goto err;

		if ((pred = dtrace_dof_predicate(dof, sec, vstate, cr)) == NULL)
			goto err;

		ep->dted_pred.dtpdd_predicate = pred;
	}

	if (ecb->dofe_actions != DOF_SECIDX_NONE) {
		if ((sec =
dtrace_dof_sect(dof,
		    DOF_SECT_ACTDESC, ecb->dofe_actions)) == NULL)
			goto err;

		ep->dted_action = dtrace_dof_actdesc(dof, sec, vstate, cr);

		if (ep->dted_action == NULL)
			goto err;
	}

	return (ep);

err:
	/*
	 * Failure: drop the predicate reference (if one was created) and
	 * free the ECB description itself.
	 */
	if (pred != NULL)
		dtrace_predicate_release(pred, vstate);
	kmem_free(ep, sizeof (dtrace_ecbdesc_t));
	return (NULL);
}

#if !defined(__APPLE__) /* APPLE dyld has already done this for us */
/*
 * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the
 * specified DOF.  At present, this amounts to simply adding 'ubase' to the
 * site of any user SETX relocations to account for load object base address.
 * In the future, if we need other relocations, this function can be extended.
 */
static int
dtrace_dof_relocate(dof_hdr_t *dof, dof_sec_t *sec, uint64_t ubase)
{
	uintptr_t daddr = (uintptr_t)dof;
	dof_relohdr_t *dofr =
	    (dof_relohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
	dof_sec_t *ss, *rs, *ts;
	dof_relodesc_t *r;
	uint_t i, n;

	if (sec->dofs_size < sizeof (dof_relohdr_t) ||
	    sec->dofs_align != sizeof (dof_secidx_t)) {
		dtrace_dof_error(dof, "invalid relocation header");
		return (-1);
	}

	/* String table, relocation table, and the target section. */
	ss = dtrace_dof_sect(dof, DOF_SECT_STRTAB, dofr->dofr_strtab);
	rs = dtrace_dof_sect(dof, DOF_SECT_RELTAB, dofr->dofr_relsec);
	ts = dtrace_dof_sect(dof, DOF_SECT_NONE, dofr->dofr_tgtsec);

	if (ss == NULL || rs == NULL || ts == NULL)
		return (-1); /* dtrace_dof_error() has been called already */

	if (rs->dofs_entsize < sizeof (dof_relodesc_t) ||
	    rs->dofs_align != sizeof (uint64_t)) {
		dtrace_dof_error(dof, "invalid relocation section");
		return (-1);
	}

	r = (dof_relodesc_t *)(uintptr_t)(daddr + rs->dofs_offset);
	n = rs->dofs_size / rs->dofs_entsize;

	for (i
 = 0; i < n; i++) {
		uintptr_t taddr = daddr + ts->dofs_offset + r->dofr_offset;

		switch (r->dofr_type) {
		case DOF_RELO_NONE:
			break;
		case DOF_RELO_SETX:
			/* The 8-byte target must lie wholly within 'ts'. */
			if (r->dofr_offset >= ts->dofs_size || r->dofr_offset +
			    sizeof (uint64_t) > ts->dofs_size) {
				dtrace_dof_error(dof, "bad relocation offset");
				return (-1);
			}

			if (!IS_P2ALIGNED(taddr, sizeof (uint64_t))) {
				dtrace_dof_error(dof, "misaligned setx relo");
				return (-1);
			}

			/* Bias the recorded value by the load object base. */
			*(uint64_t *)taddr += ubase;
			break;
		default:
			dtrace_dof_error(dof, "invalid relocation type");
			return (-1);
		}

		r = (dof_relodesc_t *)((uintptr_t)r + rs->dofs_entsize);
	}

	return (0);
}
#endif /* __APPLE__ */

/*
 * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated
 * header:  it should be at the front of a memory region that is at least
 * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in
 * size.  It need not be validated in any other way.
 */
static int
dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr,
    dtrace_enabling_t **enabp, uint64_t ubase, int noprobes)
{
	uint64_t len = dof->dofh_loadsz, seclen;
	uintptr_t daddr = (uintptr_t)dof;
	dtrace_ecbdesc_t *ep;
	dtrace_enabling_t *enab;
	uint_t i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t));

	/*
	 * Check the DOF header identification bytes.  In addition to checking
	 * valid settings, we also verify that unused bits/bytes are zeroed so
	 * we can use them later without fear of regressing existing binaries.
	 */
	if (bcmp(&dof->dofh_ident[DOF_ID_MAG0],
	    DOF_MAG_STRING, DOF_MAG_STRLEN) != 0) {
		dtrace_dof_error(dof, "DOF magic string mismatch");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_ILP32 &&
	    dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_LP64) {
		dtrace_dof_error(dof, "DOF has invalid data model");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_ENCODING] != DOF_ENCODE_NATIVE) {
		dtrace_dof_error(dof, "DOF encoding mismatch");
		return (-1);
	}

#if !defined(__APPLE__)
	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
	    dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_2) {
		dtrace_dof_error(dof, "DOF version mismatch");
		return (-1);
	}
#else
	/*
	 * We only support DOF_VERSION_3 for now.
	 */
	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_3) {
		dtrace_dof_error(dof, "DOF version mismatch");
		return (-1);
	}
#endif

	/*
	 * The DIF instruction-set version and register counts must be within
	 * what this framework implements.
	 */
	if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) {
		dtrace_dof_error(dof, "DOF uses unsupported instruction set");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_DIFIREG] > DIF_DIR_NREGS) {
		dtrace_dof_error(dof, "DOF uses too many integer registers");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_DIFTREG] > DIF_DTR_NREGS) {
		dtrace_dof_error(dof, "DOF uses too many tuple registers");
		return (-1);
	}

	/* Reserved identification bytes must be zero. */
	for (i = DOF_ID_PAD; i < DOF_ID_SIZE; i++) {
		if (dof->dofh_ident[i] != 0) {
			dtrace_dof_error(dof, "DOF has invalid ident byte set");
			return (-1);
		}
	}

	if (dof->dofh_flags & ~DOF_FL_VALID) {
		dtrace_dof_error(dof, "DOF has invalid flag bits set");
		return (-1);
	}

	if (dof->dofh_secsize == 0) {
		dtrace_dof_error(dof, "zero section header size");
		return (-1);
	}

	/*
	 * Check that the
 section headers don't exceed the amount of DOF
	 * data.  Note that we cast the section size and number of sections
	 * to uint64_t's to prevent possible overflow in the multiplication.
	 */
	seclen = (uint64_t)dof->dofh_secnum * (uint64_t)dof->dofh_secsize;

	if (dof->dofh_secoff > len || seclen > len ||
	    dof->dofh_secoff + seclen > len) {
		dtrace_dof_error(dof, "truncated section headers");
		return (-1);
	}

	/* Both the header array offset and its stride must be 8-byte aligned. */
	if (!IS_P2ALIGNED(dof->dofh_secoff, sizeof (uint64_t))) {
		dtrace_dof_error(dof, "misaligned section headers");
		return (-1);
	}

	if (!IS_P2ALIGNED(dof->dofh_secsize, sizeof (uint64_t))) {
		dtrace_dof_error(dof, "misaligned section size");
		return (-1);
	}

	/*
	 * Take an initial pass through the section headers to be sure that
	 * the headers don't have stray offsets.  If the 'noprobes' flag is
	 * set, do not permit sections relating to providers, probes, or args.
	 */
	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(daddr +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (noprobes) {
			switch (sec->dofs_type) {
			case DOF_SECT_PROVIDER:
			case DOF_SECT_PROBES:
			case DOF_SECT_PRARGS:
			case DOF_SECT_PROFFS:
				dtrace_dof_error(dof, "illegal sections "
				    "for enabling");
				return (-1);
			}
		}

		if (!(sec->dofs_flags & DOF_SECF_LOAD))
			continue; /* just ignore non-loadable sections */

		/* Alignment must be a power of two, and honored by dofs_offset. */
		if (sec->dofs_align & (sec->dofs_align - 1)) {
			dtrace_dof_error(dof, "bad section alignment");
			return (-1);
		}

		if (sec->dofs_offset & (sec->dofs_align - 1)) {
			dtrace_dof_error(dof, "misaligned section");
			return (-1);
		}

		if (sec->dofs_offset > len || sec->dofs_size > len ||
		    sec->dofs_offset + sec->dofs_size > len) {
			dtrace_dof_error(dof, "corrupt section header");
			return (-1);
		}

		/* A string table must be NUL-terminated at its final byte. */
		if (sec->dofs_type == DOF_SECT_STRTAB && *((char *)daddr +
		    sec->dofs_offset + sec->dofs_size - 1) != '\0') {
			dtrace_dof_error(dof, "non-terminating string table");
			return (-1);
		}
	}

#if !defined(__APPLE__)
	/*
	 * APPLE NOTE: We have no relocation to perform. All dof values are
	 * relative offsets.
	 */

	/*
	 * Take a second pass through the sections and locate and perform any
	 * relocations that are present.  We do this after the first pass to
	 * be sure that all sections have had their headers validated.
	 */
	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(daddr +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (!(sec->dofs_flags & DOF_SECF_LOAD))
			continue; /* skip sections that are not loadable */

		switch (sec->dofs_type) {
		case DOF_SECT_URELHDR:
			if (dtrace_dof_relocate(dof, sec, ubase) != 0)
				return (-1);
			break;
		}
	}
#endif /* __APPLE__ */

	/*
	 * Create the enabling (or extend the caller-supplied one) with one
	 * ECB description per ECBDESC section.
	 */
	if ((enab = *enabp) == NULL)
		enab = *enabp = dtrace_enabling_create(vstate);

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(daddr +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_ECBDESC)
			continue;

#if !defined(__APPLE__)
		if ((ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr)) == NULL) {
			dtrace_enabling_destroy(enab);
			*enabp = NULL;
			return (-1);
		}
#else
		/* XXX Defend against gcc 4.0 botch on x86 (not all paths out of inlined dtrace_dof_ecbdesc
		   are checked for the NULL return value.) */
		ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr);
		if (ep == NULL) {
			dtrace_enabling_destroy(enab);
			*enabp = NULL;
			return (-1);
		}
#endif /* __APPLE__ */

		dtrace_enabling_add(enab, ep);
	}

	return (0);
}

/*
 * Process DOF for any options.  This routine assumes that the DOF has been
 * at least processed by dtrace_dof_slurp().
 */
static int
dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state)
{
	int i, rval;
	uint32_t entsize;
	size_t offs;
	dof_optdesc_t *desc;

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_OPTDESC)
			continue;

		if (sec->dofs_align != sizeof (uint64_t)) {
			dtrace_dof_error(dof, "bad alignment in "
			    "option description");
			return (EINVAL);
		}

		if ((entsize = sec->dofs_entsize) == 0) {
			dtrace_dof_error(dof, "zeroed option entry size");
			return (EINVAL);
		}

		if (entsize < sizeof (dof_optdesc_t)) {
			dtrace_dof_error(dof, "bad option entry size");
			return (EINVAL);
		}

		/* Apply each dof_optdesc_t in the section to the state. */
		for (offs = 0; offs < sec->dofs_size; offs += entsize) {
			desc = (dof_optdesc_t *)((uintptr_t)dof +
			    (uintptr_t)sec->dofs_offset + offs);

			if (desc->dofo_strtab != DOF_SECIDX_NONE) {
				dtrace_dof_error(dof, "non-zero option string");
				return (EINVAL);
			}

			if (desc->dofo_value == DTRACEOPT_UNSET) {
				dtrace_dof_error(dof, "unset option");
				return (EINVAL);
			}

			if ((rval = dtrace_state_option(state,
			    desc->dofo_option, desc->dofo_value)) != 0) {
				dtrace_dof_error(dof, "rejected option");
				return (rval);
			}
		}
	}

	return (0);
}

/*
 * DTrace Consumer State Functions
 */

/*
 * Initialize the dynamic variable state: carve the 'size'-byte region into
 * a hash table of buckets followed by per-CPU free lists of fixed-size
 * chunks.  Returns 0 on success or ENOMEM.
 */
#if defined(__APPLE__)
static
#endif /* __APPLE__ */
int
dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size)
{
	size_t hashsize, maxper, min_size, chunksize = dstate->dtds_chunksize;
	void *base;
	uintptr_t limit;
	dtrace_dynvar_t *dvar, *next, *start;
	int i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL);

	bzero(dstate, sizeof (dtrace_dstate_t));

	/* A zero chunk size means "use the framework default". */
	if ((dstate->dtds_chunksize = chunksize) == 0)
		dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE;

	/* Guarantee room for at least one chunk plus one hash bucket. */
	if (size < (min_size = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)))
		size = min_size;

	if ((base = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
		return (ENOMEM);

	dstate->dtds_size = size;
	dstate->dtds_base = base;
	dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP);
	bzero(dstate->dtds_percpu, (int)NCPU * sizeof (dtrace_dstate_percpu_t));

	hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t));

	/*
	 * NOTE(review): the bucket count is forced even (unless it is 1);
	 * the reason is not evident from this excerpt -- confirm before
	 * relying on it.
	 */
	if (hashsize != 1 && (hashsize & 1))
		hashsize--;

	dstate->dtds_hashsize = hashsize;
	dstate->dtds_hash = dstate->dtds_base;

	/*
	 * Set all of our hash buckets to point to the single sink, and (if
	 * it hasn't already been set), set the sink's hash value to be the
	 * sink sentinel value.  The sink is needed for dynamic variable
	 * lookups to know that they have iterated over an entire, valid hash
	 * chain.
	 */
	for (i = 0; i < hashsize; i++)
		dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink;

	if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK)
		dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK;

	/*
	 * Determine number of active CPUs.  Divide free list evenly among
	 * active CPUs.
	 */
	start = (dtrace_dynvar_t *)
	    ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t));
	limit = (uintptr_t)base + size;

	/* Bytes per CPU, rounded down to a whole number of chunks. */
	maxper = (limit - (uintptr_t)start) / (int)NCPU;
	maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize;

	for (i = 0; i < (int)NCPU; i++) {
		dstate->dtds_percpu[i].dtdsc_free = dvar = start;

		/*
		 * If we don't even have enough chunks to make it once through
		 * NCPUs, we're just going to allocate everything to the first
		 * CPU.  And if we're on the last CPU, we're going to allocate
		 * whatever is left over.  In either case, we set the limit to
		 * be the limit of the dynamic variable space.
		 */
		if (maxper == 0 || i == (int)NCPU - 1) {
			limit = (uintptr_t)base + size;
			start = NULL;
		} else {
			limit = (uintptr_t)start + maxper;
			start = (dtrace_dynvar_t *)limit;
		}

		ASSERT(limit <= (uintptr_t)base + size);

		/*
		 * Thread the chunks in this CPU's region into a singly-linked
		 * free list.
		 */
		for (;;) {
			next = (dtrace_dynvar_t *)((uintptr_t)dvar +
			    dstate->dtds_chunksize);

			if ((uintptr_t)next + dstate->dtds_chunksize >= limit)
				break;

			dvar->dtdv_next = next;
			dvar = next;
		}

		if (maxper == 0)
			break;
	}

	return (0);
}

/*
 * Tear down the dynamic variable state allocated by dtrace_dstate_init().
 * Safe to call on a state that was never initialized.
 */
#if defined(__APPLE__)
static
#endif /* __APPLE__ */
void
dtrace_dstate_fini(dtrace_dstate_t *dstate)
{
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	if (dstate->dtds_base == NULL)
		return;

	kmem_free(dstate->dtds_base, dstate->dtds_size);
	kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu);
}

static void
dtrace_vstate_fini(dtrace_vstate_t *vstate)
{
	/*
	 * Logical XOR, where are you?
	 */
	ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL));

	if (vstate->dtvs_nglobals > 0) {
		kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals *
		    sizeof (dtrace_statvar_t *));
	}

	if (vstate->dtvs_ntlocals > 0) {
		kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals *
		    sizeof (dtrace_difv_t));
	}

	ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL));

	if (vstate->dtvs_nlocals > 0) {
		kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals *
		    sizeof (dtrace_statvar_t *));
	}
}

/*
 * Clean dynamic variable and speculation state for an active consumer
 * state; a no-op for inactive states.
 */
static void
dtrace_state_clean(dtrace_state_t *state)
{
	if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE)
		return;

	dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars);
	dtrace_speculation_clean(state);
}

/*
 * Deadman handler: refresh the state's dts_alive timestamp (skipped for
 * non-anonymous states whose consumer has checked in recently enough).
 */
static void
dtrace_state_deadman(dtrace_state_t *state)
{
	hrtime_t now;

	dtrace_sync();

	now = dtrace_gethrtime();

	if (state != dtrace_anon.dta_state &&
	    now - state->dts_laststatus >= dtrace_deadman_user)
		return;

	/*
	 * We must be sure that dts_alive never appears to be less than the
	 * value upon entry to dtrace_state_deadman(), and because we lack a
	 * dtrace_cas64(), we cannot store to it atomically.  We thus instead
	 * store INT64_MAX to it, followed by a memory barrier, followed by
	 * the new value.  This assures that dts_alive never appears to be
	 * less than its true value, regardless of the order in which the
	 * stores to the underlying storage are issued.
	 */
	state->dts_alive = INT64_MAX;
	dtrace_membar_producer();
	state->dts_alive = now;
}

/*
 * Allocate and initialize a new DTrace consumer state bound to the minor
 * device in 'devp', or anonymous state when devp is NULL.  Returns NULL on
 * failure.  Called with dtrace_lock and cpu_lock held.
 */
#if defined(__APPLE__)
static
#endif /* __APPLE__ */
dtrace_state_t *
dtrace_state_create(dev_t *devp, cred_t *cr)
{
	minor_t minor;
	major_t major;
	char c[30];
	dtrace_state_t *state;
	dtrace_optval_t *opt;
	int bufsize = (int)NCPU * sizeof (dtrace_buffer_t), i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

#if !defined(__APPLE__)
	minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1,
	    VM_BESTFIT | VM_SLEEP);
#else
	/*
	 * Darwin's DEVFS layer acquired the minor number for this "device" when it called
	 * dtrace_devfs_clone_func(). At that time, dtrace_devfs_clone_func() proposed a minor number
	 * (next unused according to vmem_alloc()) and then immediately put the number back in play
	 * (by calling vmem_free()). Now that minor number is being used for an open, so committing it
	 * to use. The following vmem_alloc() must deliver that same minor number.
	 */

	minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1,
	    VM_BESTFIT | VM_SLEEP);

	if (NULL != devp) {
		ASSERT(getminor(*devp) == minor);
		if (getminor(*devp) != minor) {
			printf("dtrace_open: couldn't re-acquire vended minor number %d. Instead got %d\n",
			    getminor(*devp), minor);
			vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
			return NULL;
		}
	} else {
		/* NULL==devp iff "Anonymous state" (see dtrace_anon_property),
		 * so just vend the minor device number here de novo since no "open" has occurred.
*/ 12115 } 12116 12117#endif /* __APPLE__ */ 12118 12119 if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) { 12120 vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); 12121 return (NULL); 12122 } 12123 12124 state = ddi_get_soft_state(dtrace_softstate, minor); 12125 state->dts_epid = DTRACE_EPIDNONE + 1; 12126 12127 (void) snprintf(c, sizeof (c), "dtrace_aggid_%d", minor); 12128 state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1, 12129 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); 12130 12131 if (devp != NULL) { 12132 major = getemajor(*devp); 12133 } else { 12134 major = ddi_driver_major(dtrace_devi); 12135 } 12136 12137 state->dts_dev = makedevice(major, minor); 12138 12139 if (devp != NULL) 12140 *devp = state->dts_dev; 12141 12142 /* 12143 * We allocate NCPU buffers. On the one hand, this can be quite 12144 * a bit of memory per instance (nearly 36K on a Starcat). On the 12145 * other hand, it saves an additional memory reference in the probe 12146 * path. 12147 */ 12148 state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP); 12149 state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP); 12150 state->dts_cleaner = CYCLIC_NONE; 12151 state->dts_deadman = CYCLIC_NONE; 12152 state->dts_vstate.dtvs_state = state; 12153 12154 for (i = 0; i < DTRACEOPT_MAX; i++) 12155 state->dts_options[i] = DTRACEOPT_UNSET; 12156 12157 /* 12158 * Set the default options. 
	 */
	opt = state->dts_options;
	/* (Defaults come from the dtrace_*_default tunables.) */
	opt[DTRACEOPT_BUFPOLICY] = DTRACEOPT_BUFPOLICY_SWITCH;
	opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_AUTO;
	opt[DTRACEOPT_NSPEC] = dtrace_nspec_default;
	opt[DTRACEOPT_SPECSIZE] = dtrace_specsize_default;
	opt[DTRACEOPT_CPU] = (dtrace_optval_t)DTRACE_CPUALL;
	opt[DTRACEOPT_STRSIZE] = dtrace_strsize_default;
	opt[DTRACEOPT_STACKFRAMES] = dtrace_stackframes_default;
	opt[DTRACEOPT_USTACKFRAMES] = dtrace_ustackframes_default;
	opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_default;
	opt[DTRACEOPT_AGGRATE] = dtrace_aggrate_default;
	opt[DTRACEOPT_SWITCHRATE] = dtrace_switchrate_default;
	opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default;
	opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default;
	opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default;

	state->dts_activity = DTRACE_ACTIVITY_INACTIVE;

	/*
	 * Depending on the user credentials, we set flag bits which alter probe
	 * visibility or the amount of destructiveness allowed.  In the case of
	 * actual anonymous tracing, or the possession of all privileges, all of
	 * the normal checks are bypassed.
	 */
	if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
		state->dts_cred.dcr_visible = DTRACE_CRV_ALL;
		state->dts_cred.dcr_action = DTRACE_CRA_ALL;
	} else {
		/*
		 * Set up the credentials for this instantiation.  We take a
		 * hold on the credential to prevent it from disappearing on
		 * us; this in turn prevents the zone_t referenced by this
		 * credential from disappearing.  This means that we can
		 * examine the credential and the zone from probe context.
		 */
		crhold(cr);
		state->dts_cred.dcr_cred = cr;

		/*
		 * CRA_PROC means "we have *some* privilege for dtrace" and
		 * unlocks the use of variables like pid, zonename, etc.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) ||
		    PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
			state->dts_cred.dcr_action |= DTRACE_CRA_PROC;
		}

		/*
		 * dtrace_user allows use of syscall and profile providers.
		 * If the user also has proc_owner and/or proc_zone, we
		 * extend the scope to include additional visibility and
		 * destructive power.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) {
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) {
				state->dts_cred.dcr_visible |=
				    DTRACE_CRV_ALLPROC;

				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
			}

			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) {
				state->dts_cred.dcr_visible |=
				    DTRACE_CRV_ALLZONE;

				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
			}

			/*
			 * If we have all privs in whatever zone this is,
			 * we can do destructive things to processes which
			 * have altered credentials.
			 */
#if !defined(__APPLE__)
			if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
			    cr->cr_zone->zone_privset)) {
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
			}
#else
			/* Darwin doesn't do zones. */
			state->dts_cred.dcr_action |=
			    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
#endif /* __APPLE__ */
		}

		/*
		 * Holding the dtrace_kernel privilege also implies that
		 * the user has the dtrace_user privilege from a visibility
		 * perspective.  But without further privileges, some
		 * destructive actions are not available.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) {
			/*
			 * Make all probes in all zones visible.  However,
			 * this doesn't mean that all actions become available
			 * to all zones.
			 */
			/* dtrace_kernel: full kernel visibility plus basic proc rights. */
			state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL |
			    DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE;

			state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL |
			    DTRACE_CRA_PROC;
			/*
			 * Holding proc_owner means that destructive actions
			 * for *this* zone are allowed.
			 */
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;

			/*
			 * Holding proc_zone means that destructive actions
			 * for this user/group ID in all zones is allowed.
			 */
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;

			/*
			 * If we have all privs in whatever zone this is,
			 * we can do destructive things to processes which
			 * have altered credentials.
			 */
#if !defined(__APPLE__)
			if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
			    cr->cr_zone->zone_privset)) {
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
			}
#else
			/* Darwin doesn't do zones. */
			state->dts_cred.dcr_action |=
			    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
#endif /* __APPLE__ */
		}

		/*
		 * Holding the dtrace_proc privilege gives control over fasttrap
		 * and pid providers.  We need to grant wider destructive
		 * privileges in the event that the user has proc_owner and/or
		 * proc_zone.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;

			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
		}
	}

	return (state);
}

/*
 * Allocate the per-CPU buffers governed by option 'which' (principal,
 * aggregation, or speculation size), halving the requested size on ENOMEM
 * until an allocation succeeds or the size drops below the minimum.
 * Returns 0 on success, E2BIG if the size cannot cover the prereserved
 * space, or the errno from dtrace_buffer_alloc().
 */
static int
dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
{
	dtrace_optval_t *opt = state->dts_options, size;
	processorid_t cpu = 0;
	int flags = 0, rval;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(which < DTRACEOPT_MAX);
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE ||
	    (state == dtrace_anon.dta_state &&
	    state->dts_activity == DTRACE_ACTIVITY_ACTIVE));

	if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0)
		return (0);

	if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET)
		cpu = opt[DTRACEOPT_CPU];

	if (which == DTRACEOPT_SPECSIZE)
		flags |= DTRACEBUF_NOSWITCH;

	if (which == DTRACEOPT_BUFSIZE) {
		if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING)
			flags |= DTRACEBUF_RING;

		if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL)
			flags |= DTRACEBUF_FILL;

		if (state != dtrace_anon.dta_state ||
		    state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
			flags |= DTRACEBUF_INACTIVE;
	}

	for (size = opt[which]; size >= sizeof (uint64_t); size >>= 1) {
		/*
		 * The size must be 8-byte aligned.  If the size is not 8-byte
		 * aligned, drop it down by the difference.
		 */
		if (size & (sizeof (uint64_t) - 1))
			size -= size & (sizeof (uint64_t) - 1);

		if (size < state->dts_reserve) {
			/*
			 * Buffers always must be large enough to accommodate
			 * their prereserved space.  We return E2BIG instead
			 * of ENOMEM in this case to allow for user-level
			 * software to differentiate the cases.
			 */
			return (E2BIG);
		}

		rval = dtrace_buffer_alloc(buf, size, flags, cpu);

		if (rval != ENOMEM) {
			opt[which] = size;
			return (rval);
		}

		/* In manual-resize mode we don't retry at a smaller size. */
		if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
			return (rval);
	}

	return (ENOMEM);
}

/*
 * Allocate the principal, aggregation, and all speculation buffers for the
 * given state; returns the first non-zero error encountered.
 */
static int
dtrace_state_buffers(dtrace_state_t *state)
{
	dtrace_speculation_t *spec = state->dts_speculations;
	int rval, i;

	if ((rval = dtrace_state_buffer(state, state->dts_buffer,
	    DTRACEOPT_BUFSIZE)) != 0)
		return (rval);

	if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer,
	    DTRACEOPT_AGGSIZE)) != 0)
		return (rval);

	for (i = 0; i < state->dts_nspeculations; i++) {
		if ((rval = dtrace_state_buffer(state,
		    spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0)
			return (rval);
	}

	return (0);
}

/*
 * Compute dts_reserve: the buffer space that must be held back for this
 * state's END-probe ECBs (only relevant under the "fill" buffer policy).
 */
static void
dtrace_state_prereserve(dtrace_state_t *state)
{
	dtrace_ecb_t *ecb;
	dtrace_probe_t *probe;

	state->dts_reserve = 0;

	if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL)
		return;

	/*
	 * If our buffer policy is a "fill" buffer policy, we need to set the
	 * prereserved space to be the space required by the END probes.
	 */
	probe = dtrace_probes[dtrace_probeid_end - 1];
	ASSERT(probe != NULL);

	for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
		if (ecb->dte_state != state)
			continue;

		state->dts_reserve += ecb->dte_needed + ecb->dte_alignment;
	}
}

/*
 * Activate a consumer state: prime its retained enablings, verify
 * destructive-action credentials, compute the prereserve, and allocate
 * speculation buffers.  Returns 0 on success or an errno.  Acquires
 * cpu_lock and dtrace_lock for the duration.
 */
static int
dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
{
	dtrace_optval_t *opt = state->dts_options, sz, nspec;
	dtrace_speculation_t *spec;
	dtrace_buffer_t *buf;
	cyc_handler_t hdlr;
	cyc_time_t when;
	int rval = 0, i, bufsize = (int)NCPU * sizeof (dtrace_buffer_t);
	dtrace_icookie_t cookie;

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_lock);

	if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
		rval = EBUSY;
		goto out;
	}

	/*
	 * Before we can perform any checks, we must prime all of the
	 * retained enablings that correspond to this state.
	 */
	dtrace_enabling_prime(state);

	if (state->dts_destructive && !state->dts_cred.dcr_destructive) {
		rval = EACCES;
		goto out;
	}

	dtrace_state_prereserve(state);

	/*
	 * Now we want to do is try to allocate our speculations.
	 * We do not automatically resize the number of speculations; if
	 * this fails, we will fail the operation.
12471 */ 12472 nspec = opt[DTRACEOPT_NSPEC]; 12473 ASSERT(nspec != DTRACEOPT_UNSET); 12474 12475 if (nspec > INT_MAX) { 12476 rval = ENOMEM; 12477 goto out; 12478 } 12479 12480 spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), KM_NOSLEEP); 12481 12482 if (spec == NULL) { 12483 rval = ENOMEM; 12484 goto out; 12485 } 12486 12487 state->dts_speculations = spec; 12488 state->dts_nspeculations = (int)nspec; 12489 12490 for (i = 0; i < nspec; i++) { 12491 if ((buf = kmem_zalloc(bufsize, KM_NOSLEEP)) == NULL) { 12492 rval = ENOMEM; 12493 goto err; 12494 } 12495 12496 spec[i].dtsp_buffer = buf; 12497 } 12498 12499 if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) { 12500 if (dtrace_anon.dta_state == NULL) { 12501 rval = ENOENT; 12502 goto out; 12503 } 12504 12505 if (state->dts_necbs != 0) { 12506 rval = EALREADY; 12507 goto out; 12508 } 12509 12510 state->dts_anon = dtrace_anon_grab(); 12511 ASSERT(state->dts_anon != NULL); 12512 state = state->dts_anon; 12513 12514 /* 12515 * We want "grabanon" to be set in the grabbed state, so we'll 12516 * copy that option value from the grabbing state into the 12517 * grabbed state. 12518 */ 12519 state->dts_options[DTRACEOPT_GRABANON] = 12520 opt[DTRACEOPT_GRABANON]; 12521 12522 *cpu = dtrace_anon.dta_beganon; 12523 12524 /* 12525 * If the anonymous state is active (as it almost certainly 12526 * is if the anonymous enabling ultimately matched anything), 12527 * we don't allow any further option processing -- but we 12528 * don't return failure. 12529 */ 12530 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) 12531 goto out; 12532 } 12533 12534 if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET && 12535 opt[DTRACEOPT_AGGSIZE] != 0) { 12536 if (state->dts_aggregations == NULL) { 12537 /* 12538 * We're not going to create an aggregation buffer 12539 * because we don't have any ECBs that contain 12540 * aggregations -- set this option to 0. 
12541 */ 12542 opt[DTRACEOPT_AGGSIZE] = 0; 12543 } else { 12544 /* 12545 * If we have an aggregation buffer, we must also have 12546 * a buffer to use as scratch. 12547 */ 12548 if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET || 12549 opt[DTRACEOPT_BUFSIZE] < state->dts_needed) { 12550 opt[DTRACEOPT_BUFSIZE] = state->dts_needed; 12551 } 12552 } 12553 } 12554 12555 if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET && 12556 opt[DTRACEOPT_SPECSIZE] != 0) { 12557 if (!state->dts_speculates) { 12558 /* 12559 * We're not going to create speculation buffers 12560 * because we don't have any ECBs that actually 12561 * speculate -- set the speculation size to 0. 12562 */ 12563 opt[DTRACEOPT_SPECSIZE] = 0; 12564 } 12565 } 12566 12567 /* 12568 * The bare minimum size for any buffer that we're actually going to 12569 * do anything to is sizeof (uint64_t). 12570 */ 12571 sz = sizeof (uint64_t); 12572 12573 if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) || 12574 (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) || 12575 (state->dts_aggregations != NULL && opt[DTRACEOPT_AGGSIZE] < sz)) { 12576 /* 12577 * A buffer size has been explicitly set to 0 (or to a size 12578 * that will be adjusted to 0) and we need the space -- we 12579 * need to return failure. We return ENOSPC to differentiate 12580 * it from failing to allocate a buffer due to failure to meet 12581 * the reserve (for which we return E2BIG). 
12582 */ 12583 rval = ENOSPC; 12584 goto out; 12585 } 12586 12587 if ((rval = dtrace_state_buffers(state)) != 0) 12588 goto err; 12589 12590 if ((sz = opt[DTRACEOPT_DYNVARSIZE]) == DTRACEOPT_UNSET) 12591 sz = dtrace_dstate_defsize; 12592 12593 do { 12594 rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz); 12595 12596 if (rval == 0) 12597 break; 12598 12599 if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL) 12600 goto err; 12601 } while (sz >>= 1); 12602 12603 opt[DTRACEOPT_DYNVARSIZE] = sz; 12604 12605 if (rval != 0) 12606 goto err; 12607 12608 if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max) 12609 opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max; 12610 12611 if (opt[DTRACEOPT_CLEANRATE] == 0) 12612 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max; 12613 12614 if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min) 12615 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min; 12616 12617 if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max) 12618 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max; 12619 12620 hdlr.cyh_func = (cyc_func_t)dtrace_state_clean; 12621 hdlr.cyh_arg = state; 12622 hdlr.cyh_level = CY_LOW_LEVEL; 12623 12624 when.cyt_when = 0; 12625 when.cyt_interval = opt[DTRACEOPT_CLEANRATE]; 12626 12627 state->dts_cleaner = cyclic_add(&hdlr, &when); 12628 12629 hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman; 12630 hdlr.cyh_arg = state; 12631 hdlr.cyh_level = CY_LOW_LEVEL; 12632 12633 when.cyt_when = 0; 12634 when.cyt_interval = dtrace_deadman_interval; 12635 12636 state->dts_alive = state->dts_laststatus = dtrace_gethrtime(); 12637 state->dts_deadman = cyclic_add(&hdlr, &when); 12638 12639 state->dts_activity = DTRACE_ACTIVITY_WARMUP; 12640 12641 /* 12642 * Now it's time to actually fire the BEGIN probe. We need to disable 12643 * interrupts here both to record the CPU on which we fired the BEGIN 12644 * probe (the data from this CPU will be processed first at user 12645 * level) and to manually activate the buffer for this CPU. 
12646 */ 12647 cookie = dtrace_interrupt_disable(); 12648 *cpu = CPU->cpu_id; 12649 ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE); 12650 state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE; 12651 12652 dtrace_probe(dtrace_probeid_begin, 12653 (uint64_t)(uintptr_t)state, 0, 0, 0, 0); 12654 dtrace_interrupt_enable(cookie); 12655 /* 12656 * We may have had an exit action from a BEGIN probe; only change our 12657 * state to ACTIVE if we're still in WARMUP. 12658 */ 12659 ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP || 12660 state->dts_activity == DTRACE_ACTIVITY_DRAINING); 12661 12662 if (state->dts_activity == DTRACE_ACTIVITY_WARMUP) 12663 state->dts_activity = DTRACE_ACTIVITY_ACTIVE; 12664 12665 /* 12666 * Regardless of whether or not now we're in ACTIVE or DRAINING, we 12667 * want each CPU to transition its principal buffer out of the 12668 * INACTIVE state. Doing this assures that no CPU will suddenly begin 12669 * processing an ECB halfway down a probe's ECB chain; all CPUs will 12670 * atomically transition from processing none of a state's ECBs to 12671 * processing all of them. 
12672 */ 12673 dtrace_xcall(DTRACE_CPUALL, 12674 (dtrace_xcall_t)dtrace_buffer_activate, state); 12675 goto out; 12676 12677err: 12678 dtrace_buffer_free(state->dts_buffer); 12679 dtrace_buffer_free(state->dts_aggbuffer); 12680 12681 if ((nspec = state->dts_nspeculations) == 0) { 12682 ASSERT(state->dts_speculations == NULL); 12683 goto out; 12684 } 12685 12686 spec = state->dts_speculations; 12687 ASSERT(spec != NULL); 12688 12689 for (i = 0; i < state->dts_nspeculations; i++) { 12690 if ((buf = spec[i].dtsp_buffer) == NULL) 12691 break; 12692 12693 dtrace_buffer_free(buf); 12694 kmem_free(buf, bufsize); 12695 } 12696 12697 kmem_free(spec, nspec * sizeof (dtrace_speculation_t)); 12698 state->dts_nspeculations = 0; 12699 state->dts_speculations = NULL; 12700 12701out: 12702 lck_mtx_unlock(&dtrace_lock); 12703 lck_mtx_unlock(&cpu_lock); 12704 12705 return (rval); 12706} 12707 12708static int 12709dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu) 12710{ 12711 dtrace_icookie_t cookie; 12712 12713 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 12714 12715 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE && 12716 state->dts_activity != DTRACE_ACTIVITY_DRAINING) 12717 return (EINVAL); 12718 12719 /* 12720 * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync 12721 * to be sure that every CPU has seen it. See below for the details 12722 * on why this is done. 12723 */ 12724 state->dts_activity = DTRACE_ACTIVITY_DRAINING; 12725 dtrace_sync(); 12726 12727 /* 12728 * By this point, it is impossible for any CPU to be still processing 12729 * with DTRACE_ACTIVITY_ACTIVE. We can thus set our activity to 12730 * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any 12731 * other CPU in dtrace_buffer_reserve(). This allows dtrace_probe() 12732 * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN 12733 * iff we're in the END probe. 
 */
	state->dts_activity = DTRACE_ACTIVITY_COOLDOWN;
	dtrace_sync();
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN);

	/*
	 * Finally, we can release the reserve and call the END probe.  We
	 * disable interrupts across calling the END probe to allow us to
	 * return the CPU on which we actually called the END probe.  This
	 * allows user-land to be sure that this CPU's principal buffer is
	 * processed last.
	 */
	state->dts_reserve = 0;

	cookie = dtrace_interrupt_disable();
	*cpu = CPU->cpu_id;
	dtrace_probe(dtrace_probeid_end,
	    (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
	dtrace_interrupt_enable(cookie);

	state->dts_activity = DTRACE_ACTIVITY_STOPPED;
	dtrace_sync();

	return (0);
}

/*
 * Validate and record a single option value in state->dts_options.
 * Options may only be set while the state is inactive; returns EBUSY if it
 * is not, EINVAL for an out-of-range option or (for non-CPU options) a
 * negative value, and EACCES if destructive actions are administratively
 * disallowed.
 */
static int
dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option,
    dtrace_optval_t val)
{
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
		return (EBUSY);

	if (option >= DTRACEOPT_MAX)
		return (EINVAL);

	/* DTRACEOPT_CPU is the one option for which a negative value
	 * (e.g. "all CPUs") is meaningful. */
	if (option != DTRACEOPT_CPU && val < 0)
		return (EINVAL);

	switch (option) {
	case DTRACEOPT_DESTRUCTIVE:
		if (dtrace_destructive_disallow)
			return (EACCES);

		state->dts_cred.dcr_destructive = 1;
		break;

	case DTRACEOPT_BUFSIZE:
	case DTRACEOPT_DYNVARSIZE:
	case DTRACEOPT_AGGSIZE:
	case DTRACEOPT_SPECSIZE:
	case DTRACEOPT_STRSIZE:
		if (val < 0)
			return (EINVAL);

		if (val >= LONG_MAX) {
			/*
			 * If this is an otherwise negative value, set it to
			 * the highest multiple of 128m less than LONG_MAX.
12795 * Technically, we're adjusting the size without 12796 * regard to the buffer resizing policy, but in fact, 12797 * this has no effect -- if we set the buffer size to 12798 * ~LONG_MAX and the buffer policy is ultimately set to 12799 * be "manual", the buffer allocation is guaranteed to 12800 * fail, if only because the allocation requires two 12801 * buffers. (We set the the size to the highest 12802 * multiple of 128m because it ensures that the size 12803 * will remain a multiple of a megabyte when 12804 * repeatedly halved -- all the way down to 15m.) 12805 */ 12806 val = LONG_MAX - (1 << 27) + 1; 12807 } 12808 } 12809 12810 state->dts_options[option] = val; 12811 12812 return (0); 12813} 12814 12815static void 12816dtrace_state_destroy(dtrace_state_t *state) 12817{ 12818 dtrace_ecb_t *ecb; 12819 dtrace_vstate_t *vstate = &state->dts_vstate; 12820 minor_t minor = getminor(state->dts_dev); 12821 int i, bufsize = (int)NCPU * sizeof (dtrace_buffer_t); 12822 dtrace_speculation_t *spec = state->dts_speculations; 12823 int nspec = state->dts_nspeculations; 12824 uint32_t match; 12825 12826 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 12827 lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); 12828 12829 /* 12830 * First, retract any retained enablings for this state. 12831 */ 12832 dtrace_enabling_retract(state); 12833 ASSERT(state->dts_nretained == 0); 12834 12835 if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE || 12836 state->dts_activity == DTRACE_ACTIVITY_DRAINING) { 12837 /* 12838 * We have managed to come into dtrace_state_destroy() on a 12839 * hot enabling -- almost certainly because of a disorderly 12840 * shutdown of a consumer. (That is, a consumer that is 12841 * exiting without having called dtrace_stop().) In this case, 12842 * we're going to set our activity to be KILLED, and then 12843 * issue a sync to be sure that everyone is out of probe 12844 * context before we start blowing away ECBs. 
12845 */ 12846 state->dts_activity = DTRACE_ACTIVITY_KILLED; 12847 dtrace_sync(); 12848 } 12849 12850 /* 12851 * Release the credential hold we took in dtrace_state_create(). 12852 */ 12853 if (state->dts_cred.dcr_cred != NULL) 12854 crfree(state->dts_cred.dcr_cred); 12855 12856 /* 12857 * Now we can safely disable and destroy any enabled probes. Because 12858 * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress 12859 * (especially if they're all enabled), we take two passes through the 12860 * ECBs: in the first, we disable just DTRACE_PRIV_KERNEL probes, and 12861 * in the second we disable whatever is left over. 12862 */ 12863 for (match = DTRACE_PRIV_KERNEL; ; match = 0) { 12864 for (i = 0; i < state->dts_necbs; i++) { 12865 if ((ecb = state->dts_ecbs[i]) == NULL) 12866 continue; 12867 12868 if (match && ecb->dte_probe != NULL) { 12869 dtrace_probe_t *probe = ecb->dte_probe; 12870 dtrace_provider_t *prov = probe->dtpr_provider; 12871 12872 if (!(prov->dtpv_priv.dtpp_flags & match)) 12873 continue; 12874 } 12875 12876 dtrace_ecb_disable(ecb); 12877 dtrace_ecb_destroy(ecb); 12878 } 12879 12880 if (!match) 12881 break; 12882 } 12883 12884 /* 12885 * Before we free the buffers, perform one more sync to assure that 12886 * every CPU is out of probe context. 
12887 */ 12888 dtrace_sync(); 12889 12890 dtrace_buffer_free(state->dts_buffer); 12891 dtrace_buffer_free(state->dts_aggbuffer); 12892 12893 for (i = 0; i < nspec; i++) 12894 dtrace_buffer_free(spec[i].dtsp_buffer); 12895 12896 if (state->dts_cleaner != CYCLIC_NONE) 12897 cyclic_remove(state->dts_cleaner); 12898 12899 if (state->dts_deadman != CYCLIC_NONE) 12900 cyclic_remove(state->dts_deadman); 12901 12902 dtrace_dstate_fini(&vstate->dtvs_dynvars); 12903 dtrace_vstate_fini(vstate); 12904 kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *)); 12905 12906 if (state->dts_aggregations != NULL) { 12907#ifdef DEBUG 12908 for (i = 0; i < state->dts_naggregations; i++) 12909 ASSERT(state->dts_aggregations[i] == NULL); 12910#endif 12911 ASSERT(state->dts_naggregations > 0); 12912 kmem_free(state->dts_aggregations, 12913 state->dts_naggregations * sizeof (dtrace_aggregation_t *)); 12914 } 12915 12916 kmem_free(state->dts_buffer, bufsize); 12917 kmem_free(state->dts_aggbuffer, bufsize); 12918 12919 for (i = 0; i < nspec; i++) 12920 kmem_free(spec[i].dtsp_buffer, bufsize); 12921 12922 kmem_free(spec, nspec * sizeof (dtrace_speculation_t)); 12923 12924 dtrace_format_destroy(state); 12925 12926 vmem_destroy(state->dts_aggid_arena); 12927 ddi_soft_state_free(dtrace_softstate, minor); 12928 vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); 12929} 12930 12931/* 12932 * DTrace Anonymous Enabling Functions 12933 */ 12934static dtrace_state_t * 12935dtrace_anon_grab(void) 12936{ 12937 dtrace_state_t *state; 12938 12939 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 12940 12941 if ((state = dtrace_anon.dta_state) == NULL) { 12942 ASSERT(dtrace_anon.dta_enabling == NULL); 12943 return (NULL); 12944 } 12945 12946 ASSERT(dtrace_anon.dta_enabling != NULL); 12947 ASSERT(dtrace_retained != NULL); 12948 12949 dtrace_enabling_destroy(dtrace_anon.dta_enabling); 12950 dtrace_anon.dta_enabling = NULL; 12951 dtrace_anon.dta_state = NULL; 12952 12953 return (state); 
12954} 12955 12956static void 12957dtrace_anon_property(void) 12958{ 12959 int i, rv; 12960 dtrace_state_t *state; 12961 dof_hdr_t *dof; 12962 char c[32]; /* enough for "dof-data-" + digits */ 12963 12964 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 12965 lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); 12966 12967 for (i = 0; ; i++) { 12968 (void) snprintf(c, sizeof (c), "dof-data-%d", i); 12969 12970 dtrace_err_verbose = 1; 12971 12972 if ((dof = dtrace_dof_property(c)) == NULL) { 12973 dtrace_err_verbose = 0; 12974 break; 12975 } 12976 12977 /* 12978 * We want to create anonymous state, so we need to transition 12979 * the kernel debugger to indicate that DTrace is active. If 12980 * this fails (e.g. because the debugger has modified text in 12981 * some way), we won't continue with the processing. 12982 */ 12983 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) { 12984 cmn_err(CE_NOTE, "kernel debugger active; anonymous " 12985 "enabling ignored."); 12986 dtrace_dof_destroy(dof); 12987 break; 12988 } 12989 12990 /* 12991 * If we haven't allocated an anonymous state, we'll do so now. 12992 */ 12993 if ((state = dtrace_anon.dta_state) == NULL) { 12994 state = dtrace_state_create(NULL, NULL); 12995 dtrace_anon.dta_state = state; 12996 12997 if (state == NULL) { 12998 /* 12999 * This basically shouldn't happen: the only 13000 * failure mode from dtrace_state_create() is a 13001 * failure of ddi_soft_state_zalloc() that 13002 * itself should never happen. Still, the 13003 * interface allows for a failure mode, and 13004 * we want to fail as gracefully as possible: 13005 * we'll emit an error message and cease 13006 * processing anonymous state in this case. 
13007 */ 13008 cmn_err(CE_WARN, "failed to create " 13009 "anonymous state"); 13010 dtrace_dof_destroy(dof); 13011 break; 13012 } 13013 } 13014 13015 rv = dtrace_dof_slurp(dof, &state->dts_vstate, CRED(), 13016 &dtrace_anon.dta_enabling, 0, B_TRUE); 13017 13018 if (rv == 0) 13019 rv = dtrace_dof_options(dof, state); 13020 13021 dtrace_err_verbose = 0; 13022 dtrace_dof_destroy(dof); 13023 13024 if (rv != 0) { 13025 /* 13026 * This is malformed DOF; chuck any anonymous state 13027 * that we created. 13028 */ 13029 ASSERT(dtrace_anon.dta_enabling == NULL); 13030 dtrace_state_destroy(state); 13031 dtrace_anon.dta_state = NULL; 13032 break; 13033 } 13034 13035 ASSERT(dtrace_anon.dta_enabling != NULL); 13036 } 13037 13038 if (dtrace_anon.dta_enabling != NULL) { 13039 int rval; 13040 13041 /* 13042 * dtrace_enabling_retain() can only fail because we are 13043 * trying to retain more enablings than are allowed -- but 13044 * we only have one anonymous enabling, and we are guaranteed 13045 * to be allowed at least one retained enabling; we assert 13046 * that dtrace_enabling_retain() returns success. 13047 */ 13048 rval = dtrace_enabling_retain(dtrace_anon.dta_enabling); 13049 ASSERT(rval == 0); 13050 13051 dtrace_enabling_dump(dtrace_anon.dta_enabling); 13052 } 13053} 13054 13055/* 13056 * DTrace Helper Functions 13057 */ 13058static void 13059dtrace_helper_trace(dtrace_helper_action_t *helper, 13060 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where) 13061{ 13062 uint32_t size, next, nnext, i; 13063 dtrace_helptrace_t *ent; 13064 uint16_t flags = cpu_core[CPU->cpu_id].cpuc_dtrace_flags; 13065 13066 if (!dtrace_helptrace_enabled) 13067 return; 13068 13069 ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals); 13070 13071 /* 13072 * What would a tracing framework be without its own tracing 13073 * framework? (Well, a hell of a lot simpler, for starters...) 
13074 */ 13075 size = sizeof (dtrace_helptrace_t) + dtrace_helptrace_nlocals * 13076 sizeof (uint64_t) - sizeof (uint64_t); 13077 13078 /* 13079 * Iterate until we can allocate a slot in the trace buffer. 13080 */ 13081 do { 13082 next = dtrace_helptrace_next; 13083 13084 if (next + size < dtrace_helptrace_bufsize) { 13085 nnext = next + size; 13086 } else { 13087 nnext = size; 13088 } 13089 } while (dtrace_cas32(&dtrace_helptrace_next, next, nnext) != next); 13090 13091 /* 13092 * We have our slot; fill it in. 13093 */ 13094 if (nnext == size) 13095 next = 0; 13096 13097 ent = (dtrace_helptrace_t *)&dtrace_helptrace_buffer[next]; 13098 ent->dtht_helper = helper; 13099 ent->dtht_where = where; 13100 ent->dtht_nlocals = vstate->dtvs_nlocals; 13101 13102 ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) ? 13103 mstate->dtms_fltoffs : -1; 13104 ent->dtht_fault = DTRACE_FLAGS2FLT(flags); 13105 ent->dtht_illval = cpu_core[CPU->cpu_id].cpuc_dtrace_illval; 13106 13107 for (i = 0; i < vstate->dtvs_nlocals; i++) { 13108 dtrace_statvar_t *svar; 13109 13110 if ((svar = vstate->dtvs_locals[i]) == NULL) 13111 continue; 13112 13113 ASSERT(svar->dtsv_size >= (int)NCPU * sizeof (uint64_t)); 13114 ent->dtht_locals[i] = 13115 ((uint64_t *)(uintptr_t)svar->dtsv_data)[CPU->cpu_id]; 13116 } 13117} 13118 13119static uint64_t 13120dtrace_helper(int which, dtrace_mstate_t *mstate, 13121 dtrace_state_t *state, uint64_t arg0, uint64_t arg1) 13122{ 13123 uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; 13124 uint64_t sarg0 = mstate->dtms_arg[0]; 13125 uint64_t sarg1 = mstate->dtms_arg[1]; 13126 uint64_t rval = 0; 13127 dtrace_helpers_t *helpers = curproc->p_dtrace_helpers; 13128 dtrace_helper_action_t *helper; 13129 dtrace_vstate_t *vstate; 13130 dtrace_difo_t *pred; 13131 int i, trace = dtrace_helptrace_enabled; 13132 13133 ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS); 13134 13135 if (helpers == NULL) 13136 return (0); 13137 13138 if ((helper = 
helpers->dthps_actions[which]) == NULL) 13139 return (0); 13140 13141 vstate = &helpers->dthps_vstate; 13142 mstate->dtms_arg[0] = arg0; 13143 mstate->dtms_arg[1] = arg1; 13144 13145 /* 13146 * Now iterate over each helper. If its predicate evaluates to 'true', 13147 * we'll call the corresponding actions. Note that the below calls 13148 * to dtrace_dif_emulate() may set faults in machine state. This is 13149 * okay: our caller (the outer dtrace_dif_emulate()) will simply plow 13150 * the stored DIF offset with its own (which is the desired behavior). 13151 * Also, note the calls to dtrace_dif_emulate() may allocate scratch 13152 * from machine state; this is okay, too. 13153 */ 13154 for (; helper != NULL; helper = helper->dtha_next) { 13155 if ((pred = helper->dtha_predicate) != NULL) { 13156 if (trace) 13157 dtrace_helper_trace(helper, mstate, vstate, 0); 13158 13159 if (!dtrace_dif_emulate(pred, mstate, vstate, state)) 13160 goto next; 13161 13162 if (*flags & CPU_DTRACE_FAULT) 13163 goto err; 13164 } 13165 13166 for (i = 0; i < helper->dtha_nactions; i++) { 13167 if (trace) 13168 dtrace_helper_trace(helper, 13169 mstate, vstate, i + 1); 13170 13171 rval = dtrace_dif_emulate(helper->dtha_actions[i], 13172 mstate, vstate, state); 13173 13174 if (*flags & CPU_DTRACE_FAULT) 13175 goto err; 13176 } 13177 13178next: 13179 if (trace) 13180 dtrace_helper_trace(helper, mstate, vstate, 13181 DTRACE_HELPTRACE_NEXT); 13182 } 13183 13184 if (trace) 13185 dtrace_helper_trace(helper, mstate, vstate, 13186 DTRACE_HELPTRACE_DONE); 13187 13188 /* 13189 * Restore the arg0 that we saved upon entry. 13190 */ 13191 mstate->dtms_arg[0] = sarg0; 13192 mstate->dtms_arg[1] = sarg1; 13193 13194 return (rval); 13195 13196err: 13197 if (trace) 13198 dtrace_helper_trace(helper, mstate, vstate, 13199 DTRACE_HELPTRACE_ERR); 13200 13201 /* 13202 * Restore the arg0 that we saved upon entry. 
13203 */ 13204 mstate->dtms_arg[0] = sarg0; 13205 mstate->dtms_arg[1] = sarg1; 13206 13207 return (NULL); 13208} 13209 13210static void 13211dtrace_helper_action_destroy(dtrace_helper_action_t *helper, 13212 dtrace_vstate_t *vstate) 13213{ 13214 int i; 13215 13216 if (helper->dtha_predicate != NULL) 13217 dtrace_difo_release(helper->dtha_predicate, vstate); 13218 13219 for (i = 0; i < helper->dtha_nactions; i++) { 13220 ASSERT(helper->dtha_actions[i] != NULL); 13221 dtrace_difo_release(helper->dtha_actions[i], vstate); 13222 } 13223 13224 kmem_free(helper->dtha_actions, 13225 helper->dtha_nactions * sizeof (dtrace_difo_t *)); 13226 kmem_free(helper, sizeof (dtrace_helper_action_t)); 13227} 13228 13229#if !defined(__APPLE__) 13230static int 13231dtrace_helper_destroygen(int gen) 13232{ 13233 proc_t *p = curproc; 13234#else 13235static int 13236dtrace_helper_destroygen(proc_t* p, int gen) 13237{ 13238#endif 13239 dtrace_helpers_t *help = p->p_dtrace_helpers; 13240 dtrace_vstate_t *vstate; 13241 int i; 13242 13243 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 13244 13245 if (help == NULL || gen > help->dthps_generation) 13246 return (EINVAL); 13247 13248 vstate = &help->dthps_vstate; 13249 13250 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { 13251 dtrace_helper_action_t *last = NULL, *h, *next; 13252 13253 for (h = help->dthps_actions[i]; h != NULL; h = next) { 13254 next = h->dtha_next; 13255 13256 if (h->dtha_generation == gen) { 13257 if (last != NULL) { 13258 last->dtha_next = next; 13259 } else { 13260 help->dthps_actions[i] = next; 13261 } 13262 13263 dtrace_helper_action_destroy(h, vstate); 13264 } else { 13265 last = h; 13266 } 13267 } 13268 } 13269 13270 /* 13271 * Interate until we've cleared out all helper providers with the 13272 * given generation number. 13273 */ 13274 for (;;) { 13275 dtrace_helper_provider_t *prov = NULL; 13276 13277 /* 13278 * Look for a helper provider with the right generation. 
We
		 * have to start back at the beginning of the list each time
		 * because we drop dtrace_lock.  It's unlikely that we'll make
		 * more than two passes.
		 */
		for (i = 0; i < help->dthps_nprovs; i++) {
			prov = help->dthps_provs[i];

			if (prov->dthp_generation == gen)
				break;
		}

		/*
		 * If there were no matches, we're done.
		 */
		if (i == help->dthps_nprovs)
			break;

		/*
		 * Move the last helper provider into this slot.
		 */
		help->dthps_nprovs--;
		help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs];
		help->dthps_provs[help->dthps_nprovs] = NULL;

		/* dtrace_lock must be dropped before taking dtrace_meta_lock
		 * and before destroying the provider. */
		lck_mtx_unlock(&dtrace_lock);

		/*
		 * If we have a meta provider, remove this helper provider.
		 */
		lck_mtx_lock(&dtrace_meta_lock);
		if (dtrace_meta_pid != NULL) {
			ASSERT(dtrace_deferred_pid == NULL);
			dtrace_helper_provider_remove(&prov->dthp_prov,
			    p->p_pid);
		}
		lck_mtx_unlock(&dtrace_meta_lock);

		dtrace_helper_provider_destroy(prov);

		lck_mtx_lock(&dtrace_lock);
	}

	return (0);
}

/*
 * Validate a helper action's predicate and action DIFOs via
 * dtrace_difo_validate_helper().  Returns non-zero (true) iff every DIFO
 * validated cleanly.
 */
static int
dtrace_helper_validate(dtrace_helper_action_t *helper)
{
	int err = 0, i;
	dtrace_difo_t *dp;

	if ((dp = helper->dtha_predicate) != NULL)
		err += dtrace_difo_validate_helper(dp);

	for (i = 0; i < helper->dtha_nactions; i++)
		err += dtrace_difo_validate_helper(helper->dtha_actions[i]);

	return (err == 0);
}

/*
 * Build a helper action of kind 'which' from the given ECB description and
 * append it to the process's helper action list.  Returns 0 on success,
 * EINVAL for a bad kind or malformed actions, ENOSPC if the per-kind limit
 * has been reached.
 */
#if !defined(__APPLE__)
static int
dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep)
#else
static int
dtrace_helper_action_add(proc_t* p, int which, dtrace_ecbdesc_t *ep)
#endif
{
	dtrace_helpers_t *help;
	dtrace_helper_action_t *helper, *last;
	dtrace_actdesc_t *act;
	dtrace_vstate_t *vstate;
	dtrace_predicate_t *pred;
	int count = 0, nactions =
0, i;

	if (which < 0 || which >= DTRACE_NHELPER_ACTIONS)
		return (EINVAL);

#if !defined(__APPLE__)
	help = curproc->p_dtrace_helpers;
#else
	help = p->p_dtrace_helpers;
#endif
	last = help->dthps_actions[which];
	vstate = &help->dthps_vstate;

	/* Walk to the tail of the list, counting the existing actions;
	 * 'last' is left pointing at the final element (or NULL). */
	for (count = 0; last != NULL; last = last->dtha_next) {
		count++;
		if (last->dtha_next == NULL)
			break;
	}

	/*
	 * If we already have dtrace_helper_actions_max helper actions for this
	 * helper action type, we'll refuse to add a new one.
	 */
	if (count >= dtrace_helper_actions_max)
		return (ENOSPC);

	helper = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP);
	helper->dtha_generation = help->dthps_generation;

	if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) {
		ASSERT(pred->dtp_difo != NULL);
		dtrace_difo_hold(pred->dtp_difo);
		helper->dtha_predicate = pred->dtp_difo;
	}

	/* First pass:  every action must be a DIF expression with a DIFO. */
	for (act = ep->dted_action; act != NULL; act = act->dtad_next) {
		if (act->dtad_kind != DTRACEACT_DIFEXPR)
			goto err;

		if (act->dtad_difo == NULL)
			goto err;

		nactions++;
	}

	helper->dtha_actions = kmem_zalloc(sizeof (dtrace_difo_t *) *
	    (helper->dtha_nactions = nactions), KM_SLEEP);

	/* Second pass:  take a hold on each DIFO and record it. */
	for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) {
		dtrace_difo_hold(act->dtad_difo);
		helper->dtha_actions[i++] = act->dtad_difo;
	}

	if (!dtrace_helper_validate(helper))
		goto err;

	if (last == NULL) {
		help->dthps_actions[which] = helper;
	} else {
		last->dtha_next = helper;
	}

	/* Grow the helper-trace local-variable snapshot area if this
	 * helper has more clause-locals than any seen before. */
	if (vstate->dtvs_nlocals > dtrace_helptrace_nlocals) {
		dtrace_helptrace_nlocals = vstate->dtvs_nlocals;
		dtrace_helptrace_next = 0;
	}

	return (0);
err:
	dtrace_helper_action_destroy(helper, vstate);
	return
(EINVAL); 13423} 13424 13425static void 13426dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help, 13427 dof_helper_t *dofhp) 13428{ 13429 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); 13430 13431 lck_mtx_lock(&dtrace_meta_lock); 13432 lck_mtx_lock(&dtrace_lock); 13433 13434 if (!dtrace_attached() || dtrace_meta_pid == NULL) { 13435 /* 13436 * If the dtrace module is loaded but not attached, or if 13437 * there aren't isn't a meta provider registered to deal with 13438 * these provider descriptions, we need to postpone creating 13439 * the actual providers until later. 13440 */ 13441 13442 if (help->dthps_next == NULL && help->dthps_prev == NULL && 13443 dtrace_deferred_pid != help) { 13444 help->dthps_deferred = 1; 13445 help->dthps_pid = p->p_pid; 13446 help->dthps_next = dtrace_deferred_pid; 13447 help->dthps_prev = NULL; 13448 if (dtrace_deferred_pid != NULL) 13449 dtrace_deferred_pid->dthps_prev = help; 13450 dtrace_deferred_pid = help; 13451 } 13452 13453 lck_mtx_unlock(&dtrace_lock); 13454 13455 } else if (dofhp != NULL) { 13456 /* 13457 * If the dtrace module is loaded and we have a particular 13458 * helper provider description, pass that off to the 13459 * meta provider. 13460 */ 13461 13462 lck_mtx_unlock(&dtrace_lock); 13463 13464 dtrace_helper_provide(dofhp, p->p_pid); 13465 13466 } else { 13467 /* 13468 * Otherwise, just pass all the helper provider descriptions 13469 * off to the meta provider. 
13470 */ 13471 13472 int i; 13473 lck_mtx_unlock(&dtrace_lock); 13474 13475 for (i = 0; i < help->dthps_nprovs; i++) { 13476 dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, 13477 p->p_pid); 13478 } 13479 } 13480 13481 lck_mtx_unlock(&dtrace_meta_lock); 13482} 13483 13484#if !defined(__APPLE__) 13485static int 13486dtrace_helper_provider_add(dof_helper_t *dofhp, int gen) 13487#else 13488static int 13489dtrace_helper_provider_add(proc_t* p, dof_helper_t *dofhp, int gen) 13490#endif 13491{ 13492 dtrace_helpers_t *help; 13493 dtrace_helper_provider_t *hprov, **tmp_provs; 13494 uint_t tmp_maxprovs, i; 13495 13496 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 13497 13498#if !defined(__APPLE__) 13499 help = curproc->p_dtrace_helpers; 13500#else 13501 help = p->p_dtrace_helpers; 13502#endif 13503 ASSERT(help != NULL); 13504 13505 /* 13506 * If we already have dtrace_helper_providers_max helper providers, 13507 * we're refuse to add a new one. 13508 */ 13509 if (help->dthps_nprovs >= dtrace_helper_providers_max) 13510 return (ENOSPC); 13511 13512 /* 13513 * Check to make sure this isn't a duplicate. 13514 */ 13515 for (i = 0; i < help->dthps_nprovs; i++) { 13516 if (dofhp->dofhp_addr == 13517 help->dthps_provs[i]->dthp_prov.dofhp_addr) 13518 return (EALREADY); 13519 } 13520 13521 hprov = kmem_zalloc(sizeof (dtrace_helper_provider_t), KM_SLEEP); 13522 hprov->dthp_prov = *dofhp; 13523 hprov->dthp_ref = 1; 13524 hprov->dthp_generation = gen; 13525 13526 /* 13527 * Allocate a bigger table for helper providers if it's already full. 
	 */
	if (help->dthps_maxprovs == help->dthps_nprovs) {
		tmp_maxprovs = help->dthps_maxprovs;
		tmp_provs = help->dthps_provs;

		/* Double the table capacity (starting at 2), capped at the
		 * administrative maximum. */
		if (help->dthps_maxprovs == 0)
			help->dthps_maxprovs = 2;
		else
			help->dthps_maxprovs *= 2;
		if (help->dthps_maxprovs > dtrace_helper_providers_max)
			help->dthps_maxprovs = dtrace_helper_providers_max;

		ASSERT(tmp_maxprovs < help->dthps_maxprovs);

		help->dthps_provs = kmem_zalloc(help->dthps_maxprovs *
		    sizeof (dtrace_helper_provider_t *), KM_SLEEP);

		if (tmp_provs != NULL) {
			bcopy(tmp_provs, help->dthps_provs, tmp_maxprovs *
			    sizeof (dtrace_helper_provider_t *));
			kmem_free(tmp_provs, tmp_maxprovs *
			    sizeof (dtrace_helper_provider_t *));
		}
	}

	help->dthps_provs[help->dthps_nprovs] = hprov;
	help->dthps_nprovs++;

	return (0);
}

/*
 * Drop a reference on a helper provider; on the last reference, destroy
 * its DOF and free the provider structure.  dtrace_lock is dropped before
 * the DOF is destroyed.
 */
static void
dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov)
{
	lck_mtx_lock(&dtrace_lock);

	if (--hprov->dthp_ref == 0) {
		dof_hdr_t *dof;
		lck_mtx_unlock(&dtrace_lock);
		dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof;
		dtrace_dof_destroy(dof);
		kmem_free(hprov, sizeof (dtrace_helper_provider_t));
	} else {
		lck_mtx_unlock(&dtrace_lock);
	}
}

/*
 * Validate a DOF_SECT_PROVIDER section and all of the sections it
 * references (strings, probes, args, offsets and -- for post-v1 DOF --
 * is-enabled offsets).  Returns 0 if valid, -1 (with dtrace_dof_error()
 * reported) otherwise.
 */
static int
dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec)
{
	uintptr_t daddr = (uintptr_t)dof;
	dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
	dof_provider_t *provider;
	dof_probe_t *probe;
	uint8_t *arg;
	char *strtab, *typestr;
	dof_stridx_t typeidx;
	size_t typesz;
	uint_t nprobes, j, k;

	ASSERT(sec->dofs_type == DOF_SECT_PROVIDER);

	if (sec->dofs_offset & (sizeof (uint_t) - 1)) {
		dtrace_dof_error(dof, "misaligned section offset");
		return (-1);
	}

	/*
	 * The
section needs to be large enough to contain the DOF provider 13597 * structure appropriate for the given version. 13598 */ 13599 if (sec->dofs_size < 13600 ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ? 13601 offsetof(dof_provider_t, dofpv_prenoffs) : 13602 sizeof (dof_provider_t))) { 13603 dtrace_dof_error(dof, "provider section too small"); 13604 return (-1); 13605 } 13606 13607 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset); 13608 str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab); 13609 prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes); 13610 arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs); 13611 off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs); 13612 13613 if (str_sec == NULL || prb_sec == NULL || 13614 arg_sec == NULL || off_sec == NULL) 13615 return (-1); 13616 13617 enoff_sec = NULL; 13618 13619 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && 13620 provider->dofpv_prenoffs != DOF_SECT_NONE && 13621 (enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS, 13622 provider->dofpv_prenoffs)) == NULL) 13623 return (-1); 13624 13625 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); 13626 13627 if (provider->dofpv_name >= str_sec->dofs_size || 13628 strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) { 13629 dtrace_dof_error(dof, "invalid provider name"); 13630 return (-1); 13631 } 13632 13633 if (prb_sec->dofs_entsize == 0 || 13634 prb_sec->dofs_entsize > prb_sec->dofs_size) { 13635 dtrace_dof_error(dof, "invalid entry size"); 13636 return (-1); 13637 } 13638 13639 if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) { 13640 dtrace_dof_error(dof, "misaligned entry size"); 13641 return (-1); 13642 } 13643 13644 if (off_sec->dofs_entsize != sizeof (uint32_t)) { 13645 dtrace_dof_error(dof, "invalid entry size"); 13646 return (-1); 13647 } 13648 13649 if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) { 13650 
dtrace_dof_error(dof, "misaligned section offset"); 13651 return (-1); 13652 } 13653 13654 if (arg_sec->dofs_entsize != sizeof (uint8_t)) { 13655 dtrace_dof_error(dof, "invalid entry size"); 13656 return (-1); 13657 } 13658 13659 arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset); 13660 13661 nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize; 13662 13663 /* 13664 * Take a pass through the probes to check for errors. 13665 */ 13666 for (j = 0; j < nprobes; j++) { 13667 probe = (dof_probe_t *)(uintptr_t)(daddr + 13668 prb_sec->dofs_offset + j * prb_sec->dofs_entsize); 13669 13670 if (probe->dofpr_func >= str_sec->dofs_size) { 13671 dtrace_dof_error(dof, "invalid function name"); 13672 return (-1); 13673 } 13674 13675 if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) { 13676 dtrace_dof_error(dof, "function name too long"); 13677 return (-1); 13678 } 13679 13680 if (probe->dofpr_name >= str_sec->dofs_size || 13681 strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) { 13682 dtrace_dof_error(dof, "invalid probe name"); 13683 return (-1); 13684 } 13685 13686 /* 13687 * The offset count must not wrap the index, and the offsets 13688 * must also not overflow the section's data. 13689 */ 13690 if (probe->dofpr_offidx + probe->dofpr_noffs < 13691 probe->dofpr_offidx || 13692 (probe->dofpr_offidx + probe->dofpr_noffs) * 13693 off_sec->dofs_entsize > off_sec->dofs_size) { 13694 dtrace_dof_error(dof, "invalid probe offset"); 13695 return (-1); 13696 } 13697 13698 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) { 13699 /* 13700 * If there's no is-enabled offset section, make sure 13701 * there aren't any is-enabled offsets. Otherwise 13702 * perform the same checks as for probe offsets 13703 * (immediately above). 
13704 */ 13705 if (enoff_sec == NULL) { 13706 if (probe->dofpr_enoffidx != 0 || 13707 probe->dofpr_nenoffs != 0) { 13708 dtrace_dof_error(dof, "is-enabled " 13709 "offsets with null section"); 13710 return (-1); 13711 } 13712 } else if (probe->dofpr_enoffidx + 13713 probe->dofpr_nenoffs < probe->dofpr_enoffidx || 13714 (probe->dofpr_enoffidx + probe->dofpr_nenoffs) * 13715 enoff_sec->dofs_entsize > enoff_sec->dofs_size) { 13716 dtrace_dof_error(dof, "invalid is-enabled " 13717 "offset"); 13718 return (-1); 13719 } 13720 13721 if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) { 13722 dtrace_dof_error(dof, "zero probe and " 13723 "is-enabled offsets"); 13724 return (-1); 13725 } 13726 } else if (probe->dofpr_noffs == 0) { 13727 dtrace_dof_error(dof, "zero probe offsets"); 13728 return (-1); 13729 } 13730 13731 if (probe->dofpr_argidx + probe->dofpr_xargc < 13732 probe->dofpr_argidx || 13733 (probe->dofpr_argidx + probe->dofpr_xargc) * 13734 arg_sec->dofs_entsize > arg_sec->dofs_size) { 13735 dtrace_dof_error(dof, "invalid args"); 13736 return (-1); 13737 } 13738 13739 typeidx = probe->dofpr_nargv; 13740 typestr = strtab + probe->dofpr_nargv; 13741 for (k = 0; k < probe->dofpr_nargc; k++) { 13742 if (typeidx >= str_sec->dofs_size) { 13743 dtrace_dof_error(dof, "bad " 13744 "native argument type"); 13745 return (-1); 13746 } 13747 13748 typesz = strlen(typestr) + 1; 13749 if (typesz > DTRACE_ARGTYPELEN) { 13750 dtrace_dof_error(dof, "native " 13751 "argument type too long"); 13752 return (-1); 13753 } 13754 typeidx += typesz; 13755 typestr += typesz; 13756 } 13757 13758 typeidx = probe->dofpr_xargv; 13759 typestr = strtab + probe->dofpr_xargv; 13760 for (k = 0; k < probe->dofpr_xargc; k++) { 13761 if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) { 13762 dtrace_dof_error(dof, "bad " 13763 "native argument index"); 13764 return (-1); 13765 } 13766 13767 if (typeidx >= str_sec->dofs_size) { 13768 dtrace_dof_error(dof, "bad " 13769 "translated argument type"); 
13770 return (-1); 13771 } 13772 13773 typesz = strlen(typestr) + 1; 13774 if (typesz > DTRACE_ARGTYPELEN) { 13775 dtrace_dof_error(dof, "translated argument " 13776 "type too long"); 13777 return (-1); 13778 } 13779 13780 typeidx += typesz; 13781 typestr += typesz; 13782 } 13783 } 13784 13785 return (0); 13786} 13787 13788#if !defined(__APPLE__) 13789static int 13790dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp) 13791#else 13792static int 13793dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) 13794#endif 13795{ 13796 dtrace_helpers_t *help; 13797 dtrace_vstate_t *vstate; 13798 dtrace_enabling_t *enab = NULL; 13799 int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1; 13800 uintptr_t daddr = (uintptr_t)dof; 13801 13802 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 13803 13804#if !defined(__APPLE__) 13805 if ((help = curproc->p_dtrace_helpers) == NULL) 13806 help = dtrace_helpers_create(curproc); 13807#else 13808 if ((help = p->p_dtrace_helpers) == NULL) 13809 help = dtrace_helpers_create(p); 13810#endif 13811 13812 vstate = &help->dthps_vstate; 13813 13814 if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab, 13815 dhp != NULL ? dhp->dofhp_addr : 0, B_FALSE)) != 0) { 13816 dtrace_dof_destroy(dof); 13817 return (rv); 13818 } 13819 13820 /* 13821 * Look for helper providers and validate their descriptions. 13822 */ 13823 if (dhp != NULL) { 13824 for (i = 0; i < dof->dofh_secnum; i++) { 13825 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + 13826 dof->dofh_secoff + i * dof->dofh_secsize); 13827 13828 if (sec->dofs_type != DOF_SECT_PROVIDER) 13829 continue; 13830 13831 if (dtrace_helper_provider_validate(dof, sec) != 0) { 13832 dtrace_enabling_destroy(enab); 13833 dtrace_dof_destroy(dof); 13834 return (-1); 13835 } 13836 13837 nprovs++; 13838 } 13839 } 13840 13841 /* 13842 * Now we need to walk through the ECB descriptions in the enabling. 
13843 */ 13844 for (i = 0; i < enab->dten_ndesc; i++) { 13845 dtrace_ecbdesc_t *ep = enab->dten_desc[i]; 13846 dtrace_probedesc_t *desc = &ep->dted_probe; 13847 13848 if (strcmp(desc->dtpd_provider, "dtrace") != 0) 13849 continue; 13850 13851 if (strcmp(desc->dtpd_mod, "helper") != 0) 13852 continue; 13853 13854 if (strcmp(desc->dtpd_func, "ustack") != 0) 13855 continue; 13856 13857#if !defined(__APPLE__) 13858 if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK, ep)) != 0) 13859#else 13860 if ((rv = dtrace_helper_action_add(p, DTRACE_HELPER_ACTION_USTACK, ep)) != 0) 13861#endif 13862 { 13863 /* 13864 * Adding this helper action failed -- we are now going 13865 * to rip out the entire generation and return failure. 13866 */ 13867#if !defined(__APPLE__) 13868 (void) dtrace_helper_destroygen(help->dthps_generation); 13869#else 13870 (void) dtrace_helper_destroygen(p, help->dthps_generation); 13871#endif 13872 dtrace_enabling_destroy(enab); 13873 dtrace_dof_destroy(dof); 13874 return (-1); 13875 } 13876 13877 nhelpers++; 13878 } 13879 13880 if (nhelpers < enab->dten_ndesc) 13881 dtrace_dof_error(dof, "unmatched helpers"); 13882 13883 gen = help->dthps_generation++; 13884 dtrace_enabling_destroy(enab); 13885 13886 if (dhp != NULL && nprovs > 0) { 13887 dhp->dofhp_dof = (uint64_t)(uintptr_t)dof; 13888#if !defined(__APPLE__) 13889 if (dtrace_helper_provider_add(dhp, gen) == 0) { 13890#else 13891 if (dtrace_helper_provider_add(p, dhp, gen) == 0) { 13892#endif 13893 lck_mtx_unlock(&dtrace_lock); 13894#if !defined(__APPLE__) 13895 dtrace_helper_provider_register(curproc, help, dhp); 13896#else 13897 dtrace_helper_provider_register(p, help, dhp); 13898#endif 13899 lck_mtx_lock(&dtrace_lock); 13900 13901 destroy = 0; 13902 } 13903 } 13904 13905 if (destroy) 13906 dtrace_dof_destroy(dof); 13907 13908 return (gen); 13909} 13910 13911#if defined(__APPLE__) 13912 13913/* 13914 * DTrace lazy dof 13915 * 13916 * DTrace user static probes (USDT probes) and helper actions 
are loaded 13917 * in a process by proccessing dof sections. The dof sections are passed 13918 * into the kernel by dyld, in a dof_ioctl_data_t block. It is rather 13919 * expensive to process dof for a process that will never use it. There 13920 * is a memory cost (allocating the providers/probes), and a cpu cost 13921 * (creating the providers/probes). 13922 * 13923 * To reduce this cost, we use "lazy dof". The normal proceedure for 13924 * dof processing is to copyin the dof(s) pointed to by the dof_ioctl_data_t 13925 * block, and invoke dof_slurp_helper() on them. When "lazy dof" is 13926 * used, each process retains the dof_ioctl_data_t block, instead of 13927 * copying in the data it points to. 13928 * 13929 * The dof_ioctl_data_t blocks are managed as if they were the actual 13930 * processed dof; on fork the block is copied to the child, on exec and 13931 * exit the block is freed. 13932 * 13933 * If the process loads library(s) containing additional dof, the 13934 * new dof_ioctl_data_t is merged with the existing block. 13935 * 13936 * There are a few catches that make this slightly more difficult. 13937 * When dyld registers dof_ioctl_data_t blocks, it expects a unique 13938 * identifier value for each dof in the block. In non-lazy dof terms, 13939 * this is the generation that dof was loaded in. If we hand back 13940 * a UID for a lazy dof, that same UID must be able to unload the 13941 * dof once it has become non-lazy. To meet this requirement, the 13942 * code that loads lazy dof requires that the UID's for dof(s) in 13943 * the lazy dof be sorted, and in ascending order. It is okay to skip 13944 * UID's, I.E., 1 -> 5 -> 6 is legal. 13945 * 13946 * Once a process has become non-lazy, it will stay non-lazy. All 13947 * future dof operations for that process will be non-lazy, even 13948 * if the dof mode transitions back to lazy. 13949 * 13950 * Always do lazy dof checks before non-lazy (I.E. In fork, exit, exec.). 
13951 * That way if the lazy check fails due to transitioning to non-lazy, the 13952 * right thing is done with the newly faulted in dof. 13953 */ 13954 13955/* 13956 * This method is a bit squicky. It must handle: 13957 * 13958 * dof should not be lazy. 13959 * dof should have been handled lazily, but there was an error 13960 * dof was handled lazily, and needs to be freed. 13961 * dof was handled lazily, and must not be freed. 13962 * 13963 * 13964 * Returns EACCESS if dof should be handled non-lazily. 13965 * 13966 * KERN_SUCCESS and all other return codes indicate lazy handling of dof. 13967 * 13968 * If the dofs data is claimed by this method, dofs_claimed will be set. 13969 * Callers should not free claimed dofs. 13970 */ 13971int 13972dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t* incoming_dofs, int *dofs_claimed) 13973{ 13974 ASSERT(p); 13975 ASSERT(incoming_dofs && incoming_dofs->dofiod_count > 0); 13976 13977 int rval = 0; 13978 *dofs_claimed = 0; 13979 13980 lck_rw_lock_shared(&dtrace_dof_mode_lock); 13981 13982 /* 13983 * If we have lazy dof, dof mode better be LAZY_ON. 13984 */ 13985 ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON); 13986 ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL); 13987 ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER); 13988 13989 /* 13990 * Any existing helpers force non-lazy behavior. 13991 */ 13992 if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) { 13993 lck_mtx_lock(&p->p_dtrace_sprlock); 13994 13995 dof_ioctl_data_t* existing_dofs = p->p_dtrace_lazy_dofs; 13996 unsigned int existing_dofs_count = (existing_dofs) ? existing_dofs->dofiod_count : 0; 13997 unsigned int i, merged_dofs_count = incoming_dofs->dofiod_count + existing_dofs_count; 13998 13999 /* 14000 * Range check... 
14001 */ 14002 if (merged_dofs_count == 0 || merged_dofs_count > 1024) { 14003 dtrace_dof_error(NULL, "lazy_dofs_add merged_dofs_count out of range"); 14004 rval = EINVAL; 14005 goto unlock; 14006 } 14007 14008 /* 14009 * Each dof being added must be assigned a unique generation. 14010 */ 14011 uint64_t generation = (existing_dofs) ? existing_dofs->dofiod_helpers[existing_dofs_count - 1].dofhp_dof + 1 : 1; 14012 for (i=0; i<incoming_dofs->dofiod_count; i++) { 14013 /* 14014 * We rely on these being the same so we can overwrite dofhp_dof and not lose info. 14015 */ 14016 ASSERT(incoming_dofs->dofiod_helpers[i].dofhp_dof == incoming_dofs->dofiod_helpers[i].dofhp_addr); 14017 incoming_dofs->dofiod_helpers[i].dofhp_dof = generation++; 14018 } 14019 14020 14021 if (existing_dofs) { 14022 /* 14023 * Merge the existing and incoming dofs 14024 */ 14025 size_t merged_dofs_size = DOF_IOCTL_DATA_T_SIZE(merged_dofs_count); 14026 dof_ioctl_data_t* merged_dofs = kmem_alloc(merged_dofs_size, KM_SLEEP); 14027 14028 bcopy(&existing_dofs->dofiod_helpers[0], 14029 &merged_dofs->dofiod_helpers[0], 14030 sizeof(dof_helper_t) * existing_dofs_count); 14031 bcopy(&incoming_dofs->dofiod_helpers[0], 14032 &merged_dofs->dofiod_helpers[existing_dofs_count], 14033 sizeof(dof_helper_t) * incoming_dofs->dofiod_count); 14034 14035 merged_dofs->dofiod_count = merged_dofs_count; 14036 14037 kmem_free(existing_dofs, DOF_IOCTL_DATA_T_SIZE(existing_dofs_count)); 14038 14039 p->p_dtrace_lazy_dofs = merged_dofs; 14040 } else { 14041 /* 14042 * Claim the incoming dofs 14043 */ 14044 *dofs_claimed = 1; 14045 p->p_dtrace_lazy_dofs = incoming_dofs; 14046 } 14047 14048#if DEBUG 14049 dof_ioctl_data_t* all_dofs = p->p_dtrace_lazy_dofs; 14050 for (i=0; i<all_dofs->dofiod_count-1; i++) { 14051 ASSERT(all_dofs->dofiod_helpers[i].dofhp_dof < all_dofs->dofiod_helpers[i+1].dofhp_dof); 14052 } 14053#endif DEBUG 14054 14055unlock: 14056 lck_mtx_unlock(&p->p_dtrace_sprlock); 14057 } else { 14058 rval = EACCES; 14059 } 
14060 14061 lck_rw_unlock_shared(&dtrace_dof_mode_lock); 14062 14063 return rval; 14064} 14065 14066/* 14067 * Returns: 14068 * 14069 * EINVAL: lazy dof is enabled, but the requested generation was not found. 14070 * EACCES: This removal needs to be handled non-lazily. 14071 */ 14072int 14073dtrace_lazy_dofs_remove(proc_t *p, int generation) 14074{ 14075 int rval = EINVAL; 14076 14077 lck_rw_lock_shared(&dtrace_dof_mode_lock); 14078 14079 /* 14080 * If we have lazy dof, dof mode better be LAZY_ON. 14081 */ 14082 ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON); 14083 ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL); 14084 ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER); 14085 14086 /* 14087 * Any existing helpers force non-lazy behavior. 14088 */ 14089 if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) { 14090 lck_mtx_lock(&p->p_dtrace_sprlock); 14091 14092 dof_ioctl_data_t* existing_dofs = p->p_dtrace_lazy_dofs; 14093 14094 if (existing_dofs) { 14095 int index, existing_dofs_count = existing_dofs->dofiod_count; 14096 for (index=0; index<existing_dofs_count; index++) { 14097 if ((int)existing_dofs->dofiod_helpers[index].dofhp_dof == generation) { 14098 dof_ioctl_data_t* removed_dofs = NULL; 14099 14100 /* 14101 * If there is only 1 dof, we'll delete it and swap in NULL. 14102 */ 14103 if (existing_dofs_count > 1) { 14104 int removed_dofs_count = existing_dofs_count - 1; 14105 size_t removed_dofs_size = DOF_IOCTL_DATA_T_SIZE(removed_dofs_count); 14106 14107 removed_dofs = kmem_alloc(removed_dofs_size, KM_SLEEP); 14108 removed_dofs->dofiod_count = removed_dofs_count; 14109 14110 /* 14111 * copy the remaining data. 
14112 */ 14113 if (index > 0) { 14114 bcopy(&existing_dofs->dofiod_helpers[0], 14115 &removed_dofs->dofiod_helpers[0], 14116 index * sizeof(dof_helper_t)); 14117 } 14118 14119 if (index < existing_dofs_count-1) { 14120 bcopy(&existing_dofs->dofiod_helpers[index+1], 14121 &removed_dofs->dofiod_helpers[index], 14122 (existing_dofs_count - index - 1) * sizeof(dof_helper_t)); 14123 } 14124 } 14125 14126 kmem_free(existing_dofs, DOF_IOCTL_DATA_T_SIZE(existing_dofs_count)); 14127 14128 p->p_dtrace_lazy_dofs = removed_dofs; 14129 14130 rval = KERN_SUCCESS; 14131 14132 break; 14133 } 14134 } 14135 14136#if DEBUG 14137 dof_ioctl_data_t* all_dofs = p->p_dtrace_lazy_dofs; 14138 if (all_dofs) { 14139 unsigned int i; 14140 for (i=0; i<all_dofs->dofiod_count-1; i++) { 14141 ASSERT(all_dofs->dofiod_helpers[i].dofhp_dof < all_dofs->dofiod_helpers[i+1].dofhp_dof); 14142 } 14143 } 14144#endif 14145 14146 } 14147 14148 lck_mtx_unlock(&p->p_dtrace_sprlock); 14149 } else { 14150 rval = EACCES; 14151 } 14152 14153 lck_rw_unlock_shared(&dtrace_dof_mode_lock); 14154 14155 return rval; 14156} 14157 14158void 14159dtrace_lazy_dofs_destroy(proc_t *p) 14160{ 14161 lck_rw_lock_shared(&dtrace_dof_mode_lock); 14162 lck_mtx_lock(&p->p_dtrace_sprlock); 14163 14164 /* 14165 * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting. 14166 * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from 14167 * kern_exit.c and kern_exec.c. 
14168 */ 14169 ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON || p->p_lflag & P_LEXIT); 14170 ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL); 14171 14172 dof_ioctl_data_t* lazy_dofs = p->p_dtrace_lazy_dofs; 14173 p->p_dtrace_lazy_dofs = NULL; 14174 14175 lck_mtx_unlock(&p->p_dtrace_sprlock); 14176 lck_rw_unlock_shared(&dtrace_dof_mode_lock); 14177 14178 if (lazy_dofs) { 14179 kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count)); 14180 } 14181} 14182 14183void 14184dtrace_lazy_dofs_duplicate(proc_t *parent, proc_t *child) 14185{ 14186 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); 14187 lck_mtx_assert(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED); 14188 lck_mtx_assert(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED); 14189 14190 lck_rw_lock_shared(&dtrace_dof_mode_lock); 14191 lck_mtx_lock(&parent->p_dtrace_sprlock); 14192 14193 /* 14194 * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting. 14195 * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from 14196 * kern_fork.c 14197 */ 14198 ASSERT(parent->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON); 14199 ASSERT(parent->p_dtrace_lazy_dofs == NULL || parent->p_dtrace_helpers == NULL); 14200 /* 14201 * In theory we should hold the child sprlock, but this is safe... 
14202 */ 14203 ASSERT(child->p_dtrace_lazy_dofs == NULL && child->p_dtrace_helpers == NULL); 14204 14205 dof_ioctl_data_t* parent_dofs = parent->p_dtrace_lazy_dofs; 14206 dof_ioctl_data_t* child_dofs = NULL; 14207 if (parent_dofs) { 14208 size_t parent_dofs_size = DOF_IOCTL_DATA_T_SIZE(parent_dofs->dofiod_count); 14209 child_dofs = kmem_alloc(parent_dofs_size, KM_SLEEP); 14210 bcopy(parent_dofs, child_dofs, parent_dofs_size); 14211 } 14212 14213 lck_mtx_unlock(&parent->p_dtrace_sprlock); 14214 14215 if (child_dofs) { 14216 lck_mtx_lock(&child->p_dtrace_sprlock); 14217 child->p_dtrace_lazy_dofs = child_dofs; 14218 lck_mtx_unlock(&child->p_dtrace_sprlock); 14219 } 14220 14221 lck_rw_unlock_shared(&dtrace_dof_mode_lock); 14222} 14223 14224static int 14225dtrace_lazy_dofs_proc_iterate_filter(proc_t *p, void* ignored) 14226{ 14227#pragma unused(ignored) 14228 /* 14229 * Okay to NULL test without taking the sprlock. 14230 */ 14231 return p->p_dtrace_lazy_dofs != NULL; 14232} 14233 14234static int 14235dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored) 14236{ 14237#pragma unused(ignored) 14238 /* 14239 * It is possible this process may exit during our attempt to 14240 * fault in the dof. We could fix this by holding locks longer, 14241 * but the errors are benign. 
14242 */ 14243 lck_mtx_lock(&p->p_dtrace_sprlock); 14244 14245 /* 14246 * In this case only, it is okay to have lazy dof when dof mode is DTRACE_DOF_MODE_LAZY_OFF 14247 */ 14248 ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL); 14249 ASSERT(dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF); 14250 14251 14252 dof_ioctl_data_t* lazy_dofs = p->p_dtrace_lazy_dofs; 14253 p->p_dtrace_lazy_dofs = NULL; 14254 14255 lck_mtx_unlock(&p->p_dtrace_sprlock); 14256 14257 /* 14258 * Process each dof_helper_t 14259 */ 14260 if (lazy_dofs != NULL) { 14261 unsigned int i; 14262 int rval; 14263 14264 for (i=0; i<lazy_dofs->dofiod_count; i++) { 14265 /* 14266 * When loading lazy dof, we depend on the generations being sorted in ascending order. 14267 */ 14268 ASSERT(i >= (lazy_dofs->dofiod_count - 1) || lazy_dofs->dofiod_helpers[i].dofhp_dof < lazy_dofs->dofiod_helpers[i+1].dofhp_dof); 14269 14270 dof_helper_t *dhp = &lazy_dofs->dofiod_helpers[i]; 14271 14272 /* 14273 * We stored the generation in dofhp_dof. Save it, and restore the original value. 14274 */ 14275 int generation = dhp->dofhp_dof; 14276 dhp->dofhp_dof = dhp->dofhp_addr; 14277 14278 dof_hdr_t *dof = dtrace_dof_copyin_from_proc(p, dhp->dofhp_dof, &rval); 14279 14280 if (dof != NULL) { 14281 dtrace_helpers_t *help; 14282 14283 lck_mtx_lock(&dtrace_lock); 14284 14285 /* 14286 * This must be done with the dtrace_lock held 14287 */ 14288 if ((help = p->p_dtrace_helpers) == NULL) 14289 help = dtrace_helpers_create(p); 14290 14291 /* 14292 * If the generation value has been bumped, someone snuck in 14293 * when we released the dtrace lock. We have to dump this generation, 14294 * there is no safe way to load it. 14295 */ 14296 if (help->dthps_generation <= generation) { 14297 help->dthps_generation = generation; 14298 14299 /* 14300 * dtrace_helper_slurp() takes responsibility for the dof -- 14301 * it may free it now or it may save it and free it later. 
14302 */ 14303 if ((rval = dtrace_helper_slurp(p, dof, dhp)) != generation) { 14304 dtrace_dof_error(NULL, "returned value did not match expected generation"); 14305 } 14306 } 14307 14308 lck_mtx_unlock(&dtrace_lock); 14309 } 14310 } 14311 14312 kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count)); 14313 } 14314 14315 return PROC_RETURNED; 14316} 14317 14318#endif /* __APPLE__ */ 14319 14320static dtrace_helpers_t * 14321dtrace_helpers_create(proc_t *p) 14322{ 14323 dtrace_helpers_t *help; 14324 14325 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); 14326 ASSERT(p->p_dtrace_helpers == NULL); 14327 14328 help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP); 14329 help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) * 14330 DTRACE_NHELPER_ACTIONS, KM_SLEEP); 14331 14332 p->p_dtrace_helpers = help; 14333 dtrace_helpers++; 14334 14335 return (help); 14336} 14337 14338#if !defined(__APPLE__) 14339static void 14340dtrace_helpers_destroy(void) 14341{ 14342 proc_t *p = curproc; 14343#else 14344static void 14345dtrace_helpers_destroy(proc_t* p) 14346{ 14347#endif 14348 dtrace_helpers_t *help; 14349 dtrace_vstate_t *vstate; 14350 int i; 14351 14352 lck_mtx_lock(&dtrace_lock); 14353 14354 ASSERT(p->p_dtrace_helpers != NULL); 14355 ASSERT(dtrace_helpers > 0); 14356 14357 help = p->p_dtrace_helpers; 14358 vstate = &help->dthps_vstate; 14359 14360 /* 14361 * We're now going to lose the help from this process. 14362 */ 14363 p->p_dtrace_helpers = NULL; 14364 dtrace_sync(); 14365 14366 /* 14367 * Destory the helper actions. 14368 */ 14369 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { 14370 dtrace_helper_action_t *h, *next; 14371 14372 for (h = help->dthps_actions[i]; h != NULL; h = next) { 14373 next = h->dtha_next; 14374 dtrace_helper_action_destroy(h, vstate); 14375 h = next; 14376 } 14377 } 14378 14379 lck_mtx_unlock(&dtrace_lock); 14380 14381 /* 14382 * Destroy the helper providers. 
14383 */ 14384 if (help->dthps_maxprovs > 0) { 14385 lck_mtx_lock(&dtrace_meta_lock); 14386 if (dtrace_meta_pid != NULL) { 14387 ASSERT(dtrace_deferred_pid == NULL); 14388 14389 for (i = 0; i < help->dthps_nprovs; i++) { 14390 dtrace_helper_provider_remove( 14391 &help->dthps_provs[i]->dthp_prov, p->p_pid); 14392 } 14393 } else { 14394 lck_mtx_lock(&dtrace_lock); 14395 ASSERT(help->dthps_deferred == 0 || 14396 help->dthps_next != NULL || 14397 help->dthps_prev != NULL || 14398 help == dtrace_deferred_pid); 14399 14400 /* 14401 * Remove the helper from the deferred list. 14402 */ 14403 if (help->dthps_next != NULL) 14404 help->dthps_next->dthps_prev = help->dthps_prev; 14405 if (help->dthps_prev != NULL) 14406 help->dthps_prev->dthps_next = help->dthps_next; 14407 if (dtrace_deferred_pid == help) { 14408 dtrace_deferred_pid = help->dthps_next; 14409 ASSERT(help->dthps_prev == NULL); 14410 } 14411 14412 lck_mtx_unlock(&dtrace_lock); 14413 } 14414 14415 lck_mtx_unlock(&dtrace_meta_lock); 14416 14417 for (i = 0; i < help->dthps_nprovs; i++) { 14418 dtrace_helper_provider_destroy(help->dthps_provs[i]); 14419 } 14420 14421 kmem_free(help->dthps_provs, help->dthps_maxprovs * 14422 sizeof (dtrace_helper_provider_t *)); 14423 } 14424 14425 lck_mtx_lock(&dtrace_lock); 14426 14427 dtrace_vstate_fini(&help->dthps_vstate); 14428 kmem_free(help->dthps_actions, 14429 sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS); 14430 kmem_free(help, sizeof (dtrace_helpers_t)); 14431 14432 --dtrace_helpers; 14433 lck_mtx_unlock(&dtrace_lock); 14434} 14435 14436static void 14437dtrace_helpers_duplicate(proc_t *from, proc_t *to) 14438{ 14439 dtrace_helpers_t *help, *newhelp; 14440 dtrace_helper_action_t *helper, *new, *last; 14441 dtrace_difo_t *dp; 14442 dtrace_vstate_t *vstate; 14443 int i, j, sz, hasprovs = 0; 14444 14445 lck_mtx_lock(&dtrace_lock); 14446 ASSERT(from->p_dtrace_helpers != NULL); 14447 ASSERT(dtrace_helpers > 0); 14448 14449 help = from->p_dtrace_helpers; 14450 
newhelp = dtrace_helpers_create(to); 14451 ASSERT(to->p_dtrace_helpers != NULL); 14452 14453 newhelp->dthps_generation = help->dthps_generation; 14454 vstate = &newhelp->dthps_vstate; 14455 14456 /* 14457 * Duplicate the helper actions. 14458 */ 14459 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { 14460 if ((helper = help->dthps_actions[i]) == NULL) 14461 continue; 14462 14463 for (last = NULL; helper != NULL; helper = helper->dtha_next) { 14464 new = kmem_zalloc(sizeof (dtrace_helper_action_t), 14465 KM_SLEEP); 14466 new->dtha_generation = helper->dtha_generation; 14467 14468 if ((dp = helper->dtha_predicate) != NULL) { 14469 dp = dtrace_difo_duplicate(dp, vstate); 14470 new->dtha_predicate = dp; 14471 } 14472 14473 new->dtha_nactions = helper->dtha_nactions; 14474 sz = sizeof (dtrace_difo_t *) * new->dtha_nactions; 14475 new->dtha_actions = kmem_alloc(sz, KM_SLEEP); 14476 14477 for (j = 0; j < new->dtha_nactions; j++) { 14478 dtrace_difo_t *dp = helper->dtha_actions[j]; 14479 14480 ASSERT(dp != NULL); 14481 dp = dtrace_difo_duplicate(dp, vstate); 14482 new->dtha_actions[j] = dp; 14483 } 14484 14485 if (last != NULL) { 14486 last->dtha_next = new; 14487 } else { 14488 newhelp->dthps_actions[i] = new; 14489 } 14490 14491 last = new; 14492 } 14493 } 14494 14495 /* 14496 * Duplicate the helper providers and register them with the 14497 * DTrace framework. 
14498 */ 14499 if (help->dthps_nprovs > 0) { 14500 newhelp->dthps_nprovs = help->dthps_nprovs; 14501 newhelp->dthps_maxprovs = help->dthps_nprovs; 14502 newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs * 14503 sizeof (dtrace_helper_provider_t *), KM_SLEEP); 14504 for (i = 0; i < newhelp->dthps_nprovs; i++) { 14505 newhelp->dthps_provs[i] = help->dthps_provs[i]; 14506 newhelp->dthps_provs[i]->dthp_ref++; 14507 } 14508 14509 hasprovs = 1; 14510 } 14511 14512 lck_mtx_unlock(&dtrace_lock); 14513 14514 if (hasprovs) 14515 dtrace_helper_provider_register(to, newhelp, NULL); 14516} 14517 14518/* 14519 * DTrace Hook Functions 14520 */ 14521static void 14522dtrace_module_loaded(struct modctl *ctl) 14523{ 14524 dtrace_provider_t *prv; 14525 14526 lck_mtx_lock(&dtrace_provider_lock); 14527 lck_mtx_lock(&mod_lock); 14528 14529 // ASSERT(ctl->mod_busy); 14530 14531 /* 14532 * We're going to call each providers per-module provide operation 14533 * specifying only this module. 14534 */ 14535 for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next) 14536 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); 14537 14538 lck_mtx_unlock(&mod_lock); 14539 lck_mtx_unlock(&dtrace_provider_lock); 14540 14541 /* 14542 * If we have any retained enablings, we need to match against them. 14543 * Enabling probes requires that cpu_lock be held, and we cannot hold 14544 * cpu_lock here -- it is legal for cpu_lock to be held when loading a 14545 * module. (In particular, this happens when loading scheduling 14546 * classes.) So if we have any retained enablings, we need to dispatch 14547 * our task queue to do the match for us. 
14548 */ 14549 lck_mtx_lock(&dtrace_lock); 14550 14551 if (dtrace_retained == NULL) { 14552 lck_mtx_unlock(&dtrace_lock); 14553 return; 14554 } 14555 14556 (void) taskq_dispatch(dtrace_taskq, 14557 (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP); 14558 14559 lck_mtx_unlock(&dtrace_lock); 14560 14561 /* 14562 * And now, for a little heuristic sleaze: in general, we want to 14563 * match modules as soon as they load. However, we cannot guarantee 14564 * this, because it would lead us to the lock ordering violation 14565 * outlined above. The common case, of course, is that cpu_lock is 14566 * _not_ held -- so we delay here for a clock tick, hoping that that's 14567 * long enough for the task queue to do its work. If it's not, it's 14568 * not a serious problem -- it just means that the module that we 14569 * just loaded may not be immediately instrumentable. 14570 */ 14571 delay(1); 14572} 14573 14574static void 14575dtrace_module_unloaded(struct modctl *ctl) 14576{ 14577 dtrace_probe_t template, *probe, *first, *next; 14578 dtrace_provider_t *prov; 14579 14580 template.dtpr_mod = ctl->mod_modname; 14581 14582 lck_mtx_lock(&dtrace_provider_lock); 14583 lck_mtx_lock(&mod_lock); 14584 lck_mtx_lock(&dtrace_lock); 14585 14586 if (dtrace_bymod == NULL) { 14587 /* 14588 * The DTrace module is loaded (obviously) but not attached; 14589 * we don't have any work to do. 14590 */ 14591 lck_mtx_unlock(&dtrace_provider_lock); 14592 lck_mtx_unlock(&mod_lock); 14593 lck_mtx_unlock(&dtrace_lock); 14594 return; 14595 } 14596 14597 for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template); 14598 probe != NULL; probe = probe->dtpr_nextmod) { 14599 if (probe->dtpr_ecb != NULL) { 14600 lck_mtx_unlock(&dtrace_provider_lock); 14601 lck_mtx_unlock(&mod_lock); 14602 lck_mtx_unlock(&dtrace_lock); 14603 14604 /* 14605 * This shouldn't _actually_ be possible -- we're 14606 * unloading a module that has an enabled probe in it. 
			 * (It's normally up to the provider to make sure that
			 * this can't happen.)  However, because dtps_enable()
			 * doesn't have a failure mode, there can be an
			 * enable/unload race.  Upshot: we don't want to
			 * assert, but we're not going to disable the
			 * probe, either.
			 */
			if (dtrace_err_verbose) {
				cmn_err(CE_WARN, "unloaded module '%s' had "
				    "enabled probes", ctl->mod_modname);
			}

			return;
		}
	}

	probe = first;

	/*
	 * Unhook every one of the module's probes from the probe array and
	 * from all three hash tables, threading them onto a private list
	 * (built in reverse via 'first') for destruction below.
	 */
	for (first = NULL; probe != NULL; probe = next) {
		ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe);

		dtrace_probes[probe->dtpr_id - 1] = NULL;

		next = probe->dtpr_nextmod;
		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		if (first == NULL) {
			first = probe;
			probe->dtpr_nextmod = NULL;
		} else {
			probe->dtpr_nextmod = first;
			first = probe;
		}
	}

	/*
	 * We've removed all of the module's probes from the hash chains and
	 * from the probe array.  Now issue a dtrace_sync() to be sure that
	 * everyone has cleared out from any probe array processing.
		 */
		rs = opt[DTRACEOPT_BUFRESIZE];
		opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL;
		opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu;

		(void) dtrace_state_buffers(state);

		/* Restore the caller-visible option values. */
		opt[DTRACEOPT_BUFRESIZE] = rs;
		opt[DTRACEOPT_CPU] = c;

		break;
	}

	case CPU_UNCONFIG:
		/*
		 * We don't free the buffer in the CPU_UNCONFIG case.  (The
		 * buffer will be freed when the consumer exits.)
		 */
		break;

	default:
		break;
	}

	lck_mtx_unlock(&dtrace_lock);
	return (0);
}

/*
 * Initial-CPU hook (installed as dtrace_cpu_init in dtrace_attach()):
 * treats first bring-up of a CPU as a CPU_CONFIG event.
 */
static void
dtrace_cpu_setup_initial(processorid_t cpu)
{
	(void) dtrace_cpu_setup(CPU_CONFIG, cpu);
}

/*
 * Record [base, limit) as a toxic address range that DIF must never load
 * from.  The table is grown by doubling when full; old contents are copied
 * into the new allocation.
 */
static void
dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
{
	if (dtrace_toxranges >= dtrace_toxranges_max) {
		int osize, nsize;
		dtrace_toxrange_t *range;

		osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);

		if (osize == 0) {
			ASSERT(dtrace_toxrange == NULL);
			ASSERT(dtrace_toxranges_max == 0);
			dtrace_toxranges_max = 1;
		} else {
			dtrace_toxranges_max <<= 1;
		}

		nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
		range = kmem_zalloc(nsize, KM_SLEEP);

		if (dtrace_toxrange != NULL) {
			ASSERT(osize != 0);
			bcopy(dtrace_toxrange, range, osize);
			kmem_free(dtrace_toxrange, osize);
		}

		dtrace_toxrange = range;
	}

	/* The slot being filled must still be zeroed (from kmem_zalloc). */
	ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == NULL);
	ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == NULL);

	dtrace_toxrange[dtrace_toxranges].dtt_base = base;
	dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
	dtrace_toxranges++;
}

/*
 * DTrace Driver Cookbook Functions
 */
/*
 * Driver attach: one-time framework initialization -- soft state, hooks into
 * the rest of the kernel, ID arenas, probe hash tables, self-registration as
 * the "dtrace" provider, and kickoff of any anonymous enabling.
 */
/*ARGSUSED*/
static int
dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	dtrace_provider_id_t id;
	dtrace_state_t *state = NULL;
	dtrace_enabling_t *enab;

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	if (ddi_soft_state_init(&dtrace_softstate,
	    sizeof (dtrace_state_t), 0) != 0) {
		cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state");
		lck_mtx_unlock(&cpu_lock);
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&dtrace_lock);
		return (DDI_FAILURE);
	}

#if !defined(__APPLE__)
	if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR,
	    DTRACEMNRN_DTRACE, DDI_PSEUDO, NULL) == DDI_FAILURE ||
	    ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR,
	    DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) {
		cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes");
		ddi_remove_minor_node(devi, NULL);
		ddi_soft_state_fini(&dtrace_softstate);
		lck_mtx_unlock(&cpu_lock);
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&dtrace_lock);
		return (DDI_FAILURE);
	}
#endif /* __APPLE__ */

	ddi_report_dev(devi);
	dtrace_devi = devi;

	/*
	 * Install DTrace's hooks into the rest of the kernel: module
	 * load/unload, CPU configuration, helper teardown/fork duplication,
	 * and suspend/resume around CPU start, debugger entry, and kernel
	 * relocation.
	 */
	dtrace_modload = dtrace_module_loaded;
	dtrace_modunload = dtrace_module_unloaded;
	dtrace_cpu_init = dtrace_cpu_setup_initial;
	dtrace_helpers_cleanup = dtrace_helpers_destroy;
	dtrace_helpers_fork = dtrace_helpers_duplicate;
	dtrace_cpustart_init = dtrace_suspend;
	dtrace_cpustart_fini = dtrace_resume;
	dtrace_debugger_init = dtrace_suspend;
	dtrace_debugger_fini = dtrace_resume;
	dtrace_kreloc_init = dtrace_suspend;
	dtrace_kreloc_fini = dtrace_resume;

	register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);

	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	/* ID arenas for probe IDs and (clone-device) minor numbers. */
	dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1,
	    NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
	dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE,
	    UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0,
	    VM_SLEEP | VMC_IDENTIFIER);
	dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri,
	    1, INT_MAX, 0);

	dtrace_state_cache = kmem_cache_create("dtrace_state_cache",
	    sizeof (dtrace_dstate_percpu_t) * (int)NCPU, DTRACE_STATE_ALIGN,
	    NULL, NULL, NULL, NULL, NULL, 0);

	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	/* Probe lookup hashes keyed by module, function, and name. */
	dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod),
	    offsetof(dtrace_probe_t, dtpr_nextmod),
	    offsetof(dtrace_probe_t, dtpr_prevmod));

	dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func),
	    offsetof(dtrace_probe_t, dtpr_nextfunc),
	    offsetof(dtrace_probe_t, dtpr_prevfunc));

	dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name),
	    offsetof(dtrace_probe_t, dtpr_nextname),
	    offsetof(dtrace_probe_t, dtpr_prevname));

	if (dtrace_retain_max < 1) {
		cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; "
		    "setting to 1", dtrace_retain_max);
		dtrace_retain_max = 1;
	}

	/*
	 * Now discover our toxic ranges.
	 */
	dtrace_toxic_ranges(dtrace_toxrange_add);

	/*
	 * Before we register ourselves as a provider to our own framework,
	 * we would like to assert that dtrace_provider is NULL -- but that's
	 * not true if we were loaded as a dependency of a DTrace provider.
	 * Once we've registered, we can assert that dtrace_provider is our
	 * pseudo provider.
	 */
	(void) dtrace_register("dtrace", &dtrace_provider_attr,
	    DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id);

	ASSERT(dtrace_provider != NULL);
	ASSERT((dtrace_provider_id_t)dtrace_provider == id);

	/*
	 * Create the core BEGIN/END/ERROR probes.  The trailing integer is
	 * the provider-private 'arg' and differs per architecture here.
	 */
#if !defined(__APPLE__)
	dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "BEGIN", 0, NULL);
	dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "END", 0, NULL);
	dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "ERROR", 1, NULL);
#elif defined(__ppc__) || defined(__ppc64__)
	dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "BEGIN", 2, NULL);
	dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "END", 1, NULL);
	dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "ERROR", 4, NULL);
#elif (defined(__i386__) || defined (__x86_64__))
	dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "BEGIN", 1, NULL);
	dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "END", 0, NULL);
	dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "ERROR", 3, NULL);
#else
#error Unknown Architecture
#endif /* __APPLE__ */

	dtrace_anon_property();
	lck_mtx_unlock(&cpu_lock);

	/*
	 * If DTrace helper tracing is enabled, we need to allocate the
	 * trace buffer and initialize the values.
	 */
	if (dtrace_helptrace_enabled) {
		ASSERT(dtrace_helptrace_buffer == NULL);
		dtrace_helptrace_buffer =
		    kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
		dtrace_helptrace_next = 0;
	}

	/*
	 * If there are already providers, we must ask them to provide their
	 * probes, and then match any anonymous enabling against them.  Note
	 * that there should be no other retained enablings at this time:
	 * the only retained enablings at this time should be the anonymous
	 * enabling.
	 */
	if (dtrace_anon.dta_enabling != NULL) {
		ASSERT(dtrace_retained == dtrace_anon.dta_enabling);

		dtrace_enabling_provide(NULL);
		state = dtrace_anon.dta_state;

		/*
		 * We couldn't hold cpu_lock across the above call to
		 * dtrace_enabling_provide(), but we must hold it to actually
		 * enable the probes.  We have to drop all of our locks, pick
		 * up cpu_lock, and regain our locks before matching the
		 * retained anonymous enabling.
		 */
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&dtrace_provider_lock);

		lck_mtx_lock(&cpu_lock);
		lck_mtx_lock(&dtrace_provider_lock);
		lck_mtx_lock(&dtrace_lock);

		/* Re-check: the anonymous enabling may have gone away. */
		if ((enab = dtrace_anon.dta_enabling) != NULL)
			(void) dtrace_enabling_match(enab, NULL);

		lck_mtx_unlock(&cpu_lock);
	}

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	if (state != NULL) {
		/*
		 * If we created any anonymous state, set it going now.
		 */
		(void) dtrace_state_go(state, &dtrace_anon.dta_beganon);
	}

	return (DDI_SUCCESS);
}

extern void fasttrap_init(void);

/*
 * Open of the dtrace control device: checks credentials, provides all
 * probes, and creates a new consumer state for this open.
 */
/*ARGSUSED*/
static int
dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
#pragma unused(flag, otyp)
	dtrace_state_t *state;
	uint32_t priv;
	uid_t uid;
	zoneid_t zoneid;

#if !defined(__APPLE__)
	if (getminor(*devp) == DTRACEMNRN_HELPER)
		return (0);

	/*
	 * If this wasn't an open with the "helper" minor, then it must be
	 * the "dtrace" minor.
	 */
	ASSERT(getminor(*devp) == DTRACEMNRN_DTRACE);
#else
	/* Darwin puts Helper on its own major device. */
#endif /* __APPLE__ */

	/*
	 * If no DTRACE_PRIV_* bits are set in the credential, then the
	 * caller lacks sufficient permission to do anything with DTrace.
	 */
	dtrace_cred2priv(cred_p, &priv, &uid, &zoneid);
	if (priv == DTRACE_PRIV_NONE)
		return (EACCES);

#if defined(__APPLE__)
	/*
	 * We delay the initialization of fasttrap as late as possible.
	 * It certainly can't be later than now!
	 */
	fasttrap_init();
#endif /* __APPLE__ */

	/*
	 * Ask all providers to provide all their probes.
	 */
	lck_mtx_lock(&dtrace_provider_lock);
	dtrace_probe_provide(NULL, NULL);
	lck_mtx_unlock(&dtrace_provider_lock);

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_lock);
	dtrace_opens++;
	dtrace_membar_producer();

	/*
	 * If the kernel debugger is active (that is, if the kernel debugger
	 * modified text in some way), we won't allow the open.
	 */
	if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
		dtrace_opens--;
		lck_mtx_unlock(&cpu_lock);
		lck_mtx_unlock(&dtrace_lock);
		return (EBUSY);
	}

	state = dtrace_state_create(devp, cred_p);
	lck_mtx_unlock(&cpu_lock);

	if (state == NULL) {
		/* Undo the open count; deactivate kdi if we were the last. */
		if (--dtrace_opens == 0)
			(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
		lck_mtx_unlock(&dtrace_lock);
		return (EAGAIN);
	}

	lck_mtx_unlock(&dtrace_lock);

#if defined(__APPLE__)
	lck_rw_lock_exclusive(&dtrace_dof_mode_lock);

	/*
	 * If we are currently lazy, transition states.
	 *
	 * Unlike dtrace_close, we do not need to check the
	 * value of dtrace_opens, as any positive value (and
	 * we count as 1) means we transition states.
	 */
	if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON) {
		dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_OFF;

		/*
		 * Iterate all existing processes and load lazy dofs.
		 */
		proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
		    dtrace_lazy_dofs_proc_iterate_doit,
		    NULL,
		    dtrace_lazy_dofs_proc_iterate_filter,
		    NULL);
	}

	lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);
#endif

	return (0);
}

/*
 * Close of the dtrace control device: tears down this consumer's state
 * (and any anonymous state it adopted) and decrements the open count.
 */
/*ARGSUSED*/
static int
dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
#pragma unused(flag,otyp,cred_p)
	minor_t minor = getminor(dev);
	dtrace_state_t *state;

#if !defined(__APPLE__)
	if (minor == DTRACEMNRN_HELPER)
		return (0);
#else
	/* Darwin puts Helper on its own major device. */
#endif /* __APPLE__ */

	state = ddi_get_soft_state(dtrace_softstate, minor);

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_lock);

	if (state->dts_anon) {
		/*
		 * There is anonymous state.  Destroy that first.
		 */
		ASSERT(dtrace_anon.dta_state == NULL);
		dtrace_state_destroy(state->dts_anon);
	}

	dtrace_state_destroy(state);
	ASSERT(dtrace_opens > 0);
	/* Last close: let the kernel debugger interpose on text again. */
	if (--dtrace_opens == 0)
		(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&cpu_lock);

#if defined(__APPLE__)

	/*
	 * Lock ordering requires the dof mode lock be taken before
	 * the dtrace_lock.
	 */
	lck_rw_lock_exclusive(&dtrace_dof_mode_lock);
	lck_mtx_lock(&dtrace_lock);

	/*
	 * If we are currently lazy-off, and this is the last close, transition to
	 * lazy state.
	 */
	if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF && dtrace_opens == 0) {
		dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
	}

	lck_mtx_unlock(&dtrace_lock);
	lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);
#endif

	return (0);
}

#if defined(__APPLE__)
/*
 * Introduce cast to quiet warnings.
 * XXX: This hides a lot of brokenness.
 */
#define copyin(src, dst, len) copyin( (user_addr_t)(src), (dst), (len) )
#define copyout(src, dst, len) copyout( (src), (user_addr_t)(dst), (len) )
#endif /* __APPLE__ */

#if defined(__APPLE__)
/*
 * ioctl handler for the Darwin helper device: user processes register
 * (DTRACEHIOC_ADDDOF) and unregister (DTRACEHIOC_REMOVE) helper DOF.
 * Registrations are handled lazily when possible; EACCES from the lazy
 * path means "fall through to eager processing".
 */
/*ARGSUSED*/
static int
dtrace_ioctl_helper(int cmd, caddr_t arg, int *rv)
{
#pragma unused(rv)
	/*
	 * Safe to check this outside the dof mode lock
	 */
	if (dtrace_dof_mode == DTRACE_DOF_MODE_NEVER)
		return KERN_SUCCESS;

	switch (cmd) {
	case DTRACEHIOC_ADDDOF: {
		dof_helper_t *dhp = NULL;
		size_t dof_ioctl_data_size;
		dof_ioctl_data_t* multi_dof;
		unsigned int i;
		int rval = 0;
		user_addr_t user_address = *(user_addr_t*)arg;
		uint64_t dof_count;
		int multi_dof_claimed = 0;
		proc_t* p = current_proc();

		/*
		 * Read the number of DOF sections being passed in.
		 */
		if (copyin(user_address + offsetof(dof_ioctl_data_t, dofiod_count),
			   &dof_count,
			   sizeof(dof_count))) {
			dtrace_dof_error(NULL, "failed to copyin dofiod_count");
			return (EFAULT);
		}

		/*
		 * Range check the count.
		 */
		if (dof_count == 0 || dof_count > 1024) {
			dtrace_dof_error(NULL, "dofiod_count is not valid");
			return (EINVAL);
		}

		/*
		 * Allocate a correctly sized structure and copyin the data.
		 */
		dof_ioctl_data_size = DOF_IOCTL_DATA_T_SIZE(dof_count);
		if ((multi_dof = kmem_alloc(dof_ioctl_data_size, KM_SLEEP)) == NULL)
			return (ENOMEM);

		/* NOTE! We can no longer exit this method via return */
		if (copyin(user_address, multi_dof, dof_ioctl_data_size) != 0) {
			dtrace_dof_error(NULL, "failed copyin of dof_ioctl_data_t");
			rval = EFAULT;
			goto cleanup;
		}

		/*
		 * Check that the count didn't change between the first copyin and the second.
		 */
		if (multi_dof->dofiod_count != dof_count) {
			rval = EINVAL;
			goto cleanup;
		}

		/*
		 * Try to process lazily first.
		 */
		rval = dtrace_lazy_dofs_add(p, multi_dof, &multi_dof_claimed);

		/*
		 * If rval is EACCES, we must be non-lazy.
		 */
		if (rval == EACCES) {
			rval = 0;
			/*
			 * Process each dof_helper_t
			 */
			i = 0;
			do {
				dhp = &multi_dof->dofiod_helpers[i];

				dof_hdr_t *dof = dtrace_dof_copyin(dhp->dofhp_dof, &rval);

				if (dof != NULL) {
					lck_mtx_lock(&dtrace_lock);

					/*
					 * dtrace_helper_slurp() takes responsibility for the dof --
					 * it may free it now or it may save it and free it later.
					 */
					/* On success, dofhp_dof becomes the generation id. */
					if ((dhp->dofhp_dof = (uint64_t)dtrace_helper_slurp(p, dof, dhp)) == -1ULL) {
						rval = EINVAL;
					}

					lck_mtx_unlock(&dtrace_lock);
				}
			} while (++i < multi_dof->dofiod_count && rval == 0);
		}

		/*
		 * We need to copyout the multi_dof struct, because it contains
		 * the generation (unique id) values needed to call DTRACEHIOC_REMOVE
		 *
		 * This could certainly be better optimized.
		 */
		if (copyout(multi_dof, user_address, dof_ioctl_data_size) != 0) {
			dtrace_dof_error(NULL, "failed copyout of dof_ioctl_data_t");
			/* Don't overwrite pre-existing error code */
			if (rval == 0) rval = EFAULT;
		}

	cleanup:
		/*
		 * If we had to allocate struct memory, free it.
		 */
		/* If the lazy path "claimed" multi_dof, it now owns the memory. */
		if (multi_dof != NULL && !multi_dof_claimed) {
			kmem_free(multi_dof, dof_ioctl_data_size);
		}

		return rval;
	}

	case DTRACEHIOC_REMOVE: {
		int generation = *(int*)arg;
		proc_t* p = current_proc();

		/*
		 * Try lazy first.
15281 */ 15282 int rval = dtrace_lazy_dofs_remove(p, generation); 15283 15284 /* 15285 * EACCES means non-lazy 15286 */ 15287 if (rval == EACCES) { 15288 lck_mtx_lock(&dtrace_lock); 15289 rval = dtrace_helper_destroygen(p, generation); 15290 lck_mtx_unlock(&dtrace_lock); 15291 } 15292 15293 return (rval); 15294 } 15295 15296 default: 15297 break; 15298 } 15299 15300 return ENOTTY; 15301} 15302#endif /* __APPLE__ */ 15303 15304/*ARGSUSED*/ 15305static int 15306dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) 15307{ 15308#pragma unused(md) 15309 15310 minor_t minor = getminor(dev); 15311 dtrace_state_t *state; 15312 int rval; 15313 15314#if !defined(__APPLE__) 15315 if (minor == DTRACEMNRN_HELPER) 15316 return (dtrace_ioctl_helper(cmd, arg, rv)); 15317#else 15318 /* Darwin puts Helper on its own major device. */ 15319#endif /* __APPLE__ */ 15320 15321 state = ddi_get_soft_state(dtrace_softstate, minor); 15322 15323 if (state->dts_anon) { 15324 ASSERT(dtrace_anon.dta_state == NULL); 15325 state = state->dts_anon; 15326 } 15327 15328 switch (cmd) { 15329 case DTRACEIOC_PROVIDER: { 15330 dtrace_providerdesc_t pvd; 15331 dtrace_provider_t *pvp; 15332 15333 if (copyin((void *)arg, &pvd, sizeof (pvd)) != 0) 15334 return (EFAULT); 15335 15336 pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0'; 15337 lck_mtx_lock(&dtrace_provider_lock); 15338 15339 for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) { 15340 if (strcmp(pvp->dtpv_name, pvd.dtvd_name) == 0) 15341 break; 15342 } 15343 15344 lck_mtx_unlock(&dtrace_provider_lock); 15345 15346 if (pvp == NULL) 15347 return (ESRCH); 15348 15349 bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t)); 15350 bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t)); 15351 if (copyout(&pvd, (void *)arg, sizeof (pvd)) != 0) 15352 return (EFAULT); 15353 15354 return (0); 15355 } 15356 15357 case DTRACEIOC_EPROBE: { 15358 dtrace_eprobedesc_t epdesc; 15359 dtrace_ecb_t *ecb; 15360 
dtrace_action_t *act; 15361 void *buf; 15362 size_t size; 15363 uintptr_t dest; 15364 int nrecs; 15365 15366 if (copyin((void *)arg, &epdesc, sizeof (epdesc)) != 0) 15367 return (EFAULT); 15368 15369 lck_mtx_lock(&dtrace_lock); 15370 15371 if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) { 15372 lck_mtx_unlock(&dtrace_lock); 15373 return (EINVAL); 15374 } 15375 15376 if (ecb->dte_probe == NULL) { 15377 lck_mtx_unlock(&dtrace_lock); 15378 return (EINVAL); 15379 } 15380 15381 epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id; 15382 epdesc.dtepd_uarg = ecb->dte_uarg; 15383 epdesc.dtepd_size = ecb->dte_size; 15384 15385 nrecs = epdesc.dtepd_nrecs; 15386 epdesc.dtepd_nrecs = 0; 15387 for (act = ecb->dte_action; act != NULL; act = act->dta_next) { 15388 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) 15389 continue; 15390 15391 epdesc.dtepd_nrecs++; 15392 } 15393 15394 /* 15395 * Now that we have the size, we need to allocate a temporary 15396 * buffer in which to store the complete description. We need 15397 * the temporary buffer to be able to drop dtrace_lock() 15398 * across the copyout(), below. 
15399 */ 15400 size = sizeof (dtrace_eprobedesc_t) + 15401 (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t)); 15402 15403 buf = kmem_alloc(size, KM_SLEEP); 15404 dest = (uintptr_t)buf; 15405 15406 bcopy(&epdesc, (void *)dest, sizeof (epdesc)); 15407 dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]); 15408 15409 for (act = ecb->dte_action; act != NULL; act = act->dta_next) { 15410 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) 15411 continue; 15412 15413 if (nrecs-- == 0) 15414 break; 15415 15416 bcopy(&act->dta_rec, (void *)dest, 15417 sizeof (dtrace_recdesc_t)); 15418 dest += sizeof (dtrace_recdesc_t); 15419 } 15420 15421 lck_mtx_unlock(&dtrace_lock); 15422 15423 if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { 15424 kmem_free(buf, size); 15425 return (EFAULT); 15426 } 15427 15428 kmem_free(buf, size); 15429 return (0); 15430 } 15431 15432 case DTRACEIOC_AGGDESC: { 15433 dtrace_aggdesc_t aggdesc; 15434 dtrace_action_t *act; 15435 dtrace_aggregation_t *agg; 15436 int nrecs; 15437 uint32_t offs; 15438 dtrace_recdesc_t *lrec; 15439 void *buf; 15440 size_t size; 15441 uintptr_t dest; 15442 15443 if (copyin((void *)arg, &aggdesc, sizeof (aggdesc)) != 0) 15444 return (EFAULT); 15445 15446 lck_mtx_lock(&dtrace_lock); 15447 15448 if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) { 15449 lck_mtx_unlock(&dtrace_lock); 15450 return (EINVAL); 15451 } 15452 15453 aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid; 15454 15455 nrecs = aggdesc.dtagd_nrecs; 15456 aggdesc.dtagd_nrecs = 0; 15457 15458 offs = agg->dtag_base; 15459 lrec = &agg->dtag_action.dta_rec; 15460 aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs; 15461 15462 for (act = agg->dtag_first; ; act = act->dta_next) { 15463 ASSERT(act->dta_intuple || 15464 DTRACEACT_ISAGG(act->dta_kind)); 15465 15466 /* 15467 * If this action has a record size of zero, it 15468 * denotes an argument to the aggregating action. 
15469 * Because the presence of this record doesn't (or 15470 * shouldn't) affect the way the data is interpreted, 15471 * we don't copy it out to save user-level the 15472 * confusion of dealing with a zero-length record. 15473 */ 15474 if (act->dta_rec.dtrd_size == 0) { 15475 ASSERT(agg->dtag_hasarg); 15476 continue; 15477 } 15478 15479 aggdesc.dtagd_nrecs++; 15480 15481 if (act == &agg->dtag_action) 15482 break; 15483 } 15484 15485 /* 15486 * Now that we have the size, we need to allocate a temporary 15487 * buffer in which to store the complete description. We need 15488 * the temporary buffer to be able to drop dtrace_lock() 15489 * across the copyout(), below. 15490 */ 15491 size = sizeof (dtrace_aggdesc_t) + 15492 (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t)); 15493 15494 buf = kmem_alloc(size, KM_SLEEP); 15495 dest = (uintptr_t)buf; 15496 15497 bcopy(&aggdesc, (void *)dest, sizeof (aggdesc)); 15498 dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]); 15499 15500 for (act = agg->dtag_first; ; act = act->dta_next) { 15501 dtrace_recdesc_t rec = act->dta_rec; 15502 15503 /* 15504 * See the comment in the above loop for why we pass 15505 * over zero-length records. 
15506 */ 15507 if (rec.dtrd_size == 0) { 15508 ASSERT(agg->dtag_hasarg); 15509 continue; 15510 } 15511 15512 if (nrecs-- == 0) 15513 break; 15514 15515 rec.dtrd_offset -= offs; 15516 bcopy(&rec, (void *)dest, sizeof (rec)); 15517 dest += sizeof (dtrace_recdesc_t); 15518 15519 if (act == &agg->dtag_action) 15520 break; 15521 } 15522 15523 lck_mtx_unlock(&dtrace_lock); 15524 15525 if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { 15526 kmem_free(buf, size); 15527 return (EFAULT); 15528 } 15529 15530 kmem_free(buf, size); 15531 return (0); 15532 } 15533 15534 case DTRACEIOC_ENABLE: { 15535 dof_hdr_t *dof; 15536 dtrace_enabling_t *enab = NULL; 15537 dtrace_vstate_t *vstate; 15538 int err = 0; 15539 15540 *rv = 0; 15541 15542 /* 15543 * If a NULL argument has been passed, we take this as our 15544 * cue to reevaluate our enablings. 15545 */ 15546 if (arg == NULL) { 15547 lck_mtx_lock(&cpu_lock); 15548 lck_mtx_lock(&dtrace_lock); 15549 err = dtrace_enabling_matchstate(state, rv); 15550 lck_mtx_unlock(&dtrace_lock); 15551 lck_mtx_unlock(&cpu_lock); 15552 15553 return (err); 15554 } 15555 15556 if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL) 15557 return (rval); 15558 15559 lck_mtx_lock(&cpu_lock); 15560 lck_mtx_lock(&dtrace_lock); 15561 vstate = &state->dts_vstate; 15562 15563 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) { 15564 lck_mtx_unlock(&dtrace_lock); 15565 lck_mtx_unlock(&cpu_lock); 15566 dtrace_dof_destroy(dof); 15567 return (EBUSY); 15568 } 15569 15570 if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) { 15571 lck_mtx_unlock(&dtrace_lock); 15572 lck_mtx_unlock(&cpu_lock); 15573 dtrace_dof_destroy(dof); 15574 return (EINVAL); 15575 } 15576 15577 if ((rval = dtrace_dof_options(dof, state)) != 0) { 15578 dtrace_enabling_destroy(enab); 15579 lck_mtx_unlock(&dtrace_lock); 15580 lck_mtx_unlock(&cpu_lock); 15581 dtrace_dof_destroy(dof); 15582 return (rval); 15583 } 15584 15585 if ((err = dtrace_enabling_match(enab, rv)) == 0) { 15586 
err = dtrace_enabling_retain(enab); 15587 } else { 15588 dtrace_enabling_destroy(enab); 15589 } 15590 15591 lck_mtx_unlock(&cpu_lock); 15592 lck_mtx_unlock(&dtrace_lock); 15593 dtrace_dof_destroy(dof); 15594 15595 return (err); 15596 } 15597 15598 case DTRACEIOC_REPLICATE: { 15599 dtrace_repldesc_t desc; 15600 dtrace_probedesc_t *match = &desc.dtrpd_match; 15601 dtrace_probedesc_t *create = &desc.dtrpd_create; 15602 int err; 15603 15604 if (copyin((void *)arg, &desc, sizeof (desc)) != 0) 15605 return (EFAULT); 15606 15607 match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; 15608 match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; 15609 match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; 15610 match->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; 15611 15612 create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; 15613 create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; 15614 create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; 15615 create->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; 15616 15617 lck_mtx_lock(&dtrace_lock); 15618 err = dtrace_enabling_replicate(state, match, create); 15619 lck_mtx_unlock(&dtrace_lock); 15620 15621 return (err); 15622 } 15623 15624 case DTRACEIOC_PROBEMATCH: 15625 case DTRACEIOC_PROBES: { 15626 dtrace_probe_t *probe = NULL; 15627 dtrace_probedesc_t desc; 15628 dtrace_probekey_t pkey; 15629 dtrace_id_t i; 15630 int m = 0; 15631 uint32_t priv; 15632 uid_t uid; 15633 zoneid_t zoneid; 15634 15635 if (copyin((void *)arg, &desc, sizeof (desc)) != 0) 15636 return (EFAULT); 15637 15638 desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; 15639 desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; 15640 desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; 15641 desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0'; 15642 15643 /* 15644 * Before we attempt to match this probe, we want to give 15645 * all providers the opportunity to provide it. 
15646 */ 15647 if (desc.dtpd_id == DTRACE_IDNONE) { 15648 lck_mtx_lock(&dtrace_provider_lock); 15649 dtrace_probe_provide(&desc, NULL); 15650 lck_mtx_unlock(&dtrace_provider_lock); 15651 desc.dtpd_id++; 15652 } 15653 15654 if (cmd == DTRACEIOC_PROBEMATCH) { 15655 dtrace_probekey(&desc, &pkey); 15656 pkey.dtpk_id = DTRACE_IDNONE; 15657 } 15658 15659 dtrace_cred2priv(cr, &priv, &uid, &zoneid); 15660 15661 lck_mtx_lock(&dtrace_lock); 15662 15663 if (cmd == DTRACEIOC_PROBEMATCH) { 15664 for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { 15665 if ((probe = dtrace_probes[i - 1]) != NULL && 15666 (m = dtrace_match_probe(probe, &pkey, 15667 priv, uid, zoneid)) != 0) 15668 break; 15669 } 15670 15671 if (m < 0) { 15672 lck_mtx_unlock(&dtrace_lock); 15673 return (EINVAL); 15674 } 15675 15676 } else { 15677 for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { 15678 if ((probe = dtrace_probes[i - 1]) != NULL && 15679 dtrace_match_priv(probe, priv, uid, zoneid)) 15680 break; 15681 } 15682 } 15683 15684 if (probe == NULL) { 15685 lck_mtx_unlock(&dtrace_lock); 15686 return (ESRCH); 15687 } 15688 15689 dtrace_probe_description(probe, &desc); 15690 lck_mtx_unlock(&dtrace_lock); 15691 15692 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) 15693 return (EFAULT); 15694 15695 return (0); 15696 } 15697 15698 case DTRACEIOC_PROBEARG: { 15699 dtrace_argdesc_t desc; 15700 dtrace_probe_t *probe; 15701 dtrace_provider_t *prov; 15702 15703 if (copyin((void *)arg, &desc, sizeof (desc)) != 0) 15704 return (EFAULT); 15705 15706 if (desc.dtargd_id == DTRACE_IDNONE) 15707 return (EINVAL); 15708 15709 if (desc.dtargd_ndx == DTRACE_ARGNONE) 15710 return (EINVAL); 15711 15712 lck_mtx_lock(&dtrace_provider_lock); 15713 lck_mtx_lock(&mod_lock); 15714 lck_mtx_lock(&dtrace_lock); 15715 15716 if (desc.dtargd_id > dtrace_nprobes) { 15717 lck_mtx_unlock(&dtrace_lock); 15718 lck_mtx_unlock(&mod_lock); 15719 lck_mtx_unlock(&dtrace_provider_lock); 15720 return (EINVAL); 15721 } 15722 15723 if ((probe = 
dtrace_probes[desc.dtargd_id - 1]) == NULL) { 15724 lck_mtx_unlock(&dtrace_lock); 15725 lck_mtx_unlock(&mod_lock); 15726 lck_mtx_unlock(&dtrace_provider_lock); 15727 return (EINVAL); 15728 } 15729 15730 lck_mtx_unlock(&dtrace_lock); 15731 15732 prov = probe->dtpr_provider; 15733 15734 if (prov->dtpv_pops.dtps_getargdesc == NULL) { 15735 /* 15736 * There isn't any typed information for this probe. 15737 * Set the argument number to DTRACE_ARGNONE. 15738 */ 15739 desc.dtargd_ndx = DTRACE_ARGNONE; 15740 } else { 15741 desc.dtargd_native[0] = '\0'; 15742 desc.dtargd_xlate[0] = '\0'; 15743 desc.dtargd_mapping = desc.dtargd_ndx; 15744 15745 prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg, 15746 probe->dtpr_id, probe->dtpr_arg, &desc); 15747 } 15748 15749 lck_mtx_unlock(&mod_lock); 15750 lck_mtx_unlock(&dtrace_provider_lock); 15751 15752 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) 15753 return (EFAULT); 15754 15755 return (0); 15756 } 15757 15758 case DTRACEIOC_GO: { 15759 processorid_t cpuid; 15760 rval = dtrace_state_go(state, &cpuid); 15761 15762 if (rval != 0) 15763 return (rval); 15764 15765 if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) 15766 return (EFAULT); 15767 15768 return (0); 15769 } 15770 15771 case DTRACEIOC_STOP: { 15772 processorid_t cpuid; 15773 15774 lck_mtx_lock(&dtrace_lock); 15775 rval = dtrace_state_stop(state, &cpuid); 15776 lck_mtx_unlock(&dtrace_lock); 15777 15778 if (rval != 0) 15779 return (rval); 15780 15781 if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) 15782 return (EFAULT); 15783 15784 return (0); 15785 } 15786 15787 case DTRACEIOC_DOFGET: { 15788 dof_hdr_t hdr, *dof; 15789 uint64_t len; 15790 15791 if (copyin((void *)arg, &hdr, sizeof (hdr)) != 0) 15792 return (EFAULT); 15793 15794 lck_mtx_lock(&dtrace_lock); 15795 dof = dtrace_dof_create(state); 15796 lck_mtx_unlock(&dtrace_lock); 15797 15798 len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz); 15799 rval = copyout(dof, (void *)arg, len); 15800 dtrace_dof_destroy(dof); 
15801 15802 return (rval == 0 ? 0 : EFAULT); 15803 } 15804 15805 case DTRACEIOC_AGGSNAP: 15806 case DTRACEIOC_BUFSNAP: { 15807 dtrace_bufdesc_t desc; 15808 caddr_t cached; 15809 dtrace_buffer_t *buf; 15810 15811 if (copyin((void *)arg, &desc, sizeof (desc)) != 0) 15812 return (EFAULT); 15813 15814 if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= (int)NCPU) 15815 return (EINVAL); 15816 15817 lck_mtx_lock(&dtrace_lock); 15818 15819 if (cmd == DTRACEIOC_BUFSNAP) { 15820 buf = &state->dts_buffer[desc.dtbd_cpu]; 15821 } else { 15822 buf = &state->dts_aggbuffer[desc.dtbd_cpu]; 15823 } 15824 15825 if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) { 15826 size_t sz = buf->dtb_offset; 15827 15828 if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) { 15829 lck_mtx_unlock(&dtrace_lock); 15830 return (EBUSY); 15831 } 15832 15833 /* 15834 * If this buffer has already been consumed, we're 15835 * going to indicate that there's nothing left here 15836 * to consume. 15837 */ 15838 if (buf->dtb_flags & DTRACEBUF_CONSUMED) { 15839 lck_mtx_unlock(&dtrace_lock); 15840 15841 desc.dtbd_size = 0; 15842 desc.dtbd_drops = 0; 15843 desc.dtbd_errors = 0; 15844 desc.dtbd_oldest = 0; 15845 sz = sizeof (desc); 15846 15847 if (copyout(&desc, (void *)arg, sz) != 0) 15848 return (EFAULT); 15849 15850 return (0); 15851 } 15852 15853 /* 15854 * If this is a ring buffer that has wrapped, we want 15855 * to copy the whole thing out. 
15856 */ 15857 if (buf->dtb_flags & DTRACEBUF_WRAPPED) { 15858 dtrace_buffer_polish(buf); 15859 sz = buf->dtb_size; 15860 } 15861 15862 if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) { 15863 lck_mtx_unlock(&dtrace_lock); 15864 return (EFAULT); 15865 } 15866 15867 desc.dtbd_size = sz; 15868 desc.dtbd_drops = buf->dtb_drops; 15869 desc.dtbd_errors = buf->dtb_errors; 15870 desc.dtbd_oldest = buf->dtb_xamot_offset; 15871 15872 lck_mtx_unlock(&dtrace_lock); 15873 15874 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) 15875 return (EFAULT); 15876 15877 buf->dtb_flags |= DTRACEBUF_CONSUMED; 15878 15879 return (0); 15880 } 15881 15882 if (buf->dtb_tomax == NULL) { 15883 ASSERT(buf->dtb_xamot == NULL); 15884 lck_mtx_unlock(&dtrace_lock); 15885 return (ENOENT); 15886 } 15887 15888 cached = buf->dtb_tomax; 15889 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); 15890 15891 dtrace_xcall(desc.dtbd_cpu, 15892 (dtrace_xcall_t)dtrace_buffer_switch, buf); 15893 15894 state->dts_errors += buf->dtb_xamot_errors; 15895 15896 /* 15897 * If the buffers did not actually switch, then the cross call 15898 * did not take place -- presumably because the given CPU is 15899 * not in the ready set. If this is the case, we'll return 15900 * ENOENT. 15901 */ 15902 if (buf->dtb_tomax == cached) { 15903 ASSERT(buf->dtb_xamot != cached); 15904 lck_mtx_unlock(&dtrace_lock); 15905 return (ENOENT); 15906 } 15907 15908 ASSERT(cached == buf->dtb_xamot); 15909 15910 /* 15911 * We have our snapshot; now copy it out. 15912 */ 15913 if (copyout(buf->dtb_xamot, desc.dtbd_data, 15914 buf->dtb_xamot_offset) != 0) { 15915 lck_mtx_unlock(&dtrace_lock); 15916 return (EFAULT); 15917 } 15918 15919 desc.dtbd_size = buf->dtb_xamot_offset; 15920 desc.dtbd_drops = buf->dtb_xamot_drops; 15921 desc.dtbd_errors = buf->dtb_xamot_errors; 15922 desc.dtbd_oldest = 0; 15923 15924 lck_mtx_unlock(&dtrace_lock); 15925 15926 /* 15927 * Finally, copy out the buffer description. 
15928 */ 15929 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) 15930 return (EFAULT); 15931 15932 return (0); 15933 } 15934 15935 case DTRACEIOC_CONF: { 15936 dtrace_conf_t conf; 15937 15938 bzero(&conf, sizeof (conf)); 15939 conf.dtc_difversion = DIF_VERSION; 15940 conf.dtc_difintregs = DIF_DIR_NREGS; 15941 conf.dtc_diftupregs = DIF_DTR_NREGS; 15942 conf.dtc_ctfmodel = CTF_MODEL_NATIVE; 15943 15944 if (copyout(&conf, (void *)arg, sizeof (conf)) != 0) 15945 return (EFAULT); 15946 15947 return (0); 15948 } 15949 15950 case DTRACEIOC_STATUS: { 15951 dtrace_status_t stat; 15952 dtrace_dstate_t *dstate; 15953 int i, j; 15954 uint64_t nerrs; 15955 15956 /* 15957 * See the comment in dtrace_state_deadman() for the reason 15958 * for setting dts_laststatus to INT64_MAX before setting 15959 * it to the correct value. 15960 */ 15961 state->dts_laststatus = INT64_MAX; 15962 dtrace_membar_producer(); 15963 state->dts_laststatus = dtrace_gethrtime(); 15964 15965 bzero(&stat, sizeof (stat)); 15966 15967 lck_mtx_lock(&dtrace_lock); 15968 15969 if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) { 15970 lck_mtx_unlock(&dtrace_lock); 15971 return (ENOENT); 15972 } 15973 15974 if (state->dts_activity == DTRACE_ACTIVITY_DRAINING) 15975 stat.dtst_exiting = 1; 15976 15977 nerrs = state->dts_errors; 15978 dstate = &state->dts_vstate.dtvs_dynvars; 15979 15980 for (i = 0; i < (int)NCPU; i++) { 15981 dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i]; 15982 15983 stat.dtst_dyndrops += dcpu->dtdsc_drops; 15984 stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops; 15985 stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops; 15986 15987 if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL) 15988 stat.dtst_filled++; 15989 15990 nerrs += state->dts_buffer[i].dtb_errors; 15991 15992 for (j = 0; j < state->dts_nspeculations; j++) { 15993 dtrace_speculation_t *spec; 15994 dtrace_buffer_t *buf; 15995 15996 spec = &state->dts_speculations[j]; 15997 buf = &spec->dtsp_buffer[i]; 15998 
stat.dtst_specdrops += buf->dtb_xamot_drops; 15999 } 16000 } 16001 16002 stat.dtst_specdrops_busy = state->dts_speculations_busy; 16003 stat.dtst_specdrops_unavail = state->dts_speculations_unavail; 16004 stat.dtst_stkstroverflows = state->dts_stkstroverflows; 16005 stat.dtst_dblerrors = state->dts_dblerrors; 16006 stat.dtst_killed = 16007 (state->dts_activity == DTRACE_ACTIVITY_KILLED); 16008 stat.dtst_errors = nerrs; 16009 16010 lck_mtx_unlock(&dtrace_lock); 16011 16012 if (copyout(&stat, (void *)arg, sizeof (stat)) != 0) 16013 return (EFAULT); 16014 16015 return (0); 16016 } 16017 16018 case DTRACEIOC_FORMAT: { 16019 dtrace_fmtdesc_t fmt; 16020 char *str; 16021 int len; 16022 16023 if (copyin((void *)arg, &fmt, sizeof (fmt)) != 0) 16024 return (EFAULT); 16025 16026 lck_mtx_lock(&dtrace_lock); 16027 16028 if (fmt.dtfd_format == 0 || 16029 fmt.dtfd_format > state->dts_nformats) { 16030 lck_mtx_unlock(&dtrace_lock); 16031 return (EINVAL); 16032 } 16033 16034 /* 16035 * Format strings are allocated contiguously and they are 16036 * never freed; if a format index is less than the number 16037 * of formats, we can assert that the format map is non-NULL 16038 * and that the format for the specified index is non-NULL. 
16039 */ 16040 ASSERT(state->dts_formats != NULL); 16041 str = state->dts_formats[fmt.dtfd_format - 1]; 16042 ASSERT(str != NULL); 16043 16044 len = strlen(str) + 1; 16045 16046 if (len > fmt.dtfd_length) { 16047 fmt.dtfd_length = len; 16048 16049 if (copyout(&fmt, (void *)arg, sizeof (fmt)) != 0) { 16050 lck_mtx_unlock(&dtrace_lock); 16051 return (EINVAL); 16052 } 16053 } else { 16054 if (copyout(str, fmt.dtfd_string, len) != 0) { 16055 lck_mtx_unlock(&dtrace_lock); 16056 return (EINVAL); 16057 } 16058 } 16059 16060 lck_mtx_unlock(&dtrace_lock); 16061 return (0); 16062 } 16063 16064 default: 16065 break; 16066 } 16067 16068 return (ENOTTY); 16069} 16070 16071#if defined(__APPLE__) 16072#undef copyin 16073#undef copyout 16074#endif /* __APPLE__ */ 16075 16076#if !defined(__APPLE__) 16077/*ARGSUSED*/ 16078static int 16079dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 16080{ 16081 dtrace_state_t *state; 16082 16083 switch (cmd) { 16084 case DDI_DETACH: 16085 break; 16086 16087 case DDI_SUSPEND: 16088 return (DDI_SUCCESS); 16089 16090 default: 16091 return (DDI_FAILURE); 16092 } 16093 16094 lck_mtx_lock(&cpu_lock); 16095 lck_mtx_lock(&dtrace_provider_lock); 16096 lck_mtx_lock(&dtrace_lock); 16097 16098 ASSERT(dtrace_opens == 0); 16099 16100 if (dtrace_helpers > 0) { 16101 lck_mtx_unlock(&dtrace_provider_lock); 16102 lck_mtx_unlock(&dtrace_lock); 16103 lck_mtx_unlock(&cpu_lock); 16104 return (DDI_FAILURE); 16105 } 16106 16107 if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) { 16108 lck_mtx_unlock(&dtrace_provider_lock); 16109 lck_mtx_unlock(&dtrace_lock); 16110 lck_mtx_unlock(&cpu_lock); 16111 return (DDI_FAILURE); 16112 } 16113 16114 dtrace_provider = NULL; 16115 16116 if ((state = dtrace_anon_grab()) != NULL) { 16117 /* 16118 * If there were ECBs on this state, the provider should 16119 * have not been allowed to detach; assert that there is 16120 * none. 
16121 */ 16122 ASSERT(state->dts_necbs == 0); 16123 dtrace_state_destroy(state); 16124 16125 /* 16126 * If we're being detached with anonymous state, we need to 16127 * indicate to the kernel debugger that DTrace is now inactive. 16128 */ 16129 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); 16130 } 16131 16132 bzero(&dtrace_anon, sizeof (dtrace_anon_t)); 16133 unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL); 16134 dtrace_cpu_init = NULL; 16135 dtrace_helpers_cleanup = NULL; 16136 dtrace_helpers_fork = NULL; 16137 dtrace_cpustart_init = NULL; 16138 dtrace_cpustart_fini = NULL; 16139 dtrace_debugger_init = NULL; 16140 dtrace_debugger_fini = NULL; 16141 dtrace_kreloc_init = NULL; 16142 dtrace_kreloc_fini = NULL; 16143 dtrace_modload = NULL; 16144 dtrace_modunload = NULL; 16145 16146 lck_mtx_unlock(&cpu_lock); 16147 16148 if (dtrace_helptrace_enabled) { 16149 kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize); 16150 dtrace_helptrace_buffer = NULL; 16151 } 16152 16153 kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *)); 16154 dtrace_probes = NULL; 16155 dtrace_nprobes = 0; 16156 16157 dtrace_hash_destroy(dtrace_bymod); 16158 dtrace_hash_destroy(dtrace_byfunc); 16159 dtrace_hash_destroy(dtrace_byname); 16160 dtrace_bymod = NULL; 16161 dtrace_byfunc = NULL; 16162 dtrace_byname = NULL; 16163 16164 kmem_cache_destroy(dtrace_state_cache); 16165 vmem_destroy(dtrace_minor); 16166 vmem_destroy(dtrace_arena); 16167 16168 if (dtrace_toxrange != NULL) { 16169 kmem_free(dtrace_toxrange, 16170 dtrace_toxranges_max * sizeof (dtrace_toxrange_t)); 16171 dtrace_toxrange = NULL; 16172 dtrace_toxranges = 0; 16173 dtrace_toxranges_max = 0; 16174 } 16175 16176 ddi_remove_minor_node(dtrace_devi, NULL); 16177 dtrace_devi = NULL; 16178 16179 ddi_soft_state_fini(&dtrace_softstate); 16180 16181 ASSERT(dtrace_vtime_references == 0); 16182 ASSERT(dtrace_opens == 0); 16183 ASSERT(dtrace_retained == NULL); 16184 16185 
	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	/*
	 * We don't destroy the task queue until after we have dropped our
	 * locks (taskq_destroy() may block on running tasks). To prevent
	 * attempting to do work after we have effectively detached but before
	 * the task queue has been destroyed, all tasks dispatched via the
	 * task queue must check that DTrace is still attached before
	 * performing any operation.
	 */
	taskq_destroy(dtrace_taskq);
	dtrace_taskq = NULL;

	return (DDI_SUCCESS);
}

/*
 * DDI getinfo entry point: map a query onto the dtrace devinfo node.
 * The instance number is always 0.
 */
/*ARGSUSED*/
static int
dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)dtrace_devi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

/*
 * Character-device entry points for the Solaris driver model.
 */
static struct cb_ops dtrace_cb_ops = {
	dtrace_open,		/* open */
	dtrace_close,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	dtrace_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

static struct dev_ops dtrace_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	dtrace_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	dtrace_attach,		/* attach */
	dtrace_detach,		/* detach */
	nodev,			/* reset */
	&dtrace_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	nodev			/* dev power */
};

static struct modldrv
modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"Dynamic Tracing",	/* name of module */
	&dtrace_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

/*
 * Loadable-module entry points; all defer to the modlinkage above.
 */
int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}
#else

/*
 * Darwin glue: adapt xnu's cdevsw entry-point signatures to the
 * Solaris-style dtrace_* routines.  The helper_* entry points back the
 * "helper" device, whose open/close are no-ops (only its ioctl does work).
 */
d_open_t _dtrace_open, helper_open;
d_close_t _dtrace_close, helper_close;
d_ioctl_t _dtrace_ioctl, helper_ioctl;

int
_dtrace_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(p)
	/* dtrace_open() takes a dev_t pointer, so hand it a local copy. */
	dev_t locdev = dev;

	return dtrace_open( &locdev, flags, devtype, CRED());
}

int
helper_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(dev,flags,devtype,p)
	return 0;
}

int
_dtrace_close(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(p)
	return dtrace_close( dev, flags, devtype, CRED());
}

int
helper_close(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(dev,flags,devtype,p)
	return 0;
}

int
_dtrace_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
{
#pragma unused(p)
	int err, rv = 0;

	err = dtrace_ioctl(dev, (int)cmd, *(intptr_t *)data, fflag, CRED(), &rv);

	/* XXX Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice.
*/ 16329 if (err != 0) { 16330 ASSERT( (err & 0xfffff000) == 0 ); 16331 return (err & 0xfff); /* ioctl returns -1 and errno set to an error code < 4096 */ 16332 } else if (rv != 0) { 16333 ASSERT( (rv & 0xfff00000) == 0 ); 16334 return (((rv & 0xfffff) << 12)); /* ioctl returns -1 and errno set to a return value >= 4096 */ 16335 } else 16336 return 0; 16337} 16338 16339int 16340helper_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p) 16341{ 16342#pragma unused(dev,fflag,p) 16343 int err, rv = 0; 16344 16345 err = dtrace_ioctl_helper((int)cmd, data, &rv); 16346 /* XXX Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */ 16347 if (err != 0) { 16348 ASSERT( (err & 0xfffff000) == 0 ); 16349 return (err & 0xfff); /* ioctl returns -1 and errno set to an error code < 4096 */ 16350 } else if (rv != 0) { 16351 ASSERT( (rv & 0xfff00000) == 0 ); 16352 return (((rv & 0xfffff) << 20)); /* ioctl returns -1 and errno set to a return value >= 4096 */ 16353 } else 16354 return 0; 16355} 16356 16357#define HELPER_MAJOR -24 /* let the kernel pick the device number */ 16358 16359/* 16360 * A struct describing which functions will get invoked for certain 16361 * actions. 16362 */ 16363static struct cdevsw helper_cdevsw = 16364{ 16365 helper_open, /* open */ 16366 helper_close, /* close */ 16367 eno_rdwrt, /* read */ 16368 eno_rdwrt, /* write */ 16369 helper_ioctl, /* ioctl */ 16370 (stop_fcn_t *)nulldev, /* stop */ 16371 (reset_fcn_t *)nulldev, /* reset */ 16372 NULL, /* tty's */ 16373 eno_select, /* select */ 16374 eno_mmap, /* mmap */ 16375 eno_strat, /* strategy */ 16376 eno_getc, /* getc */ 16377 eno_putc, /* putc */ 16378 0 /* type */ 16379}; 16380 16381static int helper_majdevno = 0; 16382 16383static int gDTraceInited = 0; 16384 16385void 16386helper_init( void ) 16387{ 16388 /* 16389 * Once the "helper" is initialized, it can take ioctl calls that use locks 16390 * and zones initialized in dtrace_init. 
	 * Make certain dtrace_init was called
	 * before us.
	 */

	if (!gDTraceInited) {
		panic("helper_init before dtrace_init\n");
	}

	/* Register the helper character device exactly once. */
	if (0 >= helper_majdevno)
	{
		helper_majdevno = cdevsw_add(HELPER_MAJOR, &helper_cdevsw);

		if (helper_majdevno < 0) {
			printf("helper_init: failed to allocate a major number!\n");
			return;
		}

		if (NULL == devfs_make_node( makedev(helper_majdevno, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666,
					DTRACEMNR_HELPER, 0 )) {
			printf("dtrace_init: failed to devfs_make_node for helper!\n");
			return;
		}
	} else
		panic("helper_init: called twice!\n");
}

#undef HELPER_MAJOR

/*
 * devfs clone callback: proposes the minor number a subsequent open
 * will receive.
 * Called with DEVFS_LOCK held, so vmem_alloc's underlying blist structures are protected.
 */
static int
dtrace_clone_func(dev_t dev, int action)
{
#pragma unused(dev)

	if (action == DEVFS_CLONE_ALLOC) {
		if (NULL == dtrace_minor) /* Arena not created yet!?! */
			return 0;
		else {
			/*
			 * Propose a minor number, namely the next number that vmem_alloc() will return.
			 * Immediately put it back in play by calling vmem_free().
16433 */ 16434 int ret = (int)(uintptr_t)vmem_alloc(dtrace_minor, 1, VM_BESTFIT | VM_SLEEP); 16435 16436 vmem_free(dtrace_minor, (void *)(uintptr_t)ret, 1); 16437 16438 return ret; 16439 } 16440 } 16441 else if (action == DEVFS_CLONE_FREE) { 16442 return 0; 16443 } 16444 else return -1; 16445} 16446 16447#define DTRACE_MAJOR -24 /* let the kernel pick the device number */ 16448 16449static struct cdevsw dtrace_cdevsw = 16450{ 16451 _dtrace_open, /* open */ 16452 _dtrace_close, /* close */ 16453 eno_rdwrt, /* read */ 16454 eno_rdwrt, /* write */ 16455 _dtrace_ioctl, /* ioctl */ 16456 (stop_fcn_t *)nulldev, /* stop */ 16457 (reset_fcn_t *)nulldev, /* reset */ 16458 NULL, /* tty's */ 16459 eno_select, /* select */ 16460 eno_mmap, /* mmap */ 16461 eno_strat, /* strategy */ 16462 eno_getc, /* getc */ 16463 eno_putc, /* putc */ 16464 0 /* type */ 16465}; 16466 16467lck_attr_t* dtrace_lck_attr; 16468lck_grp_attr_t* dtrace_lck_grp_attr; 16469lck_grp_t* dtrace_lck_grp; 16470 16471static int gMajDevNo; 16472 16473void 16474dtrace_init( void ) 16475{ 16476 if (0 == gDTraceInited) { 16477 int i, ncpu = NCPU; 16478 16479 gMajDevNo = cdevsw_add(DTRACE_MAJOR, &dtrace_cdevsw); 16480 16481 if (gMajDevNo < 0) { 16482 printf("dtrace_init: failed to allocate a major number!\n"); 16483 gDTraceInited = 0; 16484 return; 16485 } 16486 16487 if (NULL == devfs_make_node_clone( makedev(gMajDevNo, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666, 16488 dtrace_clone_func, DTRACEMNR_DTRACE, 0 )) { 16489 printf("dtrace_init: failed to devfs_make_node_clone for dtrace!\n"); 16490 gDTraceInited = 0; 16491 return; 16492 } 16493 16494#if defined(DTRACE_MEMORY_ZONES) 16495 16496 /* 16497 * Initialize the dtrace kalloc-emulation zones. 
16498 */ 16499 dtrace_alloc_init(); 16500 16501#endif /* DTRACE_MEMORY_ZONES */ 16502 16503 /* 16504 * Allocate the dtrace_probe_t zone 16505 */ 16506 dtrace_probe_t_zone = zinit(sizeof(dtrace_probe_t), 16507 1024 * sizeof(dtrace_probe_t), 16508 sizeof(dtrace_probe_t), 16509 "dtrace.dtrace_probe_t"); 16510 16511 /* 16512 * Create the dtrace lock group and attrs. 16513 */ 16514 dtrace_lck_attr = lck_attr_alloc_init(); 16515 dtrace_lck_grp_attr= lck_grp_attr_alloc_init(); 16516 dtrace_lck_grp = lck_grp_alloc_init("dtrace", dtrace_lck_grp_attr); 16517 16518 /* 16519 * We have to initialize all locks explicitly 16520 */ 16521 lck_mtx_init(&dtrace_lock, dtrace_lck_grp, dtrace_lck_attr); 16522 lck_mtx_init(&dtrace_provider_lock, dtrace_lck_grp, dtrace_lck_attr); 16523 lck_mtx_init(&dtrace_meta_lock, dtrace_lck_grp, dtrace_lck_attr); 16524#ifdef DEBUG 16525 lck_mtx_init(&dtrace_errlock, dtrace_lck_grp, dtrace_lck_attr); 16526#endif 16527 lck_rw_init(&dtrace_dof_mode_lock, dtrace_lck_grp, dtrace_lck_attr); 16528 16529 /* 16530 * The cpu_core structure consists of per-CPU state available in any context. 16531 * On some architectures, this may mean that the page(s) containing the 16532 * NCPU-sized array of cpu_core structures must be locked in the TLB -- it 16533 * is up to the platform to assure that this is performed properly. Note that 16534 * the structure is sized to avoid false sharing. 
16535 */ 16536 lck_mtx_init(&cpu_lock, dtrace_lck_grp, dtrace_lck_attr); 16537 lck_mtx_init(&mod_lock, dtrace_lck_grp, dtrace_lck_attr); 16538 16539 cpu_core = (cpu_core_t *)kmem_zalloc( ncpu * sizeof(cpu_core_t), KM_SLEEP ); 16540 for (i = 0; i < ncpu; ++i) { 16541 lck_mtx_init(&cpu_core[i].cpuc_pid_lock, dtrace_lck_grp, dtrace_lck_attr); 16542 } 16543 16544 cpu_list = (cpu_t *)kmem_zalloc( ncpu * sizeof(cpu_t), KM_SLEEP ); 16545 for (i = 0; i < ncpu; ++i) { 16546 cpu_list[i].cpu_id = (processorid_t)i; 16547 cpu_list[i].cpu_next = &(cpu_list[(i+1) % ncpu]); 16548 lck_rw_init(&cpu_list[i].cpu_ft_lock, dtrace_lck_grp, dtrace_lck_attr); 16549 } 16550 16551 lck_mtx_lock(&cpu_lock); 16552 for (i = 0; i < ncpu; ++i) 16553 dtrace_cpu_setup_initial( (processorid_t)i ); /* In lieu of register_cpu_setup_func() callback */ 16554 lck_mtx_unlock(&cpu_lock); 16555 16556 (void)dtrace_abs_to_nano(0LL); /* Force once only call to clock_timebase_info (which can take a lock) */ 16557 16558 /* 16559 * See dtrace_impl.h for a description of dof modes. 16560 * The default is lazy dof. 16561 * 16562 * XXX Warn if state is LAZY_OFF? It won't break anything, but 16563 * makes no sense... 16564 */ 16565 if (!PE_parse_boot_argn("dtrace_dof_mode", &dtrace_dof_mode, sizeof (dtrace_dof_mode))) { 16566 dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON; 16567 } 16568 16569 /* 16570 * Sanity check of dof mode value. 
16571 */ 16572 switch (dtrace_dof_mode) { 16573 case DTRACE_DOF_MODE_NEVER: 16574 case DTRACE_DOF_MODE_LAZY_ON: 16575 /* valid modes, but nothing else we need to do */ 16576 break; 16577 16578 case DTRACE_DOF_MODE_LAZY_OFF: 16579 case DTRACE_DOF_MODE_NON_LAZY: 16580 /* Cannot wait for a dtrace_open to init fasttrap */ 16581 fasttrap_init(); 16582 break; 16583 16584 default: 16585 /* Invalid, clamp to non lazy */ 16586 dtrace_dof_mode = DTRACE_DOF_MODE_NON_LAZY; 16587 fasttrap_init(); 16588 break; 16589 } 16590 16591 gDTraceInited = 1; 16592 16593 } else 16594 panic("dtrace_init: called twice!\n"); 16595} 16596 16597void 16598dtrace_postinit(void) 16599{ 16600 dtrace_attach( (dev_info_t *)makedev(gMajDevNo, 0), 0 ); 16601} 16602#undef DTRACE_MAJOR 16603 16604/* 16605 * Routines used to register interest in cpu's being added to or removed 16606 * from the system. 16607 */ 16608void 16609register_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2) 16610{ 16611#pragma unused(ignore1,ignore2) 16612} 16613 16614void 16615unregister_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2) 16616{ 16617#pragma unused(ignore1,ignore2) 16618} 16619#endif /* __APPLE__ */ 16620