dtrace.c revision 284134
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 * 21 * $FreeBSD: stable/10/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c 284134 2015-06-07 20:10:11Z markj $ 22 */ 23 24/* 25 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 26 * Copyright (c) 2013, Joyent, Inc. All rights reserved. 27 * Copyright (c) 2012 by Delphix. All rights reserved. 28 */ 29 30/* 31 * DTrace - Dynamic Tracing for Solaris 32 * 33 * This is the implementation of the Solaris Dynamic Tracing framework 34 * (DTrace). The user-visible interface to DTrace is described at length in 35 * the "Solaris Dynamic Tracing Guide". The interfaces between the libdtrace 36 * library, the in-kernel DTrace framework, and the DTrace providers are 37 * described in the block comments in the <sys/dtrace.h> header file. The 38 * internal architecture of DTrace is described in the block comments in the 39 * <sys/dtrace_impl.h> header file. The comments contained within the DTrace 40 * implementation very much assume mastery of all of these sources; if one has 41 * an unanswered question about the implementation, one should consult them 42 * first. 
43 * 44 * The functions here are ordered roughly as follows: 45 * 46 * - Probe context functions 47 * - Probe hashing functions 48 * - Non-probe context utility functions 49 * - Matching functions 50 * - Provider-to-Framework API functions 51 * - Probe management functions 52 * - DIF object functions 53 * - Format functions 54 * - Predicate functions 55 * - ECB functions 56 * - Buffer functions 57 * - Enabling functions 58 * - DOF functions 59 * - Anonymous enabling functions 60 * - Consumer state functions 61 * - Helper functions 62 * - Hook functions 63 * - Driver cookbook functions 64 * 65 * Each group of functions begins with a block comment labelled the "DTrace 66 * [Group] Functions", allowing one to find each block by searching forward 67 * on capital-f functions. 68 */ 69#include <sys/errno.h> 70#if !defined(sun) 71#include <sys/time.h> 72#endif 73#include <sys/stat.h> 74#include <sys/modctl.h> 75#include <sys/conf.h> 76#include <sys/systm.h> 77#if defined(sun) 78#include <sys/ddi.h> 79#include <sys/sunddi.h> 80#endif 81#include <sys/cpuvar.h> 82#include <sys/kmem.h> 83#if defined(sun) 84#include <sys/strsubr.h> 85#endif 86#include <sys/sysmacros.h> 87#include <sys/dtrace_impl.h> 88#include <sys/atomic.h> 89#include <sys/cmn_err.h> 90#if defined(sun) 91#include <sys/mutex_impl.h> 92#include <sys/rwlock_impl.h> 93#endif 94#include <sys/ctf_api.h> 95#if defined(sun) 96#include <sys/panic.h> 97#include <sys/priv_impl.h> 98#endif 99#include <sys/policy.h> 100#if defined(sun) 101#include <sys/cred_impl.h> 102#include <sys/procfs_isa.h> 103#endif 104#include <sys/taskq.h> 105#if defined(sun) 106#include <sys/mkdev.h> 107#include <sys/kdi.h> 108#endif 109#include <sys/zone.h> 110#include <sys/socket.h> 111#include <netinet/in.h> 112#include "strtolctype.h" 113 114/* FreeBSD includes: */ 115#if !defined(sun) 116#include <sys/callout.h> 117#include <sys/ctype.h> 118#include <sys/eventhandler.h> 119#include <sys/limits.h> 120#include <sys/kdb.h> 121#include 
<sys/kernel.h> 122#include <sys/malloc.h> 123#include <sys/sysctl.h> 124#include <sys/lock.h> 125#include <sys/mutex.h> 126#include <sys/rwlock.h> 127#include <sys/sx.h> 128#include <sys/dtrace_bsd.h> 129#include <netinet/in.h> 130#include "dtrace_cddl.h" 131#include "dtrace_debug.c" 132#endif 133 134/* 135 * DTrace Tunable Variables 136 * 137 * The following variables may be tuned by adding a line to /etc/system that 138 * includes both the name of the DTrace module ("dtrace") and the name of the 139 * variable. For example: 140 * 141 * set dtrace:dtrace_destructive_disallow = 1 142 * 143 * In general, the only variables that one should be tuning this way are those 144 * that affect system-wide DTrace behavior, and for which the default behavior 145 * is undesirable. Most of these variables are tunable on a per-consumer 146 * basis using DTrace options, and need not be tuned on a system-wide basis. 147 * When tuning these variables, avoid pathological values; while some attempt 148 * is made to verify the integrity of these variables, they are not considered 149 * part of the supported interface to DTrace, and they are therefore not 150 * checked comprehensively. Further, these variables should not be tuned 151 * dynamically via "mdb -kw" or other means; they should only be tuned via 152 * /etc/system. 
153 */ 154int dtrace_destructive_disallow = 0; 155dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024); 156size_t dtrace_difo_maxsize = (256 * 1024); 157dtrace_optval_t dtrace_dof_maxsize = (8 * 1024 * 1024); 158size_t dtrace_global_maxsize = (16 * 1024); 159size_t dtrace_actions_max = (16 * 1024); 160size_t dtrace_retain_max = 1024; 161dtrace_optval_t dtrace_helper_actions_max = 128; 162dtrace_optval_t dtrace_helper_providers_max = 32; 163dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024); 164size_t dtrace_strsize_default = 256; 165dtrace_optval_t dtrace_cleanrate_default = 9900990; /* 101 hz */ 166dtrace_optval_t dtrace_cleanrate_min = 200000; /* 5000 hz */ 167dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC; /* 1/minute */ 168dtrace_optval_t dtrace_aggrate_default = NANOSEC; /* 1 hz */ 169dtrace_optval_t dtrace_statusrate_default = NANOSEC; /* 1 hz */ 170dtrace_optval_t dtrace_statusrate_max = (hrtime_t)10 * NANOSEC; /* 6/minute */ 171dtrace_optval_t dtrace_switchrate_default = NANOSEC; /* 1 hz */ 172dtrace_optval_t dtrace_nspec_default = 1; 173dtrace_optval_t dtrace_specsize_default = 32 * 1024; 174dtrace_optval_t dtrace_stackframes_default = 20; 175dtrace_optval_t dtrace_ustackframes_default = 20; 176dtrace_optval_t dtrace_jstackframes_default = 50; 177dtrace_optval_t dtrace_jstackstrsize_default = 512; 178int dtrace_msgdsize_max = 128; 179hrtime_t dtrace_chill_max = MSEC2NSEC(500); /* 500 ms */ 180hrtime_t dtrace_chill_interval = NANOSEC; /* 1000 ms */ 181int dtrace_devdepth_max = 32; 182int dtrace_err_verbose; 183hrtime_t dtrace_deadman_interval = NANOSEC; 184hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC; 185hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC; 186hrtime_t dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC; 187#if !defined(sun) 188int dtrace_memstr_max = 4096; 189#endif 190 191/* 192 * DTrace External Variables 193 * 194 * As dtrace(7D) is a kernel module, any DTrace variables are obviously 195 * 
available to DTrace consumers via the backtick (`) syntax. One of these, 196 * dtrace_zero, is made deliberately so: it is provided as a source of 197 * well-known, zero-filled memory. While this variable is not documented, 198 * it is used by some translators as an implementation detail. 199 */ 200const char dtrace_zero[256] = { 0 }; /* zero-filled memory */ 201 202/* 203 * DTrace Internal Variables 204 */ 205#if defined(sun) 206static dev_info_t *dtrace_devi; /* device info */ 207#endif 208#if defined(sun) 209static vmem_t *dtrace_arena; /* probe ID arena */ 210static vmem_t *dtrace_minor; /* minor number arena */ 211#else 212static taskq_t *dtrace_taskq; /* task queue */ 213static struct unrhdr *dtrace_arena; /* Probe ID number. */ 214#endif 215static dtrace_probe_t **dtrace_probes; /* array of all probes */ 216static int dtrace_nprobes; /* number of probes */ 217static dtrace_provider_t *dtrace_provider; /* provider list */ 218static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */ 219static int dtrace_opens; /* number of opens */ 220static int dtrace_helpers; /* number of helpers */ 221static int dtrace_getf; /* number of unpriv getf()s */ 222#if defined(sun) 223static void *dtrace_softstate; /* softstate pointer */ 224#endif 225static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */ 226static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */ 227static dtrace_hash_t *dtrace_byname; /* probes hashed by name */ 228static dtrace_toxrange_t *dtrace_toxrange; /* toxic range array */ 229static int dtrace_toxranges; /* number of toxic ranges */ 230static int dtrace_toxranges_max; /* size of toxic range array */ 231static dtrace_anon_t dtrace_anon; /* anonymous enabling */ 232static kmem_cache_t *dtrace_state_cache; /* cache for dynamic state */ 233static uint64_t dtrace_vtime_references; /* number of vtimestamp refs */ 234static kthread_t *dtrace_panicked; /* panicking thread */ 235static dtrace_ecb_t *dtrace_ecb_create_cache; /* 
cached created ECB */ 236static dtrace_genid_t dtrace_probegen; /* current probe generation */ 237static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */ 238static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */ 239static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */ 240static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */ 241static int dtrace_dynvar_failclean; /* dynvars failed to clean */ 242#if !defined(sun) 243static struct mtx dtrace_unr_mtx; 244MTX_SYSINIT(dtrace_unr_mtx, &dtrace_unr_mtx, "Unique resource identifier", MTX_DEF); 245int dtrace_in_probe; /* non-zero if executing a probe */ 246#if defined(__i386__) || defined(__amd64__) || defined(__mips__) || defined(__powerpc__) 247uintptr_t dtrace_in_probe_addr; /* Address of invop when already in probe */ 248#endif 249static eventhandler_tag dtrace_kld_load_tag; 250static eventhandler_tag dtrace_kld_unload_try_tag; 251#endif 252 253/* 254 * DTrace Locking 255 * DTrace is protected by three (relatively coarse-grained) locks: 256 * 257 * (1) dtrace_lock is required to manipulate essentially any DTrace state, 258 * including enabling state, probes, ECBs, consumer state, helper state, 259 * etc. Importantly, dtrace_lock is _not_ required when in probe context; 260 * probe context is lock-free -- synchronization is handled via the 261 * dtrace_sync() cross call mechanism. 262 * 263 * (2) dtrace_provider_lock is required when manipulating provider state, or 264 * when provider state must be held constant. 265 * 266 * (3) dtrace_meta_lock is required when manipulating meta provider state, or 267 * when meta provider state must be held constant. 268 * 269 * The lock ordering between these three locks is dtrace_meta_lock before 270 * dtrace_provider_lock before dtrace_lock. 
(In particular, there are 271 * several places where dtrace_provider_lock is held by the framework as it 272 * calls into the providers -- which then call back into the framework, 273 * grabbing dtrace_lock.) 274 * 275 * There are two other locks in the mix: mod_lock and cpu_lock. With respect 276 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical 277 * role as a coarse-grained lock; it is acquired before both of these locks. 278 * With respect to dtrace_meta_lock, its behavior is stranger: cpu_lock must 279 * be acquired _between_ dtrace_meta_lock and any other DTrace locks. 280 * mod_lock is similar with respect to dtrace_provider_lock in that it must be 281 * acquired _between_ dtrace_provider_lock and dtrace_lock. 282 */ 283static kmutex_t dtrace_lock; /* probe state lock */ 284static kmutex_t dtrace_provider_lock; /* provider state lock */ 285static kmutex_t dtrace_meta_lock; /* meta-provider state lock */ 286 287#if !defined(sun) 288/* XXX FreeBSD hacks. */ 289#define cr_suid cr_svuid 290#define cr_sgid cr_svgid 291#define ipaddr_t in_addr_t 292#define mod_modname pathname 293#define vuprintf vprintf 294#define ttoproc(_a) ((_a)->td_proc) 295#define crgetzoneid(_a) 0 296#define NCPU MAXCPU 297#define SNOCD 0 298#define CPU_ON_INTR(_a) 0 299 300#define PRIV_EFFECTIVE (1 << 0) 301#define PRIV_DTRACE_KERNEL (1 << 1) 302#define PRIV_DTRACE_PROC (1 << 2) 303#define PRIV_DTRACE_USER (1 << 3) 304#define PRIV_PROC_OWNER (1 << 4) 305#define PRIV_PROC_ZONE (1 << 5) 306#define PRIV_ALL ~0 307 308SYSCTL_DECL(_debug_dtrace); 309SYSCTL_DECL(_kern_dtrace); 310#endif 311 312#if defined(sun) 313#define curcpu CPU->cpu_id 314#endif 315 316 317/* 318 * DTrace Provider Variables 319 * 320 * These are the variables relating to DTrace as a provider (that is, the 321 * provider of the BEGIN, END, and ERROR probes). 
322 */ 323static dtrace_pattr_t dtrace_provider_attr = { 324{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, 325{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, 326{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, 327{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, 328{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, 329}; 330 331static void 332dtrace_nullop(void) 333{} 334 335static dtrace_pops_t dtrace_provider_ops = { 336 (void (*)(void *, dtrace_probedesc_t *))dtrace_nullop, 337 (void (*)(void *, modctl_t *))dtrace_nullop, 338 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, 339 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, 340 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, 341 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, 342 NULL, 343 NULL, 344 NULL, 345 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop 346}; 347 348static dtrace_id_t dtrace_probeid_begin; /* special BEGIN probe */ 349static dtrace_id_t dtrace_probeid_end; /* special END probe */ 350dtrace_id_t dtrace_probeid_error; /* special ERROR probe */ 351 352/* 353 * DTrace Helper Tracing Variables 354 */ 355uint32_t dtrace_helptrace_next = 0; 356uint32_t dtrace_helptrace_nlocals; 357char *dtrace_helptrace_buffer; 358int dtrace_helptrace_bufsize = 512 * 1024; 359 360#ifdef DEBUG 361int dtrace_helptrace_enabled = 1; 362#else 363int dtrace_helptrace_enabled = 0; 364#endif 365 366/* 367 * DTrace Error Hashing 368 * 369 * On DEBUG kernels, DTrace will track the errors that has seen in a hash 370 * table. This is very useful for checking coverage of tests that are 371 * expected to induce DIF or DOF processing errors, and may be useful for 372 * debugging problems in the DIF code generator or in DOF generation . The 373 * error hash may be examined with the ::dtrace_errhash MDB dcmd. 
374 */ 375#ifdef DEBUG 376static dtrace_errhash_t dtrace_errhash[DTRACE_ERRHASHSZ]; 377static const char *dtrace_errlast; 378static kthread_t *dtrace_errthread; 379static kmutex_t dtrace_errlock; 380#endif 381 382/* 383 * DTrace Macros and Constants 384 * 385 * These are various macros that are useful in various spots in the 386 * implementation, along with a few random constants that have no meaning 387 * outside of the implementation. There is no real structure to this cpp 388 * mishmash -- but is there ever? 389 */ 390#define DTRACE_HASHSTR(hash, probe) \ 391 dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs))) 392 393#define DTRACE_HASHNEXT(hash, probe) \ 394 (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs) 395 396#define DTRACE_HASHPREV(hash, probe) \ 397 (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs) 398 399#define DTRACE_HASHEQ(hash, lhs, rhs) \ 400 (strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \ 401 *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0) 402 403#define DTRACE_AGGHASHSIZE_SLEW 17 404 405#define DTRACE_V4MAPPED_OFFSET (sizeof (uint32_t) * 3) 406 407/* 408 * The key for a thread-local variable consists of the lower 61 bits of the 409 * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL. 410 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never 411 * equal to a variable identifier. This is necessary (but not sufficient) to 412 * assure that global associative arrays never collide with thread-local 413 * variables. To guarantee that they cannot collide, we must also define the 414 * order for keying dynamic variables. That order is: 415 * 416 * [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ] 417 * 418 * Because the variable-key and the tls-key are in orthogonal spaces, there is 419 * no way for a global variable key signature to match a thread-local key 420 * signature. 
421 */ 422#if defined(sun) 423#define DTRACE_TLS_THRKEY(where) { \ 424 uint_t intr = 0; \ 425 uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \ 426 for (; actv; actv >>= 1) \ 427 intr++; \ 428 ASSERT(intr < (1 << 3)); \ 429 (where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \ 430 (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ 431} 432#else 433#define DTRACE_TLS_THRKEY(where) { \ 434 solaris_cpu_t *_c = &solaris_cpu[curcpu]; \ 435 uint_t intr = 0; \ 436 uint_t actv = _c->cpu_intr_actv; \ 437 for (; actv; actv >>= 1) \ 438 intr++; \ 439 ASSERT(intr < (1 << 3)); \ 440 (where) = ((curthread->td_tid + DIF_VARIABLE_MAX) & \ 441 (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ 442} 443#endif 444 445#define DT_BSWAP_8(x) ((x) & 0xff) 446#define DT_BSWAP_16(x) ((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8)) 447#define DT_BSWAP_32(x) ((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16)) 448#define DT_BSWAP_64(x) ((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32)) 449 450#define DT_MASK_LO 0x00000000FFFFFFFFULL 451 452#define DTRACE_STORE(type, tomax, offset, what) \ 453 *((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what); 454 455#ifndef __x86 456#define DTRACE_ALIGNCHECK(addr, size, flags) \ 457 if (addr & (size - 1)) { \ 458 *flags |= CPU_DTRACE_BADALIGN; \ 459 cpu_core[curcpu].cpuc_dtrace_illval = addr; \ 460 return (0); \ 461 } 462#else 463#define DTRACE_ALIGNCHECK(addr, size, flags) 464#endif 465 466/* 467 * Test whether a range of memory starting at testaddr of size testsz falls 468 * within the range of memory described by addr, sz. We take care to avoid 469 * problems with overflow and underflow of the unsigned quantities, and 470 * disallow all negative sizes. Ranges of size 0 are allowed. 
471 */ 472#define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \ 473 ((testaddr) - (uintptr_t)(baseaddr) < (basesz) && \ 474 (testaddr) + (testsz) - (uintptr_t)(baseaddr) <= (basesz) && \ 475 (testaddr) + (testsz) >= (testaddr)) 476 477/* 478 * Test whether alloc_sz bytes will fit in the scratch region. We isolate 479 * alloc_sz on the righthand side of the comparison in order to avoid overflow 480 * or underflow in the comparison with it. This is simpler than the INRANGE 481 * check above, because we know that the dtms_scratch_ptr is valid in the 482 * range. Allocations of size zero are allowed. 483 */ 484#define DTRACE_INSCRATCH(mstate, alloc_sz) \ 485 ((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \ 486 (mstate)->dtms_scratch_ptr >= (alloc_sz)) 487 488#define DTRACE_LOADFUNC(bits) \ 489/*CSTYLED*/ \ 490uint##bits##_t \ 491dtrace_load##bits(uintptr_t addr) \ 492{ \ 493 size_t size = bits / NBBY; \ 494 /*CSTYLED*/ \ 495 uint##bits##_t rval; \ 496 int i; \ 497 volatile uint16_t *flags = (volatile uint16_t *) \ 498 &cpu_core[curcpu].cpuc_dtrace_flags; \ 499 \ 500 DTRACE_ALIGNCHECK(addr, size, flags); \ 501 \ 502 for (i = 0; i < dtrace_toxranges; i++) { \ 503 if (addr >= dtrace_toxrange[i].dtt_limit) \ 504 continue; \ 505 \ 506 if (addr + size <= dtrace_toxrange[i].dtt_base) \ 507 continue; \ 508 \ 509 /* \ 510 * This address falls within a toxic region; return 0. \ 511 */ \ 512 *flags |= CPU_DTRACE_BADADDR; \ 513 cpu_core[curcpu].cpuc_dtrace_illval = addr; \ 514 return (0); \ 515 } \ 516 \ 517 *flags |= CPU_DTRACE_NOFAULT; \ 518 /*CSTYLED*/ \ 519 rval = *((volatile uint##bits##_t *)addr); \ 520 *flags &= ~CPU_DTRACE_NOFAULT; \ 521 \ 522 return (!(*flags & CPU_DTRACE_FAULT) ? 
rval : 0); \ 523} 524 525#ifdef _LP64 526#define dtrace_loadptr dtrace_load64 527#else 528#define dtrace_loadptr dtrace_load32 529#endif 530 531#define DTRACE_DYNHASH_FREE 0 532#define DTRACE_DYNHASH_SINK 1 533#define DTRACE_DYNHASH_VALID 2 534 535#define DTRACE_MATCH_NEXT 0 536#define DTRACE_MATCH_DONE 1 537#define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0') 538#define DTRACE_STATE_ALIGN 64 539 540#define DTRACE_FLAGS2FLT(flags) \ 541 (((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR : \ 542 ((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP : \ 543 ((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO : \ 544 ((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV : \ 545 ((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV : \ 546 ((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW : \ 547 ((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN : \ 548 ((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH : \ 549 ((flags) & CPU_DTRACE_BADSTACK) ? DTRACEFLT_BADSTACK : \ 550 DTRACEFLT_UNKNOWN) 551 552#define DTRACEACT_ISSTRING(act) \ 553 ((act)->dta_kind == DTRACEACT_DIFEXPR && \ 554 (act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) 555 556/* Function prototype definitions: */ 557static size_t dtrace_strlen(const char *, size_t); 558static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id); 559static void dtrace_enabling_provide(dtrace_provider_t *); 560static int dtrace_enabling_match(dtrace_enabling_t *, int *); 561static void dtrace_enabling_matchall(void); 562static void dtrace_enabling_reap(void); 563static dtrace_state_t *dtrace_anon_grab(void); 564static uint64_t dtrace_helper(int, dtrace_mstate_t *, 565 dtrace_state_t *, uint64_t, uint64_t); 566static dtrace_helpers_t *dtrace_helpers_create(proc_t *); 567static void dtrace_buffer_drop(dtrace_buffer_t *); 568static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when); 569static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t, 570 dtrace_state_t *, dtrace_mstate_t *); 571static int 
dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
    dtrace_optval_t);
static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
uint16_t dtrace_load16(uintptr_t);
uint32_t dtrace_load32(uintptr_t);
uint64_t dtrace_load64(uintptr_t);
uint8_t dtrace_load8(uintptr_t);
void dtrace_dynvar_clean(dtrace_dstate_t *);
dtrace_dynvar_t *dtrace_dynvar(dtrace_dstate_t *, uint_t, dtrace_key_t *,
    size_t, dtrace_dynvar_op_t, dtrace_mstate_t *, dtrace_vstate_t *);
uintptr_t dtrace_dif_varstr(uintptr_t, dtrace_state_t *, dtrace_mstate_t *);
static int dtrace_priv_proc(dtrace_state_t *);
static void dtrace_getf_barrier(void);

/*
 * DTrace Probe Context Functions
 *
 * These functions are called from probe context.  Because probe context is
 * any context in which C may be called, arbitrary locks may be held,
 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
 * As a result, functions called from probe context may only call other DTrace
 * support functions -- they may not interact at all with the system at large.
 * (Note that the ASSERT macro is made probe-context safe by redefining it in
 * terms of dtrace_assfail(), a probe-context safe function.)  If arbitrary
 * loads are to be performed from probe context, they _must_ be in terms of
 * the safe dtrace_load*() variants.
 *
 * Some functions in this block are not actually called from probe context;
 * for these functions, there will be a comment above the function reading
 * "Note:  not called from probe context."
 */

/*
 * Panic the system, using the platform's panic facility (vpanic() on
 * FreeBSD, dtrace_vpanic() elsewhere).  Used by dtrace_assfail() below.
 */
void
dtrace_panic(const char *format, ...)
{
	va_list alist;

	va_start(alist, format);
#ifdef __FreeBSD__
	vpanic(format, alist);
#else
	dtrace_vpanic(format, alist);
#endif
	va_end(alist);
}

/*
 * Probe-context-safe assertion failure handler; ASSERT is redefined in terms
 * of this function (see the block comment above).  Panics with the failing
 * expression, file and line.
 */
int
dtrace_assfail(const char *a, const char *f, int l)
{
	dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l);

	/*
	 * We just need something here that even the most clever compiler
	 * cannot optimize away.
	 */
	return (a[(uintptr_t)f]);
}

/*
 * Atomically increment a specified error counter from probe context.
 */
static void
dtrace_error(uint32_t *counter)
{
	/*
	 * Most counters stored to in probe context are per-CPU counters.
	 * However, there are some error conditions that are sufficiently
	 * arcane that they don't merit per-CPU storage.  If these counters
	 * are incremented concurrently on different CPUs, scalability will be
	 * adversely affected -- but we don't expect them to be white-hot in a
	 * correctly constructed enabling...
	 */
	uint32_t oval, nval;

	do {
		oval = *counter;

		if ((nval = oval + 1) == 0) {
			/*
			 * If the counter would wrap, set it to 1 -- assuring
			 * that the counter is never zero when we have seen
			 * errors.  (The counter must be 32-bits because we
			 * aren't guaranteed a 64-bit compare&swap operation.)
			 * To save this code both the infamy of being fingered
			 * by a priggish news story and the indignity of being
			 * the target of a neo-puritan witch trial, we're
			 * carefully avoiding any colorful description of the
			 * likelihood of this condition -- but suffice it to
			 * say that it is only slightly more likely than the
			 * overflow of predicate cache IDs, as discussed in
			 * dtrace_predicate_create().
			 */
			nval = 1;
		}
	} while (dtrace_cas32(counter, oval, nval) != oval);
}

/*
 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a
 * uint8_t, a uint16_t, a uint32_t and a uint64_t.
 */
DTRACE_LOADFUNC(8)
DTRACE_LOADFUNC(16)
DTRACE_LOADFUNC(32)
DTRACE_LOADFUNC(64)

/*
 * Return non-zero if [dest, dest + size) lies entirely within the portion of
 * the scratch region that has been consumed so far (i.e., between the scratch
 * base and the current scratch pointer).  The middle test guards against
 * wrap-around of the unsigned sum dest + size.
 */
static int
dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate)
{
	if (dest < mstate->dtms_scratch_base)
		return (0);

	if (dest + size < dest)
		return (0);

	if (dest + size > mstate->dtms_scratch_ptr)
		return (0);

	return (1);
}

/*
 * Return non-zero if [addr, addr + sz) falls within the data of one of the
 * nsvars static variables in svars; slots that are NULL or of zero size are
 * skipped.
 */
static int
dtrace_canstore_statvar(uint64_t addr, size_t sz,
    dtrace_statvar_t **svars, int nsvars)
{
	int i;

	for (i = 0; i < nsvars; i++) {
		dtrace_statvar_t *svar = svars[i];

		if (svar == NULL || svar->dtsv_size == 0)
			continue;

		if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size))
			return (1);
	}

	return (0);
}

/*
 * Check to see if the address is within a memory region to which a store may
 * be issued.  This includes the DTrace scratch areas, and any DTrace variable
 * region.  The caller of dtrace_canstore() is responsible for performing any
 * alignment checks that are needed before stores are actually executed.
 */
static int
dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	/*
	 * First, check to see if the address is in scratch space...
	 */
	if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base,
	    mstate->dtms_scratch_size))
		return (1);

	/*
	 * Now check to see if it's a dynamic variable.  This check will pick
	 * up both thread-local variables and any global dynamically-allocated
	 * variables.
	 */
	if (DTRACE_INRANGE(addr, sz, vstate->dtvs_dynvars.dtds_base,
	    vstate->dtvs_dynvars.dtds_size)) {
		dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
		uintptr_t base = (uintptr_t)dstate->dtds_base +
		    (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t));
		uintptr_t chunkoffs;

		/*
		 * Before we assume that we can store here, we need to make
		 * sure that it isn't in our metadata -- storing to our
		 * dynamic variable metadata would corrupt our state.  For
		 * the range to not include any dynamic variable metadata,
		 * it must:
		 *
		 *	(1) Start above the hash table that is at the base of
		 *	the dynamic variable space
		 *
		 *	(2) Have a starting chunk offset that is beyond the
		 *	dtrace_dynvar_t that is at the base of every chunk
		 *
		 *	(3) Not span a chunk boundary
		 *
		 */
		if (addr < base)
			return (0);

		chunkoffs = (addr - base) % dstate->dtds_chunksize;

		if (chunkoffs < sizeof (dtrace_dynvar_t))
			return (0);

		if (chunkoffs + sz > dstate->dtds_chunksize)
			return (0);

		return (1);
	}

	/*
	 * Finally, check the static local and global variables.  These checks
	 * take the longest, so we perform them last.
	 */
	if (dtrace_canstore_statvar(addr, sz,
	    vstate->dtvs_locals, vstate->dtvs_nlocals))
		return (1);

	if (dtrace_canstore_statvar(addr, sz,
	    vstate->dtvs_globals, vstate->dtvs_nglobals))
		return (1);

	return (0);
}


/*
 * Convenience routine to check to see if the address is within a memory
 * region in which a load may be issued given the user's privilege level;
 * if not, it sets the appropriate error flags and loads 'addr' into the
 * illegal value slot.
 *
 * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
 * appropriate memory access protection.
 */
static int
dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	volatile uintptr_t *illval = &cpu_core[curcpu].cpuc_dtrace_illval;
	file_t *fp;

	/*
	 * If we hold the privilege to read from kernel memory, then
	 * everything is readable.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
		return (1);

	/*
	 * You can obviously read that which you can store.
	 */
	if (dtrace_canstore(addr, sz, mstate, vstate))
		return (1);

	/*
	 * We're allowed to read from our own string table.
	 */
	if (DTRACE_INRANGE(addr, sz, mstate->dtms_difo->dtdo_strtab,
	    mstate->dtms_difo->dtdo_strlen))
		return (1);

	if (vstate->dtvs_state != NULL &&
	    dtrace_priv_proc(vstate->dtvs_state)) {
		proc_t *p;

		/*
		 * When we have privileges to the current process, there are
		 * several context-related kernel structures that are safe to
		 * read, even absent the privilege to read from kernel memory.
		 * These reads are safe because these structures contain only
		 * state that (1) we're permitted to read, (2) is harmless or
		 * (3) contains pointers to additional kernel state that we're
		 * not permitted to read (and as such, do not present an
		 * opportunity for privilege escalation).  Finally (and
		 * critically), because of the nature of their relation with
		 * the current thread context, the memory associated with these
		 * structures cannot change over the duration of probe context,
		 * and it is therefore impossible for this memory to be
		 * deallocated and reallocated as something else while it's
		 * being operated upon.
		 */
		if (DTRACE_INRANGE(addr, sz, curthread, sizeof (kthread_t)))
			return (1);

		if ((p = curthread->t_procp) != NULL && DTRACE_INRANGE(addr,
		    sz, curthread->t_procp, sizeof (proc_t))) {
			return (1);
		}

		if (curthread->t_cred != NULL && DTRACE_INRANGE(addr, sz,
		    curthread->t_cred, sizeof (cred_t))) {
			return (1);
		}

#if defined(sun)
		if (p != NULL && p->p_pidp != NULL && DTRACE_INRANGE(addr, sz,
		    &(p->p_pidp->pid_id), sizeof (pid_t))) {
			return (1);
		}

		if (curthread->t_cpu != NULL && DTRACE_INRANGE(addr, sz,
		    curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) {
			return (1);
		}
#endif
	}

	if ((fp = mstate->dtms_getf) != NULL) {
		uintptr_t psz = sizeof (void *);
		vnode_t *vp;
		vnodeops_t *op;

		/*
		 * When getf() returns a file_t, the enabling is implicitly
		 * granted the (transient) right to read the returned file_t
		 * as well as the v_path and v_op->vnop_name of the underlying
		 * vnode.  These accesses are allowed after a successful
		 * getf() because the members that they refer to cannot change
		 * once set -- and the barrier logic in the kernel's closef()
		 * path assures that the file_t and its referenced vnode_t
		 * cannot themselves be stale (that is, it is impossible for
		 * either dtms_getf itself or its f_vnode member to reference
		 * freed memory).
		 */
		if (DTRACE_INRANGE(addr, sz, fp, sizeof (file_t)))
			return (1);

		if ((vp = fp->f_vnode) != NULL) {
#if defined(sun)
			if (DTRACE_INRANGE(addr, sz, &vp->v_path, psz))
				return (1);
			if (vp->v_path != NULL && DTRACE_INRANGE(addr, sz,
			    vp->v_path, strlen(vp->v_path) + 1)) {
				return (1);
			}
#endif

			if (DTRACE_INRANGE(addr, sz, &vp->v_op, psz))
				return (1);

#if defined(sun)
			if ((op = vp->v_op) != NULL &&
			    DTRACE_INRANGE(addr, sz, &op->vnop_name, psz)) {
				return (1);
			}

			if (op != NULL && op->vnop_name != NULL &&
			    DTRACE_INRANGE(addr, sz, op->vnop_name,
			    strlen(op->vnop_name) + 1)) {
				return (1);
			}
#endif
		}
	}

	/*
	 * No region matched: flag the fault and record the offending address
	 * in the per-CPU illegal-value slot for the ERROR probe to report.
	 */
	DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
	*illval = addr;
	return (0);
}

/*
 * Convenience routine to check to see if a given string is within a memory
 * region in which a load may be issued given the user's privilege level;
 * this exists so that we don't need to issue unnecessary dtrace_strlen()
 * calls in the event that the user has all privileges.
 */
static int
dtrace_strcanload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	size_t strsz;

	/*
	 * If we hold the privilege to read from kernel memory, then
	 * everything is readable.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
		return (1);

	/*
	 * The check covers the string bytes up to and including the
	 * terminator, bounded above by sz (dtrace_strlen() never reads past
	 * the limit it is given).
	 */
	strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, sz);
	if (dtrace_canload(addr, strsz, mstate, vstate))
		return (1);

	return (0);
}

/*
 * Convenience routine to check to see if a given variable is within a memory
 * region in which a load may be issued given the user's privilege level.
 */
static int
dtrace_vcanload(void *src, dtrace_diftype_t *type, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	size_t sz;
	ASSERT(type->dtdt_flags & DIF_TF_BYREF);

	/*
	 * If we hold the privilege to read from kernel memory, then
	 * everything is readable.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
		return (1);

	/*
	 * Strings are bounded by the consumer's "strsize" option; all other
	 * by-reference types carry their size in the DIF type itself.
	 */
	if (type->dtdt_kind == DIF_TYPE_STRING)
		sz = dtrace_strlen(src,
		    vstate->dtvs_state->dts_options[DTRACEOPT_STRSIZE]) + 1;
	else
		sz = type->dtdt_size;

	return (dtrace_canload((uintptr_t)src, sz, mstate, vstate));
}

/*
 * Convert a string to a signed integer using safe loads.
 *
 * NOTE: This function uses various macros from strtolctype.h to manipulate
 * digit values, etc -- these have all been checked to ensure they make
 * no additional function calls.
 */
static int64_t
dtrace_strtoll(char *input, int base, size_t limit)
{
	uintptr_t pos = (uintptr_t)input;
	int64_t val = 0;
	int x;
	boolean_t neg = B_FALSE;
	char c, cc, ccc;
	uintptr_t end = pos + limit;

	/*
	 * Consume any whitespace preceding digits.
	 */
	while ((c = dtrace_load8(pos)) == ' ' || c == '\t')
		pos++;

	/*
	 * Handle an explicit sign if one is present.
	 */
	if (c == '-' || c == '+') {
		if (c == '-')
			neg = B_TRUE;
		c = dtrace_load8(++pos);
	}

	/*
	 * Check for an explicit hexadecimal prefix ("0x" or "0X") and skip it
	 * if present.
	 */
	if (base == 16 && c == '0' && ((cc = dtrace_load8(pos + 1)) == 'x' ||
	    cc == 'X') && isxdigit(ccc = dtrace_load8(pos + 2))) {
		pos += 2;
		c = ccc;
	}

	/*
	 * Read in contiguous digits until the first non-digit character.
	 */
	for (; pos < end && c != '\0' && lisalnum(c) && (x = DIGIT(c)) < base;
	    c = dtrace_load8(++pos))
		val = val * base + x;

	return (neg ? -val : val);
}

/*
 * Compare two strings using safe loads.  Either (or both) strings may lie in
 * unsafe memory; a NULL pointer compares as an empty string, and the loop
 * bails out if a safe load faults (CPU_DTRACE_FAULT).
 */
static int
dtrace_strncmp(char *s1, char *s2, size_t limit)
{
	uint8_t c1, c2;
	volatile uint16_t *flags;

	if (s1 == s2 || limit == 0)
		return (0);

	flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;

	do {
		if (s1 == NULL) {
			c1 = '\0';
		} else {
			c1 = dtrace_load8((uintptr_t)s1++);
		}

		if (s2 == NULL) {
			c2 = '\0';
		} else {
			c2 = dtrace_load8((uintptr_t)s2++);
		}

		if (c1 != c2)
			return (c1 - c2);
	} while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT));

	return (0);
}

/*
 * Compute strlen(s) for a string using safe memory accesses.  The additional
 * len parameter is used to specify a maximum length to ensure completion.
 */
static size_t
dtrace_strlen(const char *s, size_t lim)
{
	uint_t len;

	for (len = 0; len != lim; len++) {
		if (dtrace_load8((uintptr_t)s++) == '\0')
			break;
	}

	return (len);
}

/*
 * Check if an address falls within a toxic region.  On a hit, the BADADDR
 * fault flag is raised and the offending address recorded in cpuc_dtrace_illval.
 */
static int
dtrace_istoxic(uintptr_t kaddr, size_t size)
{
	uintptr_t taddr, tsize;
	int i;

	for (i = 0; i < dtrace_toxranges; i++) {
		taddr = dtrace_toxrange[i].dtt_base;
		tsize = dtrace_toxrange[i].dtt_limit - taddr;

		/*
		 * Both overlap tests rely on unsigned wraparound: each
		 * subtraction is only "small" when the operands straddle
		 * in the right order, covering both overlap directions.
		 */
		if (kaddr - taddr < tsize) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[curcpu].cpuc_dtrace_illval = kaddr;
			return (1);
		}

		if (taddr - kaddr < size) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[curcpu].cpuc_dtrace_illval = taddr;
			return (1);
		}
	}

	return (0);
}

/*
 * Copy src to dst using safe memory accesses.  The src is assumed to be unsafe
 * memory specified by the DIF program.
The dst is assumed to be safe memory
 * that we can store to directly because it is managed by DTrace.  As with
 * standard bcopy, overlapping copies are handled properly.
 */
static void
dtrace_bcopy(const void *src, void *dst, size_t len)
{
	if (len != 0) {
		uint8_t *s1 = dst;
		const uint8_t *s2 = src;

		/*
		 * Copy forward when dst is at or below src; otherwise copy
		 * backward so overlapping regions are handled correctly.
		 */
		if (s1 <= s2) {
			do {
				*s1++ = dtrace_load8((uintptr_t)s2++);
			} while (--len != 0);
		} else {
			s2 += len;
			s1 += len;

			do {
				*--s1 = dtrace_load8((uintptr_t)--s2);
			} while (--len != 0);
		}
	}
}

/*
 * Copy src to dst using safe memory accesses, up to either the specified
 * length, or the point that a nul byte is encountered.  The src is assumed to
 * be unsafe memory specified by the DIF program.  The dst is assumed to be
 * safe memory that we can store to directly because it is managed by DTrace.
 * Unlike dtrace_bcopy(), overlapping regions are not handled.
 */
static void
dtrace_strcpy(const void *src, void *dst, size_t len)
{
	if (len != 0) {
		uint8_t *s1 = dst, c;
		const uint8_t *s2 = src;

		/* The terminating nul, if reached, is copied as well. */
		do {
			*s1++ = c = dtrace_load8((uintptr_t)s2++);
		} while (--len != 0 && c != '\0');
	}
}

/*
 * Copy src to dst, deriving the size and type from the specified (BYREF)
 * variable type.  The src is assumed to be unsafe memory specified by the DIF
 * program.  The dst is assumed to be DTrace variable memory that is of the
 * specified type; we assume that we can store to directly.
 */
static void
dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type)
{
	ASSERT(type->dtdt_flags & DIF_TF_BYREF);

	if (type->dtdt_kind == DIF_TYPE_STRING) {
		dtrace_strcpy(src, dst, type->dtdt_size);
	} else {
		dtrace_bcopy(src, dst, type->dtdt_size);
	}
}

/*
 * Compare s1 to s2 using safe memory accesses.  The s1 data is assumed to be
 * unsafe memory specified by the DIF program.  The s2 data is assumed to be
 * safe memory that we can access directly because it is managed by DTrace.
 * Returns 0 on equality, 1 on any difference (not a signed ordering); a NULL
 * argument (when the pointers differ) also compares as unequal.
 */
static int
dtrace_bcmp(const void *s1, const void *s2, size_t len)
{
	volatile uint16_t *flags;

	flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;

	if (s1 == s2)
		return (0);

	if (s1 == NULL || s2 == NULL)
		return (1);

	if (s1 != s2 && len != 0) {
		const uint8_t *ps1 = s1;
		const uint8_t *ps2 = s2;

		/* Bail out early if a safe load of s1 faulted. */
		do {
			if (dtrace_load8((uintptr_t)ps1++) != *ps2++)
				return (1);
		} while (--len != 0 && !(*flags & CPU_DTRACE_FAULT));
	}
	return (0);
}

/*
 * Zero the specified region using a simple byte-by-byte loop.  Note that this
 * is for safe DTrace-managed memory only.
 */
static void
dtrace_bzero(void *dst, size_t len)
{
	uchar_t *cp;

	for (cp = dst; len != 0; len--)
		*cp++ = 0;
}

/*
 * 128-bit addition: sum = addend1 + addend2, each operand a {low, high}
 * pair of 64-bit words.  The carry into the high word is detected by the
 * low-word result being smaller than either low-word addend.  sum may
 * alias either addend (results are staged in a local before stores).
 */
static void
dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
{
	uint64_t result[2];

	result[0] = addend1[0] + addend2[0];
	result[1] = addend1[1] + addend2[1] +
	    (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0);

	sum[0] = result[0];
	sum[1] = result[1];
}

/*
 * Shift the 128-bit value in a by b.  If b is positive, shift left.
 * If b is negative, shift right.
 */
static void
dtrace_shift_128(uint64_t *a, int b)
{
	uint64_t mask;

	/* a[0] is the low 64 bits, a[1] the high 64 bits. */
	if (b == 0)
		return;

	if (b < 0) {
		b = -b;
		if (b >= 64) {
			a[0] = a[1] >> (b - 64);
			a[1] = 0;
		} else {
			a[0] >>= b;
			/*
			 * NOTE(review): mask keeps the low (64 - b) bits of
			 * a[1], but after the << (64 - b) only the low b bits
			 * of a[1] survive truncation, which is the intended
			 * carry into a[0].  For b == 1 the expression
			 * 1LL << 63 is formally a signed-shift corner case;
			 * callers here only use b == +/-32.
			 */
			mask = 1LL << (64 - b);
			mask -= 1;
			a[0] |= ((a[1] & mask) << (64 - b));
			a[1] >>= b;
		}
	} else {
		if (b >= 64) {
			a[1] = a[0] << (b - 64);
			a[0] = 0;
		} else {
			a[1] <<= b;
			/* Carry the high b bits of a[0] into a[1]. */
			mask = a[0] >> (64 - b);
			a[1] |= mask;
			a[0] <<= b;
		}
	}
}

/*
 * The basic idea is to break the 2 64-bit values into 4 32-bit values,
 * use native multiplication on those, and then re-combine into the
 * resulting 128-bit value.
 *
 * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
 *     hi1 * hi2 << 64 +
 *     hi1 * lo2 << 32 +
 *     hi2 * lo1 << 32 +
 *     lo1 * lo2
 */
static void
dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
{
	uint64_t hi1, hi2, lo1, lo2;
	uint64_t tmp[2];

	hi1 = factor1 >> 32;
	hi2 = factor2 >> 32;

	lo1 = factor1 & DT_MASK_LO;
	lo2 = factor2 & DT_MASK_LO;

	/* product is {low, high}; start with the non-overlapping partials. */
	product[0] = lo1 * lo2;
	product[1] = hi1 * hi2;

	/* Add in the two cross terms, each shifted up by 32 bits. */
	tmp[0] = hi1 * lo2;
	tmp[1] = 0;
	dtrace_shift_128(tmp, 32);
	dtrace_add_128(product, tmp, product);

	tmp[0] = hi2 * lo1;
	tmp[1] = 0;
	dtrace_shift_128(tmp, 32);
	dtrace_add_128(product, tmp, product);
}

/*
 * This privilege check should be used by actions and subroutines to
 * verify that the user credentials of the process that enabled the
 * invoking ECB match the target credentials
 */
static int
dtrace_priv_proc_common_user(dtrace_state_t *state)
{
	cred_t *cr, *s_cr = state->dts_cred.dcr_cred;

	/*
	 * We should always have a non-NULL state cred here, since if cred
	 * is null (anonymous tracing), we fast-path bypass this routine.
	 */
	ASSERT(s_cr != NULL);

	/*
	 * All three uids (effective, real, saved) and all three gids of the
	 * current thread must match the enabling consumer's credentials.
	 */
	if ((cr = CRED()) != NULL &&
	    s_cr->cr_uid == cr->cr_uid &&
	    s_cr->cr_uid == cr->cr_ruid &&
	    s_cr->cr_uid == cr->cr_suid &&
	    s_cr->cr_gid == cr->cr_gid &&
	    s_cr->cr_gid == cr->cr_rgid &&
	    s_cr->cr_gid == cr->cr_sgid)
		return (1);

	return (0);
}

/*
 * This privilege check should be used by actions and subroutines to
 * verify that the zone of the process that enabled the invoking ECB
 * matches the target credentials
 */
static int
dtrace_priv_proc_common_zone(dtrace_state_t *state)
{
#if defined(sun)
	cred_t *cr, *s_cr = state->dts_cred.dcr_cred;

	/*
	 * We should always have a non-NULL state cred here, since if cred
	 * is null (anonymous tracing), we fast-path bypass this routine.
	 */
	ASSERT(s_cr != NULL);

	if ((cr = CRED()) != NULL && s_cr->cr_zone == cr->cr_zone)
		return (1);

	return (0);
#else
	/* FreeBSD has no Solaris-style zones; the check trivially passes. */
	return (1);
#endif
}

/*
 * This privilege check should be used by actions and subroutines to
 * verify that the process has not setuid or changed credentials.
 */
static int
dtrace_priv_proc_common_nocd(void)
{
	proc_t *proc;

	/* SNOCD is set when a process has setuid/setgid or changed creds. */
	if ((proc = ttoproc(curthread)) != NULL &&
	    !(proc->p_flag & SNOCD))
		return (1);

	return (0);
}

/*
 * Check whether the consumer may take destructive process-related actions
 * against the current process; on failure, flag a user-privilege fault.
 */
static int
dtrace_priv_proc_destructive(dtrace_state_t *state)
{
	int action = state->dts_cred.dcr_action;

	/*
	 * Each ALLZONE/ALLUSER/CREDCHG bit, when absent, requires the
	 * corresponding zone/user/no-cred-change check to pass.
	 */
	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) &&
	    dtrace_priv_proc_common_zone(state) == 0)
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) &&
	    dtrace_priv_proc_common_user(state) == 0)
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) &&
	    dtrace_priv_proc_common_nocd() == 0)
		goto bad;

	return (1);

bad:
	cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

/*
 * Check whether the consumer may exert control over the current process
 * (e.g. stop()); on failure, flag a user-privilege fault.
 */
static int
dtrace_priv_proc_control(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
		return (1);

	/* Without PROC_CONTROL, all three common checks must pass. */
	if (dtrace_priv_proc_common_zone(state) &&
	    dtrace_priv_proc_common_user(state) &&
	    dtrace_priv_proc_common_nocd())
		return (1);

	cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

/*
 * Check whether the consumer holds process-level tracing privilege;
 * on failure, flag a user-privilege fault.
 */
static int
dtrace_priv_proc(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_PROC)
		return (1);

	cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

/*
 * Check whether the consumer holds kernel-level tracing privilege;
 * on failure, flag a kernel-privilege fault.
 */
static int
dtrace_priv_kernel(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL)
		return (1);

	cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;

	return (0);
}

/*
 * Check whether the consumer may take destructive kernel-level actions;
 * on failure, flag a kernel-privilege fault.
 */
static int
dtrace_priv_kernel_destructive(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE)
		return (1);

	cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;

	return (0);
}

/*
 * Determine if the dte_cond of the specified ECB allows for processing of
 * the current probe to continue.  Note that this routine may allow continued
 * processing, but with access(es) stripped from the mstate's dtms_access
 * field.
 */
static int
dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate,
    dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;
	dtrace_provider_t *prov = probe->dtpr_provider;
	dtrace_pops_t *pops = &prov->dtpv_pops;
	/*
	 * NOTE: on !sun the dtps_mode() query below is compiled out, so mode
	 * keeps this initial value and any failed OWNER/ZONEOWNER check
	 * results in a drop (return 0) rather than a restriction.
	 */
	int mode = DTRACE_MODE_NOPRIV_DROP;

	ASSERT(ecb->dte_cond);

#if defined(sun)
	if (pops->dtps_mode != NULL) {
		mode = pops->dtps_mode(prov->dtpv_arg,
		    probe->dtpr_id, probe->dtpr_arg);

		ASSERT((mode & DTRACE_MODE_USER) ||
		    (mode & DTRACE_MODE_KERNEL));
		ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) ||
		    (mode & DTRACE_MODE_NOPRIV_DROP));
	}

	/*
	 * If the dte_cond bits indicate that this consumer is only allowed to
	 * see user-mode firings of this probe, call the provider's dtps_mode()
	 * entry point to check that the probe was fired while in a user
	 * context.  If that's not the case, use the policy specified by the
	 * provider to determine if we drop the probe or merely restrict
	 * operation.
	 */
	if (ecb->dte_cond & DTRACE_COND_USERMODE) {
		ASSERT(mode != DTRACE_MODE_NOPRIV_DROP);

		if (!(mode & DTRACE_MODE_USER)) {
			if (mode & DTRACE_MODE_NOPRIV_DROP)
				return (0);

			mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
		}
	}
#endif

	/*
	 * This is more subtle than it looks. We have to be absolutely certain
	 * that CRED() isn't going to change out from under us so it's only
	 * legit to examine that structure if we're in constrained situations.
	 * Currently, the only times we'll do this check is if a non-super-user
	 * has enabled the profile or syscall providers -- providers that
	 * allow visibility of all processes. For the profile case, the check
	 * above will ensure that we're examining a user context.
	 */
	if (ecb->dte_cond & DTRACE_COND_OWNER) {
		cred_t *cr;
		cred_t *s_cr = state->dts_cred.dcr_cred;
		proc_t *proc;

		ASSERT(s_cr != NULL);

		/*
		 * Drop (or restrict) unless all uids/gids match and the
		 * current process has not changed credentials (SNOCD).
		 */
		if ((cr = CRED()) == NULL ||
		    s_cr->cr_uid != cr->cr_uid ||
		    s_cr->cr_uid != cr->cr_ruid ||
		    s_cr->cr_uid != cr->cr_suid ||
		    s_cr->cr_gid != cr->cr_gid ||
		    s_cr->cr_gid != cr->cr_rgid ||
		    s_cr->cr_gid != cr->cr_sgid ||
		    (proc = ttoproc(curthread)) == NULL ||
		    (proc->p_flag & SNOCD)) {
			if (mode & DTRACE_MODE_NOPRIV_DROP)
				return (0);

#if defined(sun)
			mstate->dtms_access &= ~DTRACE_ACCESS_PROC;
#endif
		}
	}

#if defined(sun)
	/*
	 * If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not
	 * in our zone, check to see if our mode policy is to restrict rather
	 * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC
	 * and DTRACE_ACCESS_ARGS
	 */
	if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
		cred_t *cr;
		cred_t *s_cr = state->dts_cred.dcr_cred;

		ASSERT(s_cr != NULL);

		if ((cr = CRED()) == NULL ||
		    s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) {
			if (mode & DTRACE_MODE_NOPRIV_DROP)
				return (0);

			mstate->dtms_access &=
			    ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS);
		}
	}
#endif

	return (1);
}

/*
 * Note: not called from probe context.  This function is called
 * asynchronously (and at a regular interval) from outside of probe context to
 * clean the dirty dynamic variable lists on all CPUs.  Dynamic variable
 * cleaning is explained in detail in <sys/dtrace_impl.h>.
1560 */ 1561void 1562dtrace_dynvar_clean(dtrace_dstate_t *dstate) 1563{ 1564 dtrace_dynvar_t *dirty; 1565 dtrace_dstate_percpu_t *dcpu; 1566 dtrace_dynvar_t **rinsep; 1567 int i, j, work = 0; 1568 1569 for (i = 0; i < NCPU; i++) { 1570 dcpu = &dstate->dtds_percpu[i]; 1571 rinsep = &dcpu->dtdsc_rinsing; 1572 1573 /* 1574 * If the dirty list is NULL, there is no dirty work to do. 1575 */ 1576 if (dcpu->dtdsc_dirty == NULL) 1577 continue; 1578 1579 if (dcpu->dtdsc_rinsing != NULL) { 1580 /* 1581 * If the rinsing list is non-NULL, then it is because 1582 * this CPU was selected to accept another CPU's 1583 * dirty list -- and since that time, dirty buffers 1584 * have accumulated. This is a highly unlikely 1585 * condition, but we choose to ignore the dirty 1586 * buffers -- they'll be picked up a future cleanse. 1587 */ 1588 continue; 1589 } 1590 1591 if (dcpu->dtdsc_clean != NULL) { 1592 /* 1593 * If the clean list is non-NULL, then we're in a 1594 * situation where a CPU has done deallocations (we 1595 * have a non-NULL dirty list) but no allocations (we 1596 * also have a non-NULL clean list). We can't simply 1597 * move the dirty list into the clean list on this 1598 * CPU, yet we also don't want to allow this condition 1599 * to persist, lest a short clean list prevent a 1600 * massive dirty list from being cleaned (which in 1601 * turn could lead to otherwise avoidable dynamic 1602 * drops). To deal with this, we look for some CPU 1603 * with a NULL clean list, NULL dirty list, and NULL 1604 * rinsing list -- and then we borrow this CPU to 1605 * rinse our dirty list. 
1606 */ 1607 for (j = 0; j < NCPU; j++) { 1608 dtrace_dstate_percpu_t *rinser; 1609 1610 rinser = &dstate->dtds_percpu[j]; 1611 1612 if (rinser->dtdsc_rinsing != NULL) 1613 continue; 1614 1615 if (rinser->dtdsc_dirty != NULL) 1616 continue; 1617 1618 if (rinser->dtdsc_clean != NULL) 1619 continue; 1620 1621 rinsep = &rinser->dtdsc_rinsing; 1622 break; 1623 } 1624 1625 if (j == NCPU) { 1626 /* 1627 * We were unable to find another CPU that 1628 * could accept this dirty list -- we are 1629 * therefore unable to clean it now. 1630 */ 1631 dtrace_dynvar_failclean++; 1632 continue; 1633 } 1634 } 1635 1636 work = 1; 1637 1638 /* 1639 * Atomically move the dirty list aside. 1640 */ 1641 do { 1642 dirty = dcpu->dtdsc_dirty; 1643 1644 /* 1645 * Before we zap the dirty list, set the rinsing list. 1646 * (This allows for a potential assertion in 1647 * dtrace_dynvar(): if a free dynamic variable appears 1648 * on a hash chain, either the dirty list or the 1649 * rinsing list for some CPU must be non-NULL.) 1650 */ 1651 *rinsep = dirty; 1652 dtrace_membar_producer(); 1653 } while (dtrace_casptr(&dcpu->dtdsc_dirty, 1654 dirty, NULL) != dirty); 1655 } 1656 1657 if (!work) { 1658 /* 1659 * We have no work to do; we can simply return. 1660 */ 1661 return; 1662 } 1663 1664 dtrace_sync(); 1665 1666 for (i = 0; i < NCPU; i++) { 1667 dcpu = &dstate->dtds_percpu[i]; 1668 1669 if (dcpu->dtdsc_rinsing == NULL) 1670 continue; 1671 1672 /* 1673 * We are now guaranteed that no hash chain contains a pointer 1674 * into this dirty list; we can make it clean. 1675 */ 1676 ASSERT(dcpu->dtdsc_clean == NULL); 1677 dcpu->dtdsc_clean = dcpu->dtdsc_rinsing; 1678 dcpu->dtdsc_rinsing = NULL; 1679 } 1680 1681 /* 1682 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make 1683 * sure that all CPUs have seen all of the dtdsc_clean pointers. 
1684 * This prevents a race whereby a CPU incorrectly decides that 1685 * the state should be something other than DTRACE_DSTATE_CLEAN 1686 * after dtrace_dynvar_clean() has completed. 1687 */ 1688 dtrace_sync(); 1689 1690 dstate->dtds_state = DTRACE_DSTATE_CLEAN; 1691} 1692 1693/* 1694 * Depending on the value of the op parameter, this function looks-up, 1695 * allocates or deallocates an arbitrarily-keyed dynamic variable. If an 1696 * allocation is requested, this function will return a pointer to a 1697 * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no 1698 * variable can be allocated. If NULL is returned, the appropriate counter 1699 * will be incremented. 1700 */ 1701dtrace_dynvar_t * 1702dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, 1703 dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op, 1704 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) 1705{ 1706 uint64_t hashval = DTRACE_DYNHASH_VALID; 1707 dtrace_dynhash_t *hash = dstate->dtds_hash; 1708 dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL; 1709 processorid_t me = curcpu, cpu = me; 1710 dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me]; 1711 size_t bucket, ksize; 1712 size_t chunksize = dstate->dtds_chunksize; 1713 uintptr_t kdata, lock, nstate; 1714 uint_t i; 1715 1716 ASSERT(nkeys != 0); 1717 1718 /* 1719 * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time" 1720 * algorithm. For the by-value portions, we perform the algorithm in 1721 * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a 1722 * bit, and seems to have only a minute effect on distribution. For 1723 * the by-reference data, we perform "One-at-a-time" iterating (safely) 1724 * over each referenced byte. It's painful to do this, but it's much 1725 * better than pathological hash distribution. The efficacy of the 1726 * hashing algorithm (and a comparison with other algorithms) may be 1727 * found by running the ::dtrace_dynstat MDB dcmd. 
1728 */ 1729 for (i = 0; i < nkeys; i++) { 1730 if (key[i].dttk_size == 0) { 1731 uint64_t val = key[i].dttk_value; 1732 1733 hashval += (val >> 48) & 0xffff; 1734 hashval += (hashval << 10); 1735 hashval ^= (hashval >> 6); 1736 1737 hashval += (val >> 32) & 0xffff; 1738 hashval += (hashval << 10); 1739 hashval ^= (hashval >> 6); 1740 1741 hashval += (val >> 16) & 0xffff; 1742 hashval += (hashval << 10); 1743 hashval ^= (hashval >> 6); 1744 1745 hashval += val & 0xffff; 1746 hashval += (hashval << 10); 1747 hashval ^= (hashval >> 6); 1748 } else { 1749 /* 1750 * This is incredibly painful, but it beats the hell 1751 * out of the alternative. 1752 */ 1753 uint64_t j, size = key[i].dttk_size; 1754 uintptr_t base = (uintptr_t)key[i].dttk_value; 1755 1756 if (!dtrace_canload(base, size, mstate, vstate)) 1757 break; 1758 1759 for (j = 0; j < size; j++) { 1760 hashval += dtrace_load8(base + j); 1761 hashval += (hashval << 10); 1762 hashval ^= (hashval >> 6); 1763 } 1764 } 1765 } 1766 1767 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) 1768 return (NULL); 1769 1770 hashval += (hashval << 3); 1771 hashval ^= (hashval >> 11); 1772 hashval += (hashval << 15); 1773 1774 /* 1775 * There is a remote chance (ideally, 1 in 2^31) that our hashval 1776 * comes out to be one of our two sentinel hash values. If this 1777 * actually happens, we set the hashval to be a value known to be a 1778 * non-sentinel value. 1779 */ 1780 if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK) 1781 hashval = DTRACE_DYNHASH_VALID; 1782 1783 /* 1784 * Yes, it's painful to do a divide here. If the cycle count becomes 1785 * important here, tricks can be pulled to reduce it. (However, it's 1786 * critical that hash collisions be kept to an absolute minimum; 1787 * they're much more painful than a divide.) It's better to have a 1788 * solution that generates few collisions and still keeps things 1789 * relatively simple. 
1790 */ 1791 bucket = hashval % dstate->dtds_hashsize; 1792 1793 if (op == DTRACE_DYNVAR_DEALLOC) { 1794 volatile uintptr_t *lockp = &hash[bucket].dtdh_lock; 1795 1796 for (;;) { 1797 while ((lock = *lockp) & 1) 1798 continue; 1799 1800 if (dtrace_casptr((volatile void *)lockp, 1801 (volatile void *)lock, (volatile void *)(lock + 1)) == (void *)lock) 1802 break; 1803 } 1804 1805 dtrace_membar_producer(); 1806 } 1807 1808top: 1809 prev = NULL; 1810 lock = hash[bucket].dtdh_lock; 1811 1812 dtrace_membar_consumer(); 1813 1814 start = hash[bucket].dtdh_chain; 1815 ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK || 1816 start->dtdv_hashval != DTRACE_DYNHASH_FREE || 1817 op != DTRACE_DYNVAR_DEALLOC)); 1818 1819 for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) { 1820 dtrace_tuple_t *dtuple = &dvar->dtdv_tuple; 1821 dtrace_key_t *dkey = &dtuple->dtt_key[0]; 1822 1823 if (dvar->dtdv_hashval != hashval) { 1824 if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) { 1825 /* 1826 * We've reached the sink, and therefore the 1827 * end of the hash chain; we can kick out of 1828 * the loop knowing that we have seen a valid 1829 * snapshot of state. 1830 */ 1831 ASSERT(dvar->dtdv_next == NULL); 1832 ASSERT(dvar == &dtrace_dynhash_sink); 1833 break; 1834 } 1835 1836 if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) { 1837 /* 1838 * We've gone off the rails: somewhere along 1839 * the line, one of the members of this hash 1840 * chain was deleted. Note that we could also 1841 * detect this by simply letting this loop run 1842 * to completion, as we would eventually hit 1843 * the end of the dirty list. However, we 1844 * want to avoid running the length of the 1845 * dirty list unnecessarily (it might be quite 1846 * long), so we catch this as early as 1847 * possible by detecting the hash marker. In 1848 * this case, we simply set dvar to NULL and 1849 * break; the conditional after the loop will 1850 * send us back to top. 
1851 */ 1852 dvar = NULL; 1853 break; 1854 } 1855 1856 goto next; 1857 } 1858 1859 if (dtuple->dtt_nkeys != nkeys) 1860 goto next; 1861 1862 for (i = 0; i < nkeys; i++, dkey++) { 1863 if (dkey->dttk_size != key[i].dttk_size) 1864 goto next; /* size or type mismatch */ 1865 1866 if (dkey->dttk_size != 0) { 1867 if (dtrace_bcmp( 1868 (void *)(uintptr_t)key[i].dttk_value, 1869 (void *)(uintptr_t)dkey->dttk_value, 1870 dkey->dttk_size)) 1871 goto next; 1872 } else { 1873 if (dkey->dttk_value != key[i].dttk_value) 1874 goto next; 1875 } 1876 } 1877 1878 if (op != DTRACE_DYNVAR_DEALLOC) 1879 return (dvar); 1880 1881 ASSERT(dvar->dtdv_next == NULL || 1882 dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE); 1883 1884 if (prev != NULL) { 1885 ASSERT(hash[bucket].dtdh_chain != dvar); 1886 ASSERT(start != dvar); 1887 ASSERT(prev->dtdv_next == dvar); 1888 prev->dtdv_next = dvar->dtdv_next; 1889 } else { 1890 if (dtrace_casptr(&hash[bucket].dtdh_chain, 1891 start, dvar->dtdv_next) != start) { 1892 /* 1893 * We have failed to atomically swing the 1894 * hash table head pointer, presumably because 1895 * of a conflicting allocation on another CPU. 1896 * We need to reread the hash chain and try 1897 * again. 1898 */ 1899 goto top; 1900 } 1901 } 1902 1903 dtrace_membar_producer(); 1904 1905 /* 1906 * Now set the hash value to indicate that it's free. 1907 */ 1908 ASSERT(hash[bucket].dtdh_chain != dvar); 1909 dvar->dtdv_hashval = DTRACE_DYNHASH_FREE; 1910 1911 dtrace_membar_producer(); 1912 1913 /* 1914 * Set the next pointer to point at the dirty list, and 1915 * atomically swing the dirty pointer to the newly freed dvar. 1916 */ 1917 do { 1918 next = dcpu->dtdsc_dirty; 1919 dvar->dtdv_next = next; 1920 } while (dtrace_casptr(&dcpu->dtdsc_dirty, next, dvar) != next); 1921 1922 /* 1923 * Finally, unlock this hash bucket. 
1924 */ 1925 ASSERT(hash[bucket].dtdh_lock == lock); 1926 ASSERT(lock & 1); 1927 hash[bucket].dtdh_lock++; 1928 1929 return (NULL); 1930next: 1931 prev = dvar; 1932 continue; 1933 } 1934 1935 if (dvar == NULL) { 1936 /* 1937 * If dvar is NULL, it is because we went off the rails: 1938 * one of the elements that we traversed in the hash chain 1939 * was deleted while we were traversing it. In this case, 1940 * we assert that we aren't doing a dealloc (deallocs lock 1941 * the hash bucket to prevent themselves from racing with 1942 * one another), and retry the hash chain traversal. 1943 */ 1944 ASSERT(op != DTRACE_DYNVAR_DEALLOC); 1945 goto top; 1946 } 1947 1948 if (op != DTRACE_DYNVAR_ALLOC) { 1949 /* 1950 * If we are not to allocate a new variable, we want to 1951 * return NULL now. Before we return, check that the value 1952 * of the lock word hasn't changed. If it has, we may have 1953 * seen an inconsistent snapshot. 1954 */ 1955 if (op == DTRACE_DYNVAR_NOALLOC) { 1956 if (hash[bucket].dtdh_lock != lock) 1957 goto top; 1958 } else { 1959 ASSERT(op == DTRACE_DYNVAR_DEALLOC); 1960 ASSERT(hash[bucket].dtdh_lock == lock); 1961 ASSERT(lock & 1); 1962 hash[bucket].dtdh_lock++; 1963 } 1964 1965 return (NULL); 1966 } 1967 1968 /* 1969 * We need to allocate a new dynamic variable. The size we need is the 1970 * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the 1971 * size of any auxiliary key data (rounded up to 8-byte alignment) plus 1972 * the size of any referred-to data (dsize). We then round the final 1973 * size up to the chunksize for allocation. 1974 */ 1975 for (ksize = 0, i = 0; i < nkeys; i++) 1976 ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t)); 1977 1978 /* 1979 * This should be pretty much impossible, but could happen if, say, 1980 * strange DIF specified the tuple. 
Ideally, this should be an 1981 * assertion and not an error condition -- but that requires that the 1982 * chunksize calculation in dtrace_difo_chunksize() be absolutely 1983 * bullet-proof. (That is, it must not be able to be fooled by 1984 * malicious DIF.) Given the lack of backwards branches in DIF, 1985 * solving this would presumably not amount to solving the Halting 1986 * Problem -- but it still seems awfully hard. 1987 */ 1988 if (sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (nkeys - 1) + 1989 ksize + dsize > chunksize) { 1990 dcpu->dtdsc_drops++; 1991 return (NULL); 1992 } 1993 1994 nstate = DTRACE_DSTATE_EMPTY; 1995 1996 do { 1997retry: 1998 free = dcpu->dtdsc_free; 1999 2000 if (free == NULL) { 2001 dtrace_dynvar_t *clean = dcpu->dtdsc_clean; 2002 void *rval; 2003 2004 if (clean == NULL) { 2005 /* 2006 * We're out of dynamic variable space on 2007 * this CPU. Unless we have tried all CPUs, 2008 * we'll try to allocate from a different 2009 * CPU. 2010 */ 2011 switch (dstate->dtds_state) { 2012 case DTRACE_DSTATE_CLEAN: { 2013 void *sp = &dstate->dtds_state; 2014 2015 if (++cpu >= NCPU) 2016 cpu = 0; 2017 2018 if (dcpu->dtdsc_dirty != NULL && 2019 nstate == DTRACE_DSTATE_EMPTY) 2020 nstate = DTRACE_DSTATE_DIRTY; 2021 2022 if (dcpu->dtdsc_rinsing != NULL) 2023 nstate = DTRACE_DSTATE_RINSING; 2024 2025 dcpu = &dstate->dtds_percpu[cpu]; 2026 2027 if (cpu != me) 2028 goto retry; 2029 2030 (void) dtrace_cas32(sp, 2031 DTRACE_DSTATE_CLEAN, nstate); 2032 2033 /* 2034 * To increment the correct bean 2035 * counter, take another lap. 2036 */ 2037 goto retry; 2038 } 2039 2040 case DTRACE_DSTATE_DIRTY: 2041 dcpu->dtdsc_dirty_drops++; 2042 break; 2043 2044 case DTRACE_DSTATE_RINSING: 2045 dcpu->dtdsc_rinsing_drops++; 2046 break; 2047 2048 case DTRACE_DSTATE_EMPTY: 2049 dcpu->dtdsc_drops++; 2050 break; 2051 } 2052 2053 DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP); 2054 return (NULL); 2055 } 2056 2057 /* 2058 * The clean list appears to be non-empty. 
We want to 2059 * move the clean list to the free list; we start by 2060 * moving the clean pointer aside. 2061 */ 2062 if (dtrace_casptr(&dcpu->dtdsc_clean, 2063 clean, NULL) != clean) { 2064 /* 2065 * We are in one of two situations: 2066 * 2067 * (a) The clean list was switched to the 2068 * free list by another CPU. 2069 * 2070 * (b) The clean list was added to by the 2071 * cleansing cyclic. 2072 * 2073 * In either of these situations, we can 2074 * just reattempt the free list allocation. 2075 */ 2076 goto retry; 2077 } 2078 2079 ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE); 2080 2081 /* 2082 * Now we'll move the clean list to our free list. 2083 * It's impossible for this to fail: the only way 2084 * the free list can be updated is through this 2085 * code path, and only one CPU can own the clean list. 2086 * Thus, it would only be possible for this to fail if 2087 * this code were racing with dtrace_dynvar_clean(). 2088 * (That is, if dtrace_dynvar_clean() updated the clean 2089 * list, and we ended up racing to update the free 2090 * list.) This race is prevented by the dtrace_sync() 2091 * in dtrace_dynvar_clean() -- which flushes the 2092 * owners of the clean lists out before resetting 2093 * the clean lists. 2094 */ 2095 dcpu = &dstate->dtds_percpu[me]; 2096 rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean); 2097 ASSERT(rval == NULL); 2098 goto retry; 2099 } 2100 2101 dvar = free; 2102 new_free = dvar->dtdv_next; 2103 } while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free); 2104 2105 /* 2106 * We have now allocated a new chunk. We copy the tuple keys into the 2107 * tuple array and copy any referenced key data into the data space 2108 * following the tuple array. As we do this, we relocate dttk_value 2109 * in the final tuple to point to the key data address in the chunk. 
2110 */ 2111 kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys]; 2112 dvar->dtdv_data = (void *)(kdata + ksize); 2113 dvar->dtdv_tuple.dtt_nkeys = nkeys; 2114 2115 for (i = 0; i < nkeys; i++) { 2116 dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i]; 2117 size_t kesize = key[i].dttk_size; 2118 2119 if (kesize != 0) { 2120 dtrace_bcopy( 2121 (const void *)(uintptr_t)key[i].dttk_value, 2122 (void *)kdata, kesize); 2123 dkey->dttk_value = kdata; 2124 kdata += P2ROUNDUP(kesize, sizeof (uint64_t)); 2125 } else { 2126 dkey->dttk_value = key[i].dttk_value; 2127 } 2128 2129 dkey->dttk_size = kesize; 2130 } 2131 2132 ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE); 2133 dvar->dtdv_hashval = hashval; 2134 dvar->dtdv_next = start; 2135 2136 if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar) == start) 2137 return (dvar); 2138 2139 /* 2140 * The cas has failed. Either another CPU is adding an element to 2141 * this hash chain, or another CPU is deleting an element from this 2142 * hash chain. The simplest way to deal with both of these cases 2143 * (though not necessarily the most efficient) is to free our 2144 * allocated block and tail-call ourselves. Note that the free is 2145 * to the dirty list and _not_ to the free list. This is to prevent 2146 * races with allocators, above. 
	 */
	dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;

	/*
	 * Make the DTRACE_DYNHASH_FREE store visible before the chunk is
	 * published on the dirty list below.
	 */
	dtrace_membar_producer();

	do {
		free = dcpu->dtdsc_dirty;
		dvar->dtdv_next = free;
	} while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free);

	return (dtrace_dynvar(dstate, nkeys, key, dsize, op, mstate, vstate));
}

/*
 * Aggregating action for min(): retain the smallest value seen, comparing
 * as signed 64-bit.  oval points at the current aggregated value, nval is
 * the new sample; arg is unused.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	if ((int64_t)nval < (int64_t)*oval)
		*oval = nval;
}

/*
 * Aggregating action for max(): retain the largest value seen, comparing
 * as signed 64-bit.  arg is unused.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	if ((int64_t)nval > (int64_t)*oval)
		*oval = nval;
}

/*
 * Aggregating action for quantize():  bump the bucket corresponding to
 * nval by incr.  Negative values are searched among the buckets below
 * DTRACE_QUANTIZE_ZEROBUCKET, non-negative values among those above it;
 * values beyond the largest bucket land in the topmost bucket.  The
 * trailing ASSERT(0) is reachable only if a negative value fits no
 * bucket, which the bucket layout should make impossible.
 */
static void
dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr)
{
	int i, zero = DTRACE_QUANTIZE_ZEROBUCKET;
	int64_t val = (int64_t)nval;

	if (val < 0) {
		for (i = 0; i < zero; i++) {
			if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) {
				quanta[i] += incr;
				return;
			}
		}
	} else {
		for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) {
			if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) {
				quanta[i - 1] += incr;
				return;
			}
		}

		quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr;
		return;
	}

	ASSERT(0);
}

/*
 * Aggregating action for lquantize():  the first element of the aggregation
 * data encodes the base/step/levels parameters (packed by the compiler);
 * the remaining elements are the buckets.  Bucket 0 counts underflow
 * (val < base), bucket levels + 1 counts overflow, and bucket level + 1
 * holds values in the linear range.
 */
static void
dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr)
{
	uint64_t arg = *lquanta++;
	int32_t base = DTRACE_LQUANTIZE_BASE(arg);
	uint16_t step = DTRACE_LQUANTIZE_STEP(arg);
	uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);
	int32_t val = (int32_t)nval, level;

	ASSERT(step != 0);
	ASSERT(levels != 0);

	if (val < base) {
		/*
		 * This is an underflow.
		 */
		lquanta[0] += incr;
		return;
	}

	level = (val - base) / step;

	if (level < levels) {
		lquanta[level + 1] += incr;
		return;
	}

	/*
	 * This is an overflow.
	 */
	lquanta[levels + 1] += incr;
}

/*
 * Map a value to its llquantize() bucket index given the factor, low and
 * high orders of magnitude, and the number of linear steps per order.
 * Bucket 0 is the underflow bucket; the final return is the top bucket.
 */
static int
dtrace_aggregate_llquantize_bucket(uint16_t factor, uint16_t low,
    uint16_t high, uint16_t nsteps, int64_t value)
{
	int64_t this = 1, last, next;
	int base = 1, order;

	ASSERT(factor <= nsteps);
	ASSERT(nsteps % factor == 0);

	for (order = 0; order < low; order++)
		this *= factor;

	/*
	 * If our value is less than our factor taken to the power of the
	 * low order of magnitude, it goes into the zeroth bucket.
	 */
	if (value < (last = this))
		return (0);

	for (this *= factor; order <= high; order++) {
		int nbuckets = this > nsteps ? nsteps : this;

		if ((next = this * factor) < this) {
			/*
			 * We should not generally get log/linear quantizations
			 * with a high magnitude that allows 64-bits to
			 * overflow, but we nonetheless protect against this
			 * by explicitly checking for overflow, and clamping
			 * our value accordingly.
			 */
			value = this - 1;
		}

		if (value < this) {
			/*
			 * If our value lies within this order of magnitude,
			 * determine its position by taking the offset within
			 * the order of magnitude, dividing by the bucket
			 * width, and adding to our (accumulated) base.
			 */
			return (base + (value - last) / (this / nbuckets));
		}

		base += nbuckets - (nbuckets / factor);
		last = this;
		this = next;
	}

	/*
	 * Our value is greater than or equal to our factor taken to the
	 * power of one plus the high magnitude -- return the top bucket.
	 */
	return (base);
}

/*
 * Aggregating action for llquantize():  the first element encodes the
 * factor/low/high/nsteps parameters; bump the computed bucket by incr.
 */
static void
dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr)
{
	uint64_t arg = *llquanta++;
	uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg);
	uint16_t low = DTRACE_LLQUANTIZE_LOW(arg);
	uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg);
	uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);

	llquanta[dtrace_aggregate_llquantize_bucket(factor,
	    low, high, nsteps, nval)] += incr;
}

/*
 * Aggregating action for avg():  data[0] is the sample count, data[1] the
 * running sum; the consumer computes the quotient.  arg is unused.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
{
	data[0]++;
	data[1] += nval;
}

/*
 * Aggregating action for stddev():  data[0] is the sample count, data[1]
 * the running sum, and data[2..3] the 128-bit running sum of squares.
 * arg is unused.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg)
{
	int64_t snval = (int64_t)nval;
	uint64_t tmp[2];

	data[0]++;
	data[1] += nval;

	/*
	 * What we want to say here is:
	 *
	 * data[2] += nval * nval;
	 *
	 * But given that nval is 64-bit, we could easily overflow, so
	 * we do this as 128-bit arithmetic.
	 */
	if (snval < 0)
		snval = -snval;

	dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp);
	dtrace_add_128(data + 2, tmp, data + 2);
}

/*
 * Aggregating action for count():  nval and arg are unused.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	*oval = *oval + 1;
}

/*
 * Aggregating action for sum():  arg is unused.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	*oval += nval;
}

/*
 * Aggregate given the tuple in the principal data buffer, and the aggregating
 * action denoted by the specified dtrace_aggregation_t.  The aggregation
 * buffer is specified as the buf parameter.  This routine does not return
 * failure; if there is no space in the aggregation buffer, the data will be
 * dropped, and a corresponding counter incremented.
 *
 * dbuf/offset locate the record containing the tuple in the principal
 * buffer; expr is the value to be aggregated, and arg is the aggregating
 * action's optional argument (an increment, defaulted to 1 below when the
 * action takes no argument).
 */
static void
dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf,
    intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg)
{
	dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec;
	uint32_t i, ndx, size, fsize;
	uint32_t align = sizeof (uint64_t) - 1;
	dtrace_aggbuffer_t *agb;
	dtrace_aggkey_t *key;
	uint32_t hashval = 0, limit, isstr;
	caddr_t tomax, data, kdata;
	dtrace_actkind_t action;
	dtrace_action_t *act;
	uintptr_t offs;

	if (buf == NULL)
		return;

	if (!agg->dtag_hasarg) {
		/*
		 * Currently, only quantize() and lquantize() take additional
		 * arguments, and they have the same semantics: an increment
		 * value that defaults to 1 when not present.  (llquantize()
		 * is aggregated with the same increment semantics; see
		 * dtrace_aggregate_llquantize() above.)  If additional
		 * aggregating actions take arguments, the setting of the
		 * default argument value will presumably have to become more
		 * sophisticated...
		 */
		arg = 1;
	}

	action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION;
	size = rec->dtrd_offset - agg->dtag_base;
	fsize = size + rec->dtrd_size;

	ASSERT(dbuf->dtb_tomax != NULL);
	data = dbuf->dtb_tomax + offset + agg->dtag_base;

	if ((tomax = buf->dtb_tomax) == NULL) {
		dtrace_buffer_drop(buf);
		return;
	}

	/*
	 * The metastructure is always at the bottom of the buffer.
	 */
	agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size -
	    sizeof (dtrace_aggbuffer_t));

	if (buf->dtb_offset == 0) {
		/*
		 * We just kludge up approximately 1/8th of the size to be
		 * buckets.  If this guess ends up being routinely
		 * off-the-mark, we may need to dynamically readjust this
		 * based on past performance.
		 */
		uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t);

		if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) <
		    (uintptr_t)tomax || hashsize == 0) {
			/*
			 * We've been given a ludicrously small buffer;
			 * increment our drop count and leave.
			 */
			dtrace_buffer_drop(buf);
			return;
		}

		/*
		 * And now, a pathetic attempt to try to get an odd (or
		 * perchance, a prime) hash size for better hash distribution.
		 */
		if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3))
			hashsize -= DTRACE_AGGHASHSIZE_SLEW;

		agb->dtagb_hashsize = hashsize;
		agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb -
		    agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *));
		agb->dtagb_free = (uintptr_t)agb->dtagb_hash;

		for (i = 0; i < agb->dtagb_hashsize; i++)
			agb->dtagb_hash[i] = NULL;
	}

	ASSERT(agg->dtag_first != NULL);
	ASSERT(agg->dtag_first->dta_intuple);

	/*
	 * Calculate the hash value based on the key.  Note that we _don't_
	 * include the aggid in the hashing (but we will store it as part of
	 * the key).  The hashing algorithm is Bob Jenkins' "One-at-a-time"
	 * algorithm: a simple, quick algorithm that has no known funnels, and
	 * gets good distribution in practice.  The efficacy of the hashing
	 * algorithm (and a comparison with other algorithms) may be found by
	 * running the ::dtrace_aggstat MDB dcmd.
	 */
	for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
		i = act->dta_rec.dtrd_offset - agg->dtag_base;
		limit = i + act->dta_rec.dtrd_size;
		ASSERT(limit <= size);
		isstr = DTRACEACT_ISSTRING(act);

		for (; i < limit; i++) {
			hashval += data[i];
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			/*
			 * For string keys, stop hashing at the terminating
			 * NUL so that identical strings with different
			 * trailing garbage hash identically.
			 */
			if (isstr && data[i] == '\0')
				break;
		}
	}

	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);

	/*
	 * Yes, the divide here is expensive -- but it's generally the least
	 * of the performance issues given the amount of data that we iterate
	 * over to compute hash values, compare data, etc.
	 */
	ndx = hashval % agb->dtagb_hashsize;

	for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) {
		ASSERT((caddr_t)key >= tomax);
		ASSERT((caddr_t)key < tomax + buf->dtb_size);

		if (hashval != key->dtak_hashval || key->dtak_size != size)
			continue;

		kdata = key->dtak_data;
		ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size);

		for (act = agg->dtag_first; act->dta_intuple;
		    act = act->dta_next) {
			i = act->dta_rec.dtrd_offset - agg->dtag_base;
			limit = i + act->dta_rec.dtrd_size;
			ASSERT(limit <= size);
			isstr = DTRACEACT_ISSTRING(act);

			for (; i < limit; i++) {
				if (kdata[i] != data[i])
					goto next;

				if (isstr && data[i] == '\0')
					break;
			}
		}

		if (action != key->dtak_action) {
			/*
			 * We are aggregating on the same value in the same
			 * aggregation with two different aggregating actions.
			 * (This should have been picked up in the compiler,
			 * so we may be dealing with errant or devious DIF.)
			 * This is an error condition; we indicate as much,
			 * and return.
			 */
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return;
		}

		/*
		 * This is a hit:  we need to apply the aggregator to
		 * the value at this key.
		 */
		agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg);
		return;
next:
		continue;
	}

	/*
	 * We didn't find it.  We need to allocate some zero-filled space,
	 * link it into the hash table appropriately, and apply the aggregator
	 * to the (zero-filled) value.
	 */
	offs = buf->dtb_offset;
	/*
	 * NOTE(review): align is sizeof (uint64_t) - 1 (i.e. 7), so this
	 * masks with (align - 1) == 6; the loop 8-byte-aligns offs only
	 * when offs is already a multiple of 4 -- presumably an invariant
	 * of buffer offsets.  TODO: confirm against dtrace_buffer_reserve().
	 */
	while (offs & (align - 1))
		offs += sizeof (uint32_t);

	/*
	 * If we don't have enough room to both allocate a new key _and_
	 * its associated data, increment the drop count and return.
	 */
	if ((uintptr_t)tomax + offs + fsize >
	    agb->dtagb_free - sizeof (dtrace_aggkey_t)) {
		dtrace_buffer_drop(buf);
		return;
	}

	/*CONSTCOND*/
	ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1)));
	key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t));
	agb->dtagb_free -= sizeof (dtrace_aggkey_t);

	key->dtak_data = kdata = tomax + offs;
	buf->dtb_offset = offs + fsize;

	/*
	 * Now copy the data across.
	 */
	*((dtrace_aggid_t *)kdata) = agg->dtag_id;

	for (i = sizeof (dtrace_aggid_t); i < size; i++)
		kdata[i] = data[i];

	/*
	 * Because strings are not zeroed out by default, we need to iterate
	 * looking for actions that store strings, and we need to explicitly
	 * pad these strings out with zeroes.
	 */
	for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
		int nul;

		if (!DTRACEACT_ISSTRING(act))
			continue;

		i = act->dta_rec.dtrd_offset - agg->dtag_base;
		limit = i + act->dta_rec.dtrd_size;
		ASSERT(limit <= size);

		/* Zero everything after the string's terminating NUL. */
		for (nul = 0; i < limit; i++) {
			if (nul) {
				kdata[i] = '\0';
				continue;
			}

			if (data[i] != '\0')
				continue;

			nul = 1;
		}
	}

	/* Zero-fill the value area so the aggregator starts from zero. */
	for (i = size; i < fsize; i++)
		kdata[i] = 0;

	key->dtak_hashval = hashval;
	key->dtak_size = size;
	key->dtak_action = action;
	key->dtak_next = agb->dtagb_hash[ndx];
	agb->dtagb_hash[ndx] = key;

	/*
	 * Finally, apply the aggregator.
	 */
	*((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial;
	agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg);
}

/*
 * Given consumer state, this routine finds a speculation in the INACTIVE
 * state and transitions it into the ACTIVE state.  If there is no speculation
 * in the INACTIVE state, 0 is returned.  In this case, no error counter is
 * incremented -- it is up to the caller to take appropriate action.
 */
static int
dtrace_speculation(dtrace_state_t *state)
{
	int i = 0;
	dtrace_speculation_state_t current;
	uint32_t *stat = &state->dts_speculations_unavail, count;

	while (i < state->dts_nspeculations) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];

		current = spec->dtsp_state;

		if (current != DTRACESPEC_INACTIVE) {
			if (current == DTRACESPEC_COMMITTINGMANY ||
			    current == DTRACESPEC_COMMITTING ||
			    current == DTRACESPEC_DISCARDING)
				stat = &state->dts_speculations_busy;
			i++;
			continue;
		}

		/*
		 * On CAS failure another CPU raced us for this slot; we
		 * retry the same slot (i is deliberately not advanced).
		 * Speculation IDs are 1-based, hence i + 1.
		 */
		if (dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    current, DTRACESPEC_ACTIVE) == current)
			return (i + 1);
	}

	/*
	 * We couldn't find a speculation.  If we found as much as a single
	 * busy speculation buffer, we'll attribute this failure as "busy"
	 * instead of "unavail".
	 */
	do {
		count = *stat;
	} while (dtrace_cas32(stat, count, count + 1) != count);

	return (0);
}

/*
 * This routine commits an active speculation.  If the specified speculation
 * is not in a valid state to perform a commit(), this routine will silently do
 * nothing.  The state of the specified speculation is transitioned according
 * to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_buffer_t *src, *dest;
	uintptr_t daddr, saddr, dlimit, slimit;
	/* new = 0 only to quiet the compiler; every reached path sets it. */
	dtrace_speculation_state_t current, new = 0;
	intptr_t offs;
	uint64_t timestamp;

	if (which == 0)
		return;

	if (which > state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	spec = &state->dts_speculations[which - 1];
	src = &spec->dtsp_buffer[cpu];
	dest = &state->dts_buffer[cpu];

	do {
		current = spec->dtsp_state;

		if (current == DTRACESPEC_COMMITTINGMANY)
			break;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_COMMITTING:
			/*
			 * This is only possible if we are (a) commit()'ing
			 * without having done a prior speculate() on this CPU
			 * and (b) racing with another commit() on a different
			 * CPU.  There's nothing to do -- we just assert that
			 * our offset is 0.
			 */
			ASSERT(src->dtb_offset == 0);
			return;

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_COMMITTING;
			break;

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is active on one CPU.  If our
			 * buffer offset is non-zero, we know that the one CPU
			 * must be us.  Otherwise, we are committing on a
			 * different CPU from the speculate(), and we must
			 * rely on being asynchronously cleaned.
			 */
			if (src->dtb_offset != 0) {
				new = DTRACESPEC_COMMITTING;
				break;
			}
			/*FALLTHROUGH*/

		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_COMMITTINGMANY;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	/*
	 * We have set the state to indicate that we are committing this
	 * speculation.  Now reserve the necessary space in the destination
	 * buffer.
	 */
	if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset,
	    sizeof (uint64_t), state, NULL)) < 0) {
		dtrace_buffer_drop(dest);
		goto out;
	}

	/*
	 * We have sufficient space to copy the speculative buffer into the
	 * primary buffer.  First, modify the speculative buffer, filling
	 * in the timestamp of all entries with the current time.  The data
	 * must have the commit() time rather than the time it was traced,
	 * so that all entries in the primary buffer are in timestamp order.
	 */
	timestamp = dtrace_gethrtime();
	saddr = (uintptr_t)src->dtb_tomax;
	slimit = saddr + src->dtb_offset;
	while (saddr < slimit) {
		size_t size;
		dtrace_rechdr_t *dtrh = (dtrace_rechdr_t *)saddr;

		if (dtrh->dtrh_epid == DTRACE_EPIDNONE) {
			/* Skip over pad records (EPID only, no payload). */
			saddr += sizeof (dtrace_epid_t);
			continue;
		}
		ASSERT3U(dtrh->dtrh_epid, <=, state->dts_necbs);
		size = state->dts_ecbs[dtrh->dtrh_epid - 1]->dte_size;

		ASSERT3U(saddr + size, <=, slimit);
		ASSERT3U(size, >=, sizeof (dtrace_rechdr_t));
		ASSERT3U(DTRACE_RECORD_LOAD_TIMESTAMP(dtrh), ==, UINT64_MAX);

		DTRACE_RECORD_STORE_TIMESTAMP(dtrh, timestamp);

		saddr += size;
	}

	/*
	 * Copy the buffer across.  (Note that this is a
	 * highly suboptimal bcopy(); in the unlikely event that this becomes
	 * a serious performance issue, a high-performance DTrace-specific
	 * bcopy() should obviously be invented.)
	 */
	daddr = (uintptr_t)dest->dtb_tomax + offs;
	dlimit = daddr + src->dtb_offset;
	saddr = (uintptr_t)src->dtb_tomax;

	/*
	 * First, the aligned portion.
	 */
	while (dlimit - daddr >= sizeof (uint64_t)) {
		*((uint64_t *)daddr) = *((uint64_t *)saddr);

		daddr += sizeof (uint64_t);
		saddr += sizeof (uint64_t);
	}

	/*
	 * Now any left-over bit...
	 */
	while (dlimit - daddr)
		*((uint8_t *)daddr++) = *((uint8_t *)saddr++);

	/*
	 * Finally, commit the reserved space in the destination buffer.
	 */
	dest->dtb_offset = offs + src->dtb_offset;

out:
	/*
	 * If we're lucky enough to be the only active CPU on this speculation
	 * buffer, we can just set the state back to DTRACESPEC_INACTIVE.
	 */
	if (current == DTRACESPEC_ACTIVE ||
	    (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) {
		uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE);

		ASSERT(rval == DTRACESPEC_COMMITTING);
	}

	src->dtb_offset = 0;
	src->dtb_xamot_drops += src->dtb_drops;
	src->dtb_drops = 0;
}

/*
 * This routine discards an active speculation.  If the specified speculation
 * is not in a valid state to perform a discard(), this routine will silently
 * do nothing.
 * The state of the specified speculation is transitioned
 * according to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	/* new = 0 only to quiet the compiler; every reached path sets it. */
	dtrace_speculation_state_t current, new = 0;
	dtrace_buffer_t *buf;

	if (which == 0)
		return;

	if (which > state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpu];

	do {
		current = spec->dtsp_state;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_COMMITTING:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_ACTIVE:
		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_DISCARDING;
			break;

		case DTRACESPEC_ACTIVEONE:
			/*
			 * If our buffer offset is non-zero, the single
			 * active CPU is us and we can go straight back to
			 * INACTIVE; otherwise the speculation must be
			 * cleaned asynchronously.
			 */
			if (buf->dtb_offset != 0) {
				new = DTRACESPEC_INACTIVE;
			} else {
				new = DTRACESPEC_DISCARDING;
			}
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	buf->dtb_offset = 0;
	buf->dtb_drops = 0;
}

/*
 * Note:  not called from probe context.  This function is called
 * asynchronously from cross call context to clean any speculations that are
 * in the COMMITTINGMANY or DISCARDING states.  These speculations may not be
 * transitioned back to the INACTIVE state until all CPUs have cleaned the
 * speculation.
 */
static void
dtrace_speculation_clean_here(dtrace_state_t *state)
{
	dtrace_icookie_t cookie;
	processorid_t cpu = curcpu;
	dtrace_buffer_t *dest = &state->dts_buffer[cpu];
	dtrace_specid_t i;

	/* Disable interrupts so probes cannot fire on us mid-clean. */
	cookie = dtrace_interrupt_disable();

	if (dest->dtb_tomax == NULL) {
		dtrace_interrupt_enable(cookie);
		return;
	}

	for (i = 0; i < state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];
		dtrace_buffer_t *src = &spec->dtsp_buffer[cpu];

		if (src->dtb_tomax == NULL)
			continue;

		if (spec->dtsp_state == DTRACESPEC_DISCARDING) {
			src->dtb_offset = 0;
			continue;
		}

		if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
			continue;

		if (src->dtb_offset == 0)
			continue;

		dtrace_speculation_commit(state, cpu, i + 1);
	}

	dtrace_interrupt_enable(cookie);
}

/*
 * Note:  not called from probe context.  This function is called
 * asynchronously (and at a regular interval) to clean any speculations that
 * are in the COMMITTINGMANY or DISCARDING states.  If it discovers that there
 * is work to be done, it cross calls all CPUs to perform that work;
 * COMMITMANY and DISCARDING speculations may not be transitioned back to the
 * INACTIVE state until they have been cleaned by all CPUs.
 */
static void
dtrace_speculation_clean(dtrace_state_t *state)
{
	int work = 0, rv;
	dtrace_specid_t i;

	for (i = 0; i < state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];

		ASSERT(!spec->dtsp_cleaning);

		if (spec->dtsp_state != DTRACESPEC_DISCARDING &&
		    spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
			continue;

		work++;
		spec->dtsp_cleaning = 1;
	}

	if (!work)
		return;

	dtrace_xcall(DTRACE_CPUALL,
	    (dtrace_xcall_t)dtrace_speculation_clean_here, state);

	/*
	 * We now know that all CPUs have committed or discarded their
	 * speculation buffers, as appropriate.  We can now set the state
	 * to inactive.
	 */
	for (i = 0; i < state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];
		dtrace_speculation_state_t current, new;

		if (!spec->dtsp_cleaning)
			continue;

		current = spec->dtsp_state;
		ASSERT(current == DTRACESPEC_DISCARDING ||
		    current == DTRACESPEC_COMMITTINGMANY);

		new = DTRACESPEC_INACTIVE;

		rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new);
		ASSERT(rv == current);
		spec->dtsp_cleaning = 0;
	}
}

/*
 * Called as part of a speculate() to get the speculative buffer associated
 * with a given speculation.  Returns NULL if the specified speculation is not
 * in an ACTIVE state.  If the speculation is in the ACTIVEONE state -- and
 * the active CPU is not the specified CPU -- the speculation will be
 * atomically transitioned into the ACTIVEMANY state.
 */
static dtrace_buffer_t *
dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	/* new = 0 only to quiet the compiler; every reached path sets it. */
	dtrace_speculation_state_t current, new = 0;
	dtrace_buffer_t *buf;

	if (which == 0)
		return (NULL);

	if (which > state->dts_nspeculations) {
		cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return (NULL);
	}

	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpuid];

	do {
		current = spec->dtsp_state;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_DISCARDING:
			return (NULL);

		case DTRACESPEC_COMMITTING:
			ASSERT(buf->dtb_offset == 0);
			return (NULL);

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is currently active on one CPU.
			 * Check the offset in the buffer; if it's non-zero,
			 * that CPU must be us (and we leave the state alone).
			 * If it's zero, assume that we're starting on a new
			 * CPU -- and change the state to indicate that the
			 * speculation is active on more than one CPU.
			 */
			if (buf->dtb_offset != 0)
				return (buf);

			new = DTRACESPEC_ACTIVEMANY;
			break;

		case DTRACESPEC_ACTIVEMANY:
			return (buf);

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_ACTIVEONE;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY);
	return (buf);
}

/*
 * Return a string.  In the event that the user lacks the privilege to access
 * arbitrary kernel memory, we copy the string out to scratch memory so that we
 * don't fail access checking.
 *
 * dtrace_dif_variable() uses this routine as a helper for various
 * builtin values such as 'execname' and 'probefunc.'
 */
uintptr_t
dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state,
    dtrace_mstate_t *mstate)
{
	uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
	uintptr_t ret;
	size_t strsz;

	/*
	 * The easy case: this probe is allowed to read all of memory, so
	 * we can just return this as a vanilla pointer.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
		return (addr);

	/*
	 * This is the tougher case: we copy the string in question from
	 * kernel memory into scratch memory and return it that way: this
	 * ensures that we won't trip up when access checking tests the
	 * BYREF return value.
	 */
	strsz = dtrace_strlen((char *)addr, size) + 1;

	if (mstate->dtms_scratch_ptr + strsz >
	    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
		return (0);
	}

	dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr,
	    strsz);
	ret = mstate->dtms_scratch_ptr;
	mstate->dtms_scratch_ptr += strsz;
	return (ret);
}

/*
 * Return a string from a memory address which is known to have one or
 * more concatenated, individually zero terminated, sub-strings.
 * In the event that the user lacks the privilege to access
 * arbitrary kernel memory, we copy the string out to scratch memory so that we
 * don't fail access checking.
 *
 * dtrace_dif_variable() uses this routine as a helper for various
 * builtin values such as 'execargs'.
 */
static uintptr_t
dtrace_dif_varstrz(uintptr_t addr, size_t strsz, dtrace_state_t *state,
    dtrace_mstate_t *mstate)
{
	char *p;
	size_t i;
	uintptr_t ret;

	if (mstate->dtms_scratch_ptr + strsz >
	    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
		return (0);
	}

	dtrace_bcopy((const void *)addr, (void *)mstate->dtms_scratch_ptr,
	    strsz);

	/* Replace sub-string termination characters with a space. */
	for (p = (char *) mstate->dtms_scratch_ptr, i = 0; i < strsz - 1;
	    p++, i++)
		if (*p == '\0')
			*p = ' ';

	ret = mstate->dtms_scratch_ptr;
	mstate->dtms_scratch_ptr += strsz;
	return (ret);
}

/*
 * This function implements the DIF emulator's variable lookups.  The emulator
 * passes a reserved variable identifier and optional built-in array index.
 */
static uint64_t
dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
    uint64_t ndx)
{
	/*
	 * If we're accessing one of the uncached arguments, we'll turn this
	 * into a reference in the args array.
3148 */ 3149 if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) { 3150 ndx = v - DIF_VAR_ARG0; 3151 v = DIF_VAR_ARGS; 3152 } 3153 3154 switch (v) { 3155 case DIF_VAR_ARGS: 3156 ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS); 3157 if (ndx >= sizeof (mstate->dtms_arg) / 3158 sizeof (mstate->dtms_arg[0])) { 3159 int aframes = mstate->dtms_probe->dtpr_aframes + 2; 3160 dtrace_provider_t *pv; 3161 uint64_t val; 3162 3163 pv = mstate->dtms_probe->dtpr_provider; 3164 if (pv->dtpv_pops.dtps_getargval != NULL) 3165 val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg, 3166 mstate->dtms_probe->dtpr_id, 3167 mstate->dtms_probe->dtpr_arg, ndx, aframes); 3168 else 3169 val = dtrace_getarg(ndx, aframes); 3170 3171 /* 3172 * This is regrettably required to keep the compiler 3173 * from tail-optimizing the call to dtrace_getarg(). 3174 * The condition always evaluates to true, but the 3175 * compiler has no way of figuring that out a priori. 3176 * (None of this would be necessary if the compiler 3177 * could be relied upon to _always_ tail-optimize 3178 * the call to dtrace_getarg() -- but it can't.) 
3179 */ 3180 if (mstate->dtms_probe != NULL) 3181 return (val); 3182 3183 ASSERT(0); 3184 } 3185 3186 return (mstate->dtms_arg[ndx]); 3187 3188#if defined(sun) 3189 case DIF_VAR_UREGS: { 3190 klwp_t *lwp; 3191 3192 if (!dtrace_priv_proc(state)) 3193 return (0); 3194 3195 if ((lwp = curthread->t_lwp) == NULL) { 3196 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 3197 cpu_core[curcpu].cpuc_dtrace_illval = NULL; 3198 return (0); 3199 } 3200 3201 return (dtrace_getreg(lwp->lwp_regs, ndx)); 3202 return (0); 3203 } 3204#else 3205 case DIF_VAR_UREGS: { 3206 struct trapframe *tframe; 3207 3208 if (!dtrace_priv_proc(state)) 3209 return (0); 3210 3211 if ((tframe = curthread->td_frame) == NULL) { 3212 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); 3213 cpu_core[curcpu].cpuc_dtrace_illval = 0; 3214 return (0); 3215 } 3216 3217 return (dtrace_getreg(tframe, ndx)); 3218 } 3219#endif 3220 3221 case DIF_VAR_CURTHREAD: 3222 if (!dtrace_priv_proc(state)) 3223 return (0); 3224 return ((uint64_t)(uintptr_t)curthread); 3225 3226 case DIF_VAR_TIMESTAMP: 3227 if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) { 3228 mstate->dtms_timestamp = dtrace_gethrtime(); 3229 mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP; 3230 } 3231 return (mstate->dtms_timestamp); 3232 3233 case DIF_VAR_VTIMESTAMP: 3234 ASSERT(dtrace_vtime_references != 0); 3235 return (curthread->t_dtrace_vtime); 3236 3237 case DIF_VAR_WALLTIMESTAMP: 3238 if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) { 3239 mstate->dtms_walltimestamp = dtrace_gethrestime(); 3240 mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP; 3241 } 3242 return (mstate->dtms_walltimestamp); 3243 3244#if defined(sun) 3245 case DIF_VAR_IPL: 3246 if (!dtrace_priv_kernel(state)) 3247 return (0); 3248 if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) { 3249 mstate->dtms_ipl = dtrace_getipl(); 3250 mstate->dtms_present |= DTRACE_MSTATE_IPL; 3251 } 3252 return (mstate->dtms_ipl); 3253#endif 3254 3255 case DIF_VAR_EPID: 3256 ASSERT(mstate->dtms_present & 
DTRACE_MSTATE_EPID); 3257 return (mstate->dtms_epid); 3258 3259 case DIF_VAR_ID: 3260 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 3261 return (mstate->dtms_probe->dtpr_id); 3262 3263 case DIF_VAR_STACKDEPTH: 3264 if (!dtrace_priv_kernel(state)) 3265 return (0); 3266 if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) { 3267 int aframes = mstate->dtms_probe->dtpr_aframes + 2; 3268 3269 mstate->dtms_stackdepth = dtrace_getstackdepth(aframes); 3270 mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH; 3271 } 3272 return (mstate->dtms_stackdepth); 3273 3274 case DIF_VAR_USTACKDEPTH: 3275 if (!dtrace_priv_proc(state)) 3276 return (0); 3277 if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) { 3278 /* 3279 * See comment in DIF_VAR_PID. 3280 */ 3281 if (DTRACE_ANCHORED(mstate->dtms_probe) && 3282 CPU_ON_INTR(CPU)) { 3283 mstate->dtms_ustackdepth = 0; 3284 } else { 3285 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3286 mstate->dtms_ustackdepth = 3287 dtrace_getustackdepth(); 3288 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3289 } 3290 mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH; 3291 } 3292 return (mstate->dtms_ustackdepth); 3293 3294 case DIF_VAR_CALLER: 3295 if (!dtrace_priv_kernel(state)) 3296 return (0); 3297 if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) { 3298 int aframes = mstate->dtms_probe->dtpr_aframes + 2; 3299 3300 if (!DTRACE_ANCHORED(mstate->dtms_probe)) { 3301 /* 3302 * If this is an unanchored probe, we are 3303 * required to go through the slow path: 3304 * dtrace_caller() only guarantees correct 3305 * results for anchored probes. 3306 */ 3307 pc_t caller[2] = {0, 0}; 3308 3309 dtrace_getpcstack(caller, 2, aframes, 3310 (uint32_t *)(uintptr_t)mstate->dtms_arg[0]); 3311 mstate->dtms_caller = caller[1]; 3312 } else if ((mstate->dtms_caller = 3313 dtrace_caller(aframes)) == -1) { 3314 /* 3315 * We have failed to do this the quick way; 3316 * we must resort to the slower approach of 3317 * calling dtrace_getpcstack(). 
3318 */ 3319 pc_t caller = 0; 3320 3321 dtrace_getpcstack(&caller, 1, aframes, NULL); 3322 mstate->dtms_caller = caller; 3323 } 3324 3325 mstate->dtms_present |= DTRACE_MSTATE_CALLER; 3326 } 3327 return (mstate->dtms_caller); 3328 3329 case DIF_VAR_UCALLER: 3330 if (!dtrace_priv_proc(state)) 3331 return (0); 3332 3333 if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) { 3334 uint64_t ustack[3]; 3335 3336 /* 3337 * dtrace_getupcstack() fills in the first uint64_t 3338 * with the current PID. The second uint64_t will 3339 * be the program counter at user-level. The third 3340 * uint64_t will contain the caller, which is what 3341 * we're after. 3342 */ 3343 ustack[2] = 0; 3344 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 3345 dtrace_getupcstack(ustack, 3); 3346 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 3347 mstate->dtms_ucaller = ustack[2]; 3348 mstate->dtms_present |= DTRACE_MSTATE_UCALLER; 3349 } 3350 3351 return (mstate->dtms_ucaller); 3352 3353 case DIF_VAR_PROBEPROV: 3354 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 3355 return (dtrace_dif_varstr( 3356 (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name, 3357 state, mstate)); 3358 3359 case DIF_VAR_PROBEMOD: 3360 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 3361 return (dtrace_dif_varstr( 3362 (uintptr_t)mstate->dtms_probe->dtpr_mod, 3363 state, mstate)); 3364 3365 case DIF_VAR_PROBEFUNC: 3366 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 3367 return (dtrace_dif_varstr( 3368 (uintptr_t)mstate->dtms_probe->dtpr_func, 3369 state, mstate)); 3370 3371 case DIF_VAR_PROBENAME: 3372 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); 3373 return (dtrace_dif_varstr( 3374 (uintptr_t)mstate->dtms_probe->dtpr_name, 3375 state, mstate)); 3376 3377 case DIF_VAR_PID: 3378 if (!dtrace_priv_proc(state)) 3379 return (0); 3380 3381#if defined(sun) 3382 /* 3383 * Note that we are assuming that an unanchored probe is 3384 * always due to a high-level interrupt. 
(And we're assuming 3385 * that there is only a single high level interrupt.) 3386 */ 3387 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3388 return (pid0.pid_id); 3389 3390 /* 3391 * It is always safe to dereference one's own t_procp pointer: 3392 * it always points to a valid, allocated proc structure. 3393 * Further, it is always safe to dereference the p_pidp member 3394 * of one's own proc structure. (These are truisms becuase 3395 * threads and processes don't clean up their own state -- 3396 * they leave that task to whomever reaps them.) 3397 */ 3398 return ((uint64_t)curthread->t_procp->p_pidp->pid_id); 3399#else 3400 return ((uint64_t)curproc->p_pid); 3401#endif 3402 3403 case DIF_VAR_PPID: 3404 if (!dtrace_priv_proc(state)) 3405 return (0); 3406 3407#if defined(sun) 3408 /* 3409 * See comment in DIF_VAR_PID. 3410 */ 3411 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3412 return (pid0.pid_id); 3413 3414 /* 3415 * It is always safe to dereference one's own t_procp pointer: 3416 * it always points to a valid, allocated proc structure. 3417 * (This is true because threads don't clean up their own 3418 * state -- they leave that task to whomever reaps them.) 3419 */ 3420 return ((uint64_t)curthread->t_procp->p_ppid); 3421#else 3422 if (curproc->p_pid == proc0.p_pid) 3423 return (curproc->p_pid); 3424 else 3425 return (curproc->p_pptr->p_pid); 3426#endif 3427 3428 case DIF_VAR_TID: 3429#if defined(sun) 3430 /* 3431 * See comment in DIF_VAR_PID. 
3432 */ 3433 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3434 return (0); 3435#endif 3436 3437 return ((uint64_t)curthread->t_tid); 3438 3439 case DIF_VAR_EXECARGS: { 3440 struct pargs *p_args = curthread->td_proc->p_args; 3441 3442 if (p_args == NULL) 3443 return(0); 3444 3445 return (dtrace_dif_varstrz( 3446 (uintptr_t) p_args->ar_args, p_args->ar_length, state, mstate)); 3447 } 3448 3449 case DIF_VAR_EXECNAME: 3450#if defined(sun) 3451 if (!dtrace_priv_proc(state)) 3452 return (0); 3453 3454 /* 3455 * See comment in DIF_VAR_PID. 3456 */ 3457 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3458 return ((uint64_t)(uintptr_t)p0.p_user.u_comm); 3459 3460 /* 3461 * It is always safe to dereference one's own t_procp pointer: 3462 * it always points to a valid, allocated proc structure. 3463 * (This is true because threads don't clean up their own 3464 * state -- they leave that task to whomever reaps them.) 3465 */ 3466 return (dtrace_dif_varstr( 3467 (uintptr_t)curthread->t_procp->p_user.u_comm, 3468 state, mstate)); 3469#else 3470 return (dtrace_dif_varstr( 3471 (uintptr_t) curthread->td_proc->p_comm, state, mstate)); 3472#endif 3473 3474 case DIF_VAR_ZONENAME: 3475#if defined(sun) 3476 if (!dtrace_priv_proc(state)) 3477 return (0); 3478 3479 /* 3480 * See comment in DIF_VAR_PID. 3481 */ 3482 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3483 return ((uint64_t)(uintptr_t)p0.p_zone->zone_name); 3484 3485 /* 3486 * It is always safe to dereference one's own t_procp pointer: 3487 * it always points to a valid, allocated proc structure. 3488 * (This is true because threads don't clean up their own 3489 * state -- they leave that task to whomever reaps them.) 
3490 */ 3491 return (dtrace_dif_varstr( 3492 (uintptr_t)curthread->t_procp->p_zone->zone_name, 3493 state, mstate)); 3494#else 3495 return (0); 3496#endif 3497 3498 case DIF_VAR_UID: 3499 if (!dtrace_priv_proc(state)) 3500 return (0); 3501 3502#if defined(sun) 3503 /* 3504 * See comment in DIF_VAR_PID. 3505 */ 3506 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3507 return ((uint64_t)p0.p_cred->cr_uid); 3508#endif 3509 3510 /* 3511 * It is always safe to dereference one's own t_procp pointer: 3512 * it always points to a valid, allocated proc structure. 3513 * (This is true because threads don't clean up their own 3514 * state -- they leave that task to whomever reaps them.) 3515 * 3516 * Additionally, it is safe to dereference one's own process 3517 * credential, since this is never NULL after process birth. 3518 */ 3519 return ((uint64_t)curthread->t_procp->p_cred->cr_uid); 3520 3521 case DIF_VAR_GID: 3522 if (!dtrace_priv_proc(state)) 3523 return (0); 3524 3525#if defined(sun) 3526 /* 3527 * See comment in DIF_VAR_PID. 3528 */ 3529 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3530 return ((uint64_t)p0.p_cred->cr_gid); 3531#endif 3532 3533 /* 3534 * It is always safe to dereference one's own t_procp pointer: 3535 * it always points to a valid, allocated proc structure. 3536 * (This is true because threads don't clean up their own 3537 * state -- they leave that task to whomever reaps them.) 3538 * 3539 * Additionally, it is safe to dereference one's own process 3540 * credential, since this is never NULL after process birth. 3541 */ 3542 return ((uint64_t)curthread->t_procp->p_cred->cr_gid); 3543 3544 case DIF_VAR_ERRNO: { 3545#if defined(sun) 3546 klwp_t *lwp; 3547 if (!dtrace_priv_proc(state)) 3548 return (0); 3549 3550 /* 3551 * See comment in DIF_VAR_PID. 
3552 */ 3553 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) 3554 return (0); 3555 3556 /* 3557 * It is always safe to dereference one's own t_lwp pointer in 3558 * the event that this pointer is non-NULL. (This is true 3559 * because threads and lwps don't clean up their own state -- 3560 * they leave that task to whomever reaps them.) 3561 */ 3562 if ((lwp = curthread->t_lwp) == NULL) 3563 return (0); 3564 3565 return ((uint64_t)lwp->lwp_errno); 3566#else 3567 return (curthread->td_errno); 3568#endif 3569 } 3570#if !defined(sun) 3571 case DIF_VAR_CPU: { 3572 return curcpu; 3573 } 3574#endif 3575 default: 3576 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); 3577 return (0); 3578 } 3579} 3580 3581 3582typedef enum dtrace_json_state { 3583 DTRACE_JSON_REST = 1, 3584 DTRACE_JSON_OBJECT, 3585 DTRACE_JSON_STRING, 3586 DTRACE_JSON_STRING_ESCAPE, 3587 DTRACE_JSON_STRING_ESCAPE_UNICODE, 3588 DTRACE_JSON_COLON, 3589 DTRACE_JSON_COMMA, 3590 DTRACE_JSON_VALUE, 3591 DTRACE_JSON_IDENTIFIER, 3592 DTRACE_JSON_NUMBER, 3593 DTRACE_JSON_NUMBER_FRAC, 3594 DTRACE_JSON_NUMBER_EXP, 3595 DTRACE_JSON_COLLECT_OBJECT 3596} dtrace_json_state_t; 3597 3598/* 3599 * This function possesses just enough knowledge about JSON to extract a single 3600 * value from a JSON string and store it in the scratch buffer. It is able 3601 * to extract nested object values, and members of arrays by index. 3602 * 3603 * elemlist is a list of JSON keys, stored as packed NUL-terminated strings, to 3604 * be looked up as we descend into the object tree. e.g. 3605 * 3606 * foo[0].bar.baz[32] --> "foo" NUL "0" NUL "bar" NUL "baz" NUL "32" NUL 3607 * with nelems = 5. 3608 * 3609 * The run time of this function must be bounded above by strsize to limit the 3610 * amount of work done in probe context. 
As such, it is implemented as a
 * simple state machine, reading one character at a time using safe loads
 * until we find the requested element, hit a parsing error or run off the
 * end of the object or string.
 *
 * As there is no way for a subroutine to return an error without interrupting
 * clause execution, we simply return NULL in the event of a missing key or any
 * other error condition.  Each NULL return in this function is commented with
 * the error condition it represents -- parsing or otherwise.
 *
 * The set of states for the state machine closely matches the JSON
 * specification (http://json.org/).  Briefly:
 *
 *   DTRACE_JSON_REST:
 *     Skip whitespace until we find either a top-level Object, moving
 *     to DTRACE_JSON_OBJECT; or an Array, moving to DTRACE_JSON_VALUE.
 *
 *   DTRACE_JSON_OBJECT:
 *     Locate the next key String in an Object.  Sets a flag to denote
 *     the next String as a key string and moves to DTRACE_JSON_STRING.
 *
 *   DTRACE_JSON_COLON:
 *     Skip whitespace until we find the colon that separates key Strings
 *     from their values.  Once found, move to DTRACE_JSON_VALUE.
 *
 *   DTRACE_JSON_VALUE:
 *     Detects the type of the next value (String, Number, Identifier, Object
 *     or Array) and routes to the states that process that type.  Here we also
 *     deal with the element selector list if we are requested to traverse down
 *     into the object tree.
 *
 *   DTRACE_JSON_COMMA:
 *     Skip whitespace until we find the comma that separates key-value pairs
 *     in Objects (returning to DTRACE_JSON_OBJECT) or values in Arrays
 *     (similarly DTRACE_JSON_VALUE).  All following literal value processing
 *     states return to this state at the end of their value, unless otherwise
 *     noted.
 *
 *   DTRACE_JSON_NUMBER, DTRACE_JSON_NUMBER_FRAC, DTRACE_JSON_NUMBER_EXP:
 *     Processes a Number literal from the JSON, including any exponent
 *     component that may be present.  Numbers are returned as strings, which
 *     may be passed to strtoll() if an integer is required.
 *
 *   DTRACE_JSON_IDENTIFIER:
 *     Processes a "true", "false" or "null" literal in the JSON.
 *
 *   DTRACE_JSON_STRING, DTRACE_JSON_STRING_ESCAPE,
 *   DTRACE_JSON_STRING_ESCAPE_UNICODE:
 *     Processes a String literal from the JSON, whether the String denotes
 *     a key, a value or part of a larger Object.  Handles all escape sequences
 *     present in the specification, including four-digit unicode characters,
 *     but merely includes the escape sequence without converting it to the
 *     actual escaped character.  If the String is flagged as a key, we
 *     move to DTRACE_JSON_COLON rather than DTRACE_JSON_COMMA.
 *
 *   DTRACE_JSON_COLLECT_OBJECT:
 *     This state collects an entire Object (or Array), correctly handling
 *     embedded strings.  If the full element selector list matches this nested
 *     object, we return the Object in full as a string.  If not, we use this
 *     state to skip to the next value at this level and continue processing.
 *
 * NOTE: This function uses various macros from strtolctype.h to manipulate
 * digit values, etc -- these have all been checked to ensure they make
 * no additional function calls.
 */
static char *
dtrace_json(uint64_t size, uintptr_t json, char *elemlist, int nelems,
    char *dest)
{
	dtrace_json_state_t state = DTRACE_JSON_REST;
	int64_t array_elem = INT64_MIN;
	int64_t array_pos = 0;
	uint8_t escape_unicount = 0;
	boolean_t string_is_key = B_FALSE;
	boolean_t collect_object = B_FALSE;
	boolean_t found_key = B_FALSE;
	boolean_t in_array = B_FALSE;
	uint32_t braces = 0, brackets = 0;
	char *elem = elemlist;
	char *dd = dest;
	uintptr_t cur;

	/* Scan at most 'size' bytes, one safe load at a time. */
	for (cur = json; cur < json + size; cur++) {
		char cc = dtrace_load8(cur);
		/* A NUL before 'size' bytes means we ran off the string. */
		if (cc == '\0')
			return (NULL);

		switch (state) {
		case DTRACE_JSON_REST:
			if (isspace(cc))
				break;

			if (cc == '{') {
				state = DTRACE_JSON_OBJECT;
				break;
			}

			if (cc == '[') {
				/*
				 * Top-level Array: the current selector
				 * element is interpreted as an array index.
				 */
				in_array = B_TRUE;
				array_pos = 0;
				array_elem = dtrace_strtoll(elem, 10, size);
				found_key = array_elem == 0 ? B_TRUE : B_FALSE;
				state = DTRACE_JSON_VALUE;
				break;
			}

			/*
			 * ERROR: expected to find a top-level object or array.
			 */
			return (NULL);
		case DTRACE_JSON_OBJECT:
			if (isspace(cc))
				break;

			if (cc == '"') {
				state = DTRACE_JSON_STRING;
				string_is_key = B_TRUE;
				break;
			}

			/*
			 * ERROR: either the object did not start with a key
			 * string, or we've run off the end of the object
			 * without finding the requested key.
			 */
			return (NULL);
		case DTRACE_JSON_STRING:
			if (cc == '\\') {
				*dd++ = '\\';
				state = DTRACE_JSON_STRING_ESCAPE;
				break;
			}

			if (cc == '"') {
				if (collect_object) {
					/*
					 * We don't reset the dest here, as
					 * the string is part of a larger
					 * object being collected.
					 */
					*dd++ = cc;
					collect_object = B_FALSE;
					state = DTRACE_JSON_COLLECT_OBJECT;
					break;
				}
				*dd = '\0';
				dd = dest; /* reset string buffer */
				if (string_is_key) {
					/*
					 * Completed a key: compare it with
					 * the current selector element.
					 */
					if (dtrace_strncmp(dest, elem,
					    size) == 0)
						found_key = B_TRUE;
				} else if (found_key) {
					if (nelems > 1) {
						/*
						 * We expected an object, not
						 * this string.
						 */
						return (NULL);
					}
					return (dest);
				}
				state = string_is_key ? DTRACE_JSON_COLON :
				    DTRACE_JSON_COMMA;
				string_is_key = B_FALSE;
				break;
			}

			*dd++ = cc;
			break;
		case DTRACE_JSON_STRING_ESCAPE:
			*dd++ = cc;
			if (cc == 'u') {
				escape_unicount = 0;
				state = DTRACE_JSON_STRING_ESCAPE_UNICODE;
			} else {
				state = DTRACE_JSON_STRING;
			}
			break;
		case DTRACE_JSON_STRING_ESCAPE_UNICODE:
			if (!isxdigit(cc)) {
				/*
				 * ERROR: invalid unicode escape, expected
				 * four valid hexadecimal digits.
				 */
				return (NULL);
			}

			*dd++ = cc;
			if (++escape_unicount == 4)
				state = DTRACE_JSON_STRING;
			break;
		case DTRACE_JSON_COLON:
			if (isspace(cc))
				break;

			if (cc == ':') {
				state = DTRACE_JSON_VALUE;
				break;
			}

			/*
			 * ERROR: expected a colon.
			 */
			return (NULL);
		case DTRACE_JSON_COMMA:
			if (isspace(cc))
				break;

			if (cc == ',') {
				if (in_array) {
					state = DTRACE_JSON_VALUE;
					/* Advance to the requested index. */
					if (++array_pos == array_elem)
						found_key = B_TRUE;
				} else {
					state = DTRACE_JSON_OBJECT;
				}
				break;
			}

			/*
			 * ERROR: either we hit an unexpected character, or
			 * we reached the end of the object or array without
			 * finding the requested key.
			 */
			return (NULL);
		case DTRACE_JSON_IDENTIFIER:
			if (islower(cc)) {
				*dd++ = cc;
				break;
			}

			*dd = '\0';
			dd = dest; /* reset string buffer */

			if (dtrace_strncmp(dest, "true", 5) == 0 ||
			    dtrace_strncmp(dest, "false", 6) == 0 ||
			    dtrace_strncmp(dest, "null", 5) == 0) {
				if (found_key) {
					if (nelems > 1) {
						/*
						 * ERROR: We expected an object,
						 * not this identifier.
						 */
						return (NULL);
					}
					return (dest);
				} else {
					/* Re-examine cc in the COMMA state. */
					cur--;
					state = DTRACE_JSON_COMMA;
					break;
				}
			}

			/*
			 * ERROR: we did not recognise the identifier as one
			 * of those in the JSON specification.
			 */
			return (NULL);
		case DTRACE_JSON_NUMBER:
			if (cc == '.') {
				*dd++ = cc;
				state = DTRACE_JSON_NUMBER_FRAC;
				break;
			}

			if (cc == 'x' || cc == 'X') {
				/*
				 * ERROR: specification explicitly excludes
				 * hexadecimal or octal numbers.
				 */
				return (NULL);
			}

			/* FALLTHRU */
		case DTRACE_JSON_NUMBER_FRAC:
			if (cc == 'e' || cc == 'E') {
				*dd++ = cc;
				state = DTRACE_JSON_NUMBER_EXP;
				break;
			}

			if (cc == '+' || cc == '-') {
				/*
				 * ERROR: expect sign as part of exponent only.
				 */
				return (NULL);
			}
			/* FALLTHRU */
		case DTRACE_JSON_NUMBER_EXP:
			if (isdigit(cc) || cc == '+' || cc == '-') {
				*dd++ = cc;
				break;
			}

			*dd = '\0';
			dd = dest; /* reset string buffer */
			if (found_key) {
				if (nelems > 1) {
					/*
					 * ERROR: We expected an object, not
					 * this number.
					 */
					return (NULL);
				}
				return (dest);
			}

			/* Re-examine cc in the COMMA state. */
			cur--;
			state = DTRACE_JSON_COMMA;
			break;
		case DTRACE_JSON_VALUE:
			if (isspace(cc))
				break;

			if (cc == '{' || cc == '[') {
				if (nelems > 1 && found_key) {
					in_array = cc == '[' ? B_TRUE : B_FALSE;
					/*
					 * If our element selector directs us
					 * to descend into this nested object,
					 * then move to the next selector
					 * element in the list and restart the
					 * state machine.
					 */
					while (*elem != '\0')
						elem++;
					elem++; /* skip the inter-element NUL */
					nelems--;
					dd = dest;
					if (in_array) {
						state = DTRACE_JSON_VALUE;
						array_pos = 0;
						array_elem = dtrace_strtoll(
						    elem, 10, size);
						found_key = array_elem == 0 ?
						    B_TRUE : B_FALSE;
					} else {
						found_key = B_FALSE;
						state = DTRACE_JSON_OBJECT;
					}
					break;
				}

				/*
				 * Otherwise, we wish to either skip this
				 * nested object or return it in full.
				 */
				if (cc == '[')
					brackets = 1;
				else
					braces = 1;
				*dd++ = cc;
				state = DTRACE_JSON_COLLECT_OBJECT;
				break;
			}

			if (cc == '"') {
				state = DTRACE_JSON_STRING;
				break;
			}

			if (islower(cc)) {
				/*
				 * Here we deal with true, false and null.
				 */
				*dd++ = cc;
				state = DTRACE_JSON_IDENTIFIER;
				break;
			}

			if (cc == '-' || isdigit(cc)) {
				*dd++ = cc;
				state = DTRACE_JSON_NUMBER;
				break;
			}

			/*
			 * ERROR: unexpected character at start of value.
			 */
			return (NULL);
		case DTRACE_JSON_COLLECT_OBJECT:
			/*
			 * NOTE(review): cc can never be '\0' here -- the check
			 * at the top of the loop already returns in that case
			 * -- so this test appears to be unreachable.
			 */
			if (cc == '\0')
				/*
				 * ERROR: unexpected end of input.
				 */
				return (NULL);

			*dd++ = cc;
			if (cc == '"') {
				collect_object = B_TRUE;
				state = DTRACE_JSON_STRING;
				break;
			}

			if (cc == ']') {
				if (brackets-- == 0) {
					/*
					 * ERROR: unbalanced brackets.
4015 */ 4016 return (NULL); 4017 } 4018 } else if (cc == '{') { 4019 braces++; 4020 } else if (cc == '[') { 4021 brackets++; 4022 } 4023 4024 if (brackets == 0 && braces == 0) { 4025 if (found_key) { 4026 *dd = '\0'; 4027 return (dest); 4028 } 4029 dd = dest; /* reset string buffer */ 4030 state = DTRACE_JSON_COMMA; 4031 } 4032 break; 4033 } 4034 } 4035 return (NULL); 4036} 4037 4038/* 4039 * Emulate the execution of DTrace ID subroutines invoked by the call opcode. 4040 * Notice that we don't bother validating the proper number of arguments or 4041 * their types in the tuple stack. This isn't needed because all argument 4042 * interpretation is safe because of our load safety -- the worst that can 4043 * happen is that a bogus program can obtain bogus results. 4044 */ 4045static void 4046dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, 4047 dtrace_key_t *tupregs, int nargs, 4048 dtrace_mstate_t *mstate, dtrace_state_t *state) 4049{ 4050 volatile uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags; 4051 volatile uintptr_t *illval = &cpu_core[curcpu].cpuc_dtrace_illval; 4052 dtrace_vstate_t *vstate = &state->dts_vstate; 4053 4054#if defined(sun) 4055 union { 4056 mutex_impl_t mi; 4057 uint64_t mx; 4058 } m; 4059 4060 union { 4061 krwlock_t ri; 4062 uintptr_t rw; 4063 } r; 4064#else 4065 struct thread *lowner; 4066 union { 4067 struct lock_object *li; 4068 uintptr_t lx; 4069 } l; 4070#endif 4071 4072 switch (subr) { 4073 case DIF_SUBR_RAND: 4074 regs[rd] = (dtrace_gethrtime() * 2416 + 374441) % 1771875; 4075 break; 4076 4077#if defined(sun) 4078 case DIF_SUBR_MUTEX_OWNED: 4079 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), 4080 mstate, vstate)) { 4081 regs[rd] = 0; 4082 break; 4083 } 4084 4085 m.mx = dtrace_load64(tupregs[0].dttk_value); 4086 if (MUTEX_TYPE_ADAPTIVE(&m.mi)) 4087 regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER; 4088 else 4089 regs[rd] = LOCK_HELD(&m.mi.m_spin.m_spinlock); 4090 break; 4091 4092 case DIF_SUBR_MUTEX_OWNER: 4093 if 
(!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), 4094 mstate, vstate)) { 4095 regs[rd] = 0; 4096 break; 4097 } 4098 4099 m.mx = dtrace_load64(tupregs[0].dttk_value); 4100 if (MUTEX_TYPE_ADAPTIVE(&m.mi) && 4101 MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER) 4102 regs[rd] = (uintptr_t)MUTEX_OWNER(&m.mi); 4103 else 4104 regs[rd] = 0; 4105 break; 4106 4107 case DIF_SUBR_MUTEX_TYPE_ADAPTIVE: 4108 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), 4109 mstate, vstate)) { 4110 regs[rd] = 0; 4111 break; 4112 } 4113 4114 m.mx = dtrace_load64(tupregs[0].dttk_value); 4115 regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi); 4116 break; 4117 4118 case DIF_SUBR_MUTEX_TYPE_SPIN: 4119 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), 4120 mstate, vstate)) { 4121 regs[rd] = 0; 4122 break; 4123 } 4124 4125 m.mx = dtrace_load64(tupregs[0].dttk_value); 4126 regs[rd] = MUTEX_TYPE_SPIN(&m.mi); 4127 break; 4128 4129 case DIF_SUBR_RW_READ_HELD: { 4130 uintptr_t tmp; 4131 4132 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), 4133 mstate, vstate)) { 4134 regs[rd] = 0; 4135 break; 4136 } 4137 4138 r.rw = dtrace_loadptr(tupregs[0].dttk_value); 4139 regs[rd] = _RW_READ_HELD(&r.ri, tmp); 4140 break; 4141 } 4142 4143 case DIF_SUBR_RW_WRITE_HELD: 4144 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), 4145 mstate, vstate)) { 4146 regs[rd] = 0; 4147 break; 4148 } 4149 4150 r.rw = dtrace_loadptr(tupregs[0].dttk_value); 4151 regs[rd] = _RW_WRITE_HELD(&r.ri); 4152 break; 4153 4154 case DIF_SUBR_RW_ISWRITER: 4155 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), 4156 mstate, vstate)) { 4157 regs[rd] = 0; 4158 break; 4159 } 4160 4161 r.rw = dtrace_loadptr(tupregs[0].dttk_value); 4162 regs[rd] = _RW_ISWRITER(&r.ri); 4163 break; 4164 4165#else 4166 case DIF_SUBR_MUTEX_OWNED: 4167 if (!dtrace_canload(tupregs[0].dttk_value, 4168 sizeof (struct lock_object), mstate, vstate)) { 4169 regs[rd] = 0; 4170 break; 4171 } 4172 l.lx = 
dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); 4173 regs[rd] = LOCK_CLASS(l.li)->lc_owner(l.li, &lowner); 4174 break; 4175 4176 case DIF_SUBR_MUTEX_OWNER: 4177 if (!dtrace_canload(tupregs[0].dttk_value, 4178 sizeof (struct lock_object), mstate, vstate)) { 4179 regs[rd] = 0; 4180 break; 4181 } 4182 l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); 4183 LOCK_CLASS(l.li)->lc_owner(l.li, &lowner); 4184 regs[rd] = (uintptr_t)lowner; 4185 break; 4186 4187 case DIF_SUBR_MUTEX_TYPE_ADAPTIVE: 4188 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (struct mtx), 4189 mstate, vstate)) { 4190 regs[rd] = 0; 4191 break; 4192 } 4193 l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); 4194 /* XXX - should be only LC_SLEEPABLE? */ 4195 regs[rd] = (LOCK_CLASS(l.li)->lc_flags & 4196 (LC_SLEEPLOCK | LC_SLEEPABLE)) != 0; 4197 break; 4198 4199 case DIF_SUBR_MUTEX_TYPE_SPIN: 4200 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (struct mtx), 4201 mstate, vstate)) { 4202 regs[rd] = 0; 4203 break; 4204 } 4205 l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); 4206 regs[rd] = (LOCK_CLASS(l.li)->lc_flags & LC_SPINLOCK) != 0; 4207 break; 4208 4209 case DIF_SUBR_RW_READ_HELD: 4210 case DIF_SUBR_SX_SHARED_HELD: 4211 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), 4212 mstate, vstate)) { 4213 regs[rd] = 0; 4214 break; 4215 } 4216 l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); 4217 regs[rd] = LOCK_CLASS(l.li)->lc_owner(l.li, &lowner) && 4218 lowner == NULL; 4219 break; 4220 4221 case DIF_SUBR_RW_WRITE_HELD: 4222 case DIF_SUBR_SX_EXCLUSIVE_HELD: 4223 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), 4224 mstate, vstate)) { 4225 regs[rd] = 0; 4226 break; 4227 } 4228 l.lx = dtrace_loadptr(tupregs[0].dttk_value); 4229 LOCK_CLASS(l.li)->lc_owner(l.li, &lowner); 4230 regs[rd] = (lowner == curthread); 4231 break; 4232 4233 case DIF_SUBR_RW_ISWRITER: 4234 case DIF_SUBR_SX_ISEXCLUSIVE: 4235 if (!dtrace_canload(tupregs[0].dttk_value, sizeof 
(uintptr_t), 4236 mstate, vstate)) { 4237 regs[rd] = 0; 4238 break; 4239 } 4240 l.lx = dtrace_loadptr(tupregs[0].dttk_value); 4241 regs[rd] = LOCK_CLASS(l.li)->lc_owner(l.li, &lowner) && 4242 lowner != NULL; 4243 break; 4244#endif /* ! defined(sun) */ 4245 4246 case DIF_SUBR_BCOPY: { 4247 /* 4248 * We need to be sure that the destination is in the scratch 4249 * region -- no other region is allowed. 4250 */ 4251 uintptr_t src = tupregs[0].dttk_value; 4252 uintptr_t dest = tupregs[1].dttk_value; 4253 size_t size = tupregs[2].dttk_value; 4254 4255 if (!dtrace_inscratch(dest, size, mstate)) { 4256 *flags |= CPU_DTRACE_BADADDR; 4257 *illval = regs[rd]; 4258 break; 4259 } 4260 4261 if (!dtrace_canload(src, size, mstate, vstate)) { 4262 regs[rd] = 0; 4263 break; 4264 } 4265 4266 dtrace_bcopy((void *)src, (void *)dest, size); 4267 break; 4268 } 4269 4270 case DIF_SUBR_ALLOCA: 4271 case DIF_SUBR_COPYIN: { 4272 uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8); 4273 uint64_t size = 4274 tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value; 4275 size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size; 4276 4277 /* 4278 * This action doesn't require any credential checks since 4279 * probes will not activate in user contexts to which the 4280 * enabling user does not have permissions. 4281 */ 4282 4283 /* 4284 * Rounding up the user allocation size could have overflowed 4285 * a large, bogus allocation (like -1ULL) to 0. 
4286 */ 4287 if (scratch_size < size || 4288 !DTRACE_INSCRATCH(mstate, scratch_size)) { 4289 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4290 regs[rd] = 0; 4291 break; 4292 } 4293 4294 if (subr == DIF_SUBR_COPYIN) { 4295 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 4296 dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); 4297 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 4298 } 4299 4300 mstate->dtms_scratch_ptr += scratch_size; 4301 regs[rd] = dest; 4302 break; 4303 } 4304 4305 case DIF_SUBR_COPYINTO: { 4306 uint64_t size = tupregs[1].dttk_value; 4307 uintptr_t dest = tupregs[2].dttk_value; 4308 4309 /* 4310 * This action doesn't require any credential checks since 4311 * probes will not activate in user contexts to which the 4312 * enabling user does not have permissions. 4313 */ 4314 if (!dtrace_inscratch(dest, size, mstate)) { 4315 *flags |= CPU_DTRACE_BADADDR; 4316 *illval = regs[rd]; 4317 break; 4318 } 4319 4320 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 4321 dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); 4322 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 4323 break; 4324 } 4325 4326 case DIF_SUBR_COPYINSTR: { 4327 uintptr_t dest = mstate->dtms_scratch_ptr; 4328 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 4329 4330 if (nargs > 1 && tupregs[1].dttk_value < size) 4331 size = tupregs[1].dttk_value + 1; 4332 4333 /* 4334 * This action doesn't require any credential checks since 4335 * probes will not activate in user contexts to which the 4336 * enabling user does not have permissions. 
4337 */ 4338 if (!DTRACE_INSCRATCH(mstate, size)) { 4339 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4340 regs[rd] = 0; 4341 break; 4342 } 4343 4344 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 4345 dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags); 4346 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 4347 4348 ((char *)dest)[size - 1] = '\0'; 4349 mstate->dtms_scratch_ptr += size; 4350 regs[rd] = dest; 4351 break; 4352 } 4353 4354#if defined(sun) 4355 case DIF_SUBR_MSGSIZE: 4356 case DIF_SUBR_MSGDSIZE: { 4357 uintptr_t baddr = tupregs[0].dttk_value, daddr; 4358 uintptr_t wptr, rptr; 4359 size_t count = 0; 4360 int cont = 0; 4361 4362 while (baddr != 0 && !(*flags & CPU_DTRACE_FAULT)) { 4363 4364 if (!dtrace_canload(baddr, sizeof (mblk_t), mstate, 4365 vstate)) { 4366 regs[rd] = 0; 4367 break; 4368 } 4369 4370 wptr = dtrace_loadptr(baddr + 4371 offsetof(mblk_t, b_wptr)); 4372 4373 rptr = dtrace_loadptr(baddr + 4374 offsetof(mblk_t, b_rptr)); 4375 4376 if (wptr < rptr) { 4377 *flags |= CPU_DTRACE_BADADDR; 4378 *illval = tupregs[0].dttk_value; 4379 break; 4380 } 4381 4382 daddr = dtrace_loadptr(baddr + 4383 offsetof(mblk_t, b_datap)); 4384 4385 baddr = dtrace_loadptr(baddr + 4386 offsetof(mblk_t, b_cont)); 4387 4388 /* 4389 * We want to prevent against denial-of-service here, 4390 * so we're only going to search the list for 4391 * dtrace_msgdsize_max mblks. 
4392 */ 4393 if (cont++ > dtrace_msgdsize_max) { 4394 *flags |= CPU_DTRACE_ILLOP; 4395 break; 4396 } 4397 4398 if (subr == DIF_SUBR_MSGDSIZE) { 4399 if (dtrace_load8(daddr + 4400 offsetof(dblk_t, db_type)) != M_DATA) 4401 continue; 4402 } 4403 4404 count += wptr - rptr; 4405 } 4406 4407 if (!(*flags & CPU_DTRACE_FAULT)) 4408 regs[rd] = count; 4409 4410 break; 4411 } 4412#endif 4413 4414 case DIF_SUBR_PROGENYOF: { 4415 pid_t pid = tupregs[0].dttk_value; 4416 proc_t *p; 4417 int rval = 0; 4418 4419 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 4420 4421 for (p = curthread->t_procp; p != NULL; p = p->p_parent) { 4422#if defined(sun) 4423 if (p->p_pidp->pid_id == pid) { 4424#else 4425 if (p->p_pid == pid) { 4426#endif 4427 rval = 1; 4428 break; 4429 } 4430 } 4431 4432 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 4433 4434 regs[rd] = rval; 4435 break; 4436 } 4437 4438 case DIF_SUBR_SPECULATION: 4439 regs[rd] = dtrace_speculation(state); 4440 break; 4441 4442 case DIF_SUBR_COPYOUT: { 4443 uintptr_t kaddr = tupregs[0].dttk_value; 4444 uintptr_t uaddr = tupregs[1].dttk_value; 4445 uint64_t size = tupregs[2].dttk_value; 4446 4447 if (!dtrace_destructive_disallow && 4448 dtrace_priv_proc_control(state) && 4449 !dtrace_istoxic(kaddr, size)) { 4450 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 4451 dtrace_copyout(kaddr, uaddr, size, flags); 4452 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 4453 } 4454 break; 4455 } 4456 4457 case DIF_SUBR_COPYOUTSTR: { 4458 uintptr_t kaddr = tupregs[0].dttk_value; 4459 uintptr_t uaddr = tupregs[1].dttk_value; 4460 uint64_t size = tupregs[2].dttk_value; 4461 4462 if (!dtrace_destructive_disallow && 4463 dtrace_priv_proc_control(state) && 4464 !dtrace_istoxic(kaddr, size)) { 4465 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 4466 dtrace_copyoutstr(kaddr, uaddr, size, flags); 4467 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 4468 } 4469 break; 4470 } 4471 4472 case DIF_SUBR_STRLEN: { 4473 size_t sz; 4474 uintptr_t addr = (uintptr_t)tupregs[0].dttk_value; 4475 sz = 
dtrace_strlen((char *)addr, 4476 state->dts_options[DTRACEOPT_STRSIZE]); 4477 4478 if (!dtrace_canload(addr, sz + 1, mstate, vstate)) { 4479 regs[rd] = 0; 4480 break; 4481 } 4482 4483 regs[rd] = sz; 4484 4485 break; 4486 } 4487 4488 case DIF_SUBR_STRCHR: 4489 case DIF_SUBR_STRRCHR: { 4490 /* 4491 * We're going to iterate over the string looking for the 4492 * specified character. We will iterate until we have reached 4493 * the string length or we have found the character. If this 4494 * is DIF_SUBR_STRRCHR, we will look for the last occurrence 4495 * of the specified character instead of the first. 4496 */ 4497 uintptr_t saddr = tupregs[0].dttk_value; 4498 uintptr_t addr = tupregs[0].dttk_value; 4499 uintptr_t limit = addr + state->dts_options[DTRACEOPT_STRSIZE]; 4500 char c, target = (char)tupregs[1].dttk_value; 4501 4502 for (regs[rd] = 0; addr < limit; addr++) { 4503 if ((c = dtrace_load8(addr)) == target) { 4504 regs[rd] = addr; 4505 4506 if (subr == DIF_SUBR_STRCHR) 4507 break; 4508 } 4509 4510 if (c == '\0') 4511 break; 4512 } 4513 4514 if (!dtrace_canload(saddr, addr - saddr, mstate, vstate)) { 4515 regs[rd] = 0; 4516 break; 4517 } 4518 4519 break; 4520 } 4521 4522 case DIF_SUBR_STRSTR: 4523 case DIF_SUBR_INDEX: 4524 case DIF_SUBR_RINDEX: { 4525 /* 4526 * We're going to iterate over the string looking for the 4527 * specified string. We will iterate until we have reached 4528 * the string length or we have found the string. (Yes, this 4529 * is done in the most naive way possible -- but considering 4530 * that the string we're searching for is likely to be 4531 * relatively short, the complexity of Rabin-Karp or similar 4532 * hardly seems merited.) 
4533 */ 4534 char *addr = (char *)(uintptr_t)tupregs[0].dttk_value; 4535 char *substr = (char *)(uintptr_t)tupregs[1].dttk_value; 4536 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 4537 size_t len = dtrace_strlen(addr, size); 4538 size_t sublen = dtrace_strlen(substr, size); 4539 char *limit = addr + len, *orig = addr; 4540 int notfound = subr == DIF_SUBR_STRSTR ? 0 : -1; 4541 int inc = 1; 4542 4543 regs[rd] = notfound; 4544 4545 if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) { 4546 regs[rd] = 0; 4547 break; 4548 } 4549 4550 if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate, 4551 vstate)) { 4552 regs[rd] = 0; 4553 break; 4554 } 4555 4556 /* 4557 * strstr() and index()/rindex() have similar semantics if 4558 * both strings are the empty string: strstr() returns a 4559 * pointer to the (empty) string, and index() and rindex() 4560 * both return index 0 (regardless of any position argument). 4561 */ 4562 if (sublen == 0 && len == 0) { 4563 if (subr == DIF_SUBR_STRSTR) 4564 regs[rd] = (uintptr_t)addr; 4565 else 4566 regs[rd] = 0; 4567 break; 4568 } 4569 4570 if (subr != DIF_SUBR_STRSTR) { 4571 if (subr == DIF_SUBR_RINDEX) { 4572 limit = orig - 1; 4573 addr += len; 4574 inc = -1; 4575 } 4576 4577 /* 4578 * Both index() and rindex() take an optional position 4579 * argument that denotes the starting position. 4580 */ 4581 if (nargs == 3) { 4582 int64_t pos = (int64_t)tupregs[2].dttk_value; 4583 4584 /* 4585 * If the position argument to index() is 4586 * negative, Perl implicitly clamps it at 4587 * zero. This semantic is a little surprising 4588 * given the special meaning of negative 4589 * positions to similar Perl functions like 4590 * substr(), but it appears to reflect a 4591 * notion that index() can start from a 4592 * negative index and increment its way up to 4593 * the string. 
Given this notion, Perl's 4594 * rindex() is at least self-consistent in 4595 * that it implicitly clamps positions greater 4596 * than the string length to be the string 4597 * length. Where Perl completely loses 4598 * coherence, however, is when the specified 4599 * substring is the empty string (""). In 4600 * this case, even if the position is 4601 * negative, rindex() returns 0 -- and even if 4602 * the position is greater than the length, 4603 * index() returns the string length. These 4604 * semantics violate the notion that index() 4605 * should never return a value less than the 4606 * specified position and that rindex() should 4607 * never return a value greater than the 4608 * specified position. (One assumes that 4609 * these semantics are artifacts of Perl's 4610 * implementation and not the results of 4611 * deliberate design -- it beggars belief that 4612 * even Larry Wall could desire such oddness.) 4613 * While in the abstract one would wish for 4614 * consistent position semantics across 4615 * substr(), index() and rindex() -- or at the 4616 * very least self-consistent position 4617 * semantics for index() and rindex() -- we 4618 * instead opt to keep with the extant Perl 4619 * semantics, in all their broken glory. (Do 4620 * we have more desire to maintain Perl's 4621 * semantics than Perl does? Probably.) 
4622 */ 4623 if (subr == DIF_SUBR_RINDEX) { 4624 if (pos < 0) { 4625 if (sublen == 0) 4626 regs[rd] = 0; 4627 break; 4628 } 4629 4630 if (pos > len) 4631 pos = len; 4632 } else { 4633 if (pos < 0) 4634 pos = 0; 4635 4636 if (pos >= len) { 4637 if (sublen == 0) 4638 regs[rd] = len; 4639 break; 4640 } 4641 } 4642 4643 addr = orig + pos; 4644 } 4645 } 4646 4647 for (regs[rd] = notfound; addr != limit; addr += inc) { 4648 if (dtrace_strncmp(addr, substr, sublen) == 0) { 4649 if (subr != DIF_SUBR_STRSTR) { 4650 /* 4651 * As D index() and rindex() are 4652 * modeled on Perl (and not on awk), 4653 * we return a zero-based (and not a 4654 * one-based) index. (For you Perl 4655 * weenies: no, we're not going to add 4656 * $[ -- and shouldn't you be at a con 4657 * or something?) 4658 */ 4659 regs[rd] = (uintptr_t)(addr - orig); 4660 break; 4661 } 4662 4663 ASSERT(subr == DIF_SUBR_STRSTR); 4664 regs[rd] = (uintptr_t)addr; 4665 break; 4666 } 4667 } 4668 4669 break; 4670 } 4671 4672 case DIF_SUBR_STRTOK: { 4673 uintptr_t addr = tupregs[0].dttk_value; 4674 uintptr_t tokaddr = tupregs[1].dttk_value; 4675 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 4676 uintptr_t limit, toklimit = tokaddr + size; 4677 uint8_t c = 0, tokmap[32]; /* 256 / 8 */ 4678 char *dest = (char *)mstate->dtms_scratch_ptr; 4679 int i; 4680 4681 /* 4682 * Check both the token buffer and (later) the input buffer, 4683 * since both could be non-scratch addresses. 4684 */ 4685 if (!dtrace_strcanload(tokaddr, size, mstate, vstate)) { 4686 regs[rd] = 0; 4687 break; 4688 } 4689 4690 if (!DTRACE_INSCRATCH(mstate, size)) { 4691 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4692 regs[rd] = 0; 4693 break; 4694 } 4695 4696 if (addr == 0) { 4697 /* 4698 * If the address specified is NULL, we use our saved 4699 * strtok pointer from the mstate. 
Note that this 4700 * means that the saved strtok pointer is _only_ 4701 * valid within multiple enablings of the same probe -- 4702 * it behaves like an implicit clause-local variable. 4703 */ 4704 addr = mstate->dtms_strtok; 4705 } else { 4706 /* 4707 * If the user-specified address is non-NULL we must 4708 * access check it. This is the only time we have 4709 * a chance to do so, since this address may reside 4710 * in the string table of this clause-- future calls 4711 * (when we fetch addr from mstate->dtms_strtok) 4712 * would fail this access check. 4713 */ 4714 if (!dtrace_strcanload(addr, size, mstate, vstate)) { 4715 regs[rd] = 0; 4716 break; 4717 } 4718 } 4719 4720 /* 4721 * First, zero the token map, and then process the token 4722 * string -- setting a bit in the map for every character 4723 * found in the token string. 4724 */ 4725 for (i = 0; i < sizeof (tokmap); i++) 4726 tokmap[i] = 0; 4727 4728 for (; tokaddr < toklimit; tokaddr++) { 4729 if ((c = dtrace_load8(tokaddr)) == '\0') 4730 break; 4731 4732 ASSERT((c >> 3) < sizeof (tokmap)); 4733 tokmap[c >> 3] |= (1 << (c & 0x7)); 4734 } 4735 4736 for (limit = addr + size; addr < limit; addr++) { 4737 /* 4738 * We're looking for a character that is _not_ contained 4739 * in the token string. 4740 */ 4741 if ((c = dtrace_load8(addr)) == '\0') 4742 break; 4743 4744 if (!(tokmap[c >> 3] & (1 << (c & 0x7)))) 4745 break; 4746 } 4747 4748 if (c == '\0') { 4749 /* 4750 * We reached the end of the string without finding 4751 * any character that was not in the token string. 4752 * We return NULL in this case, and we set the saved 4753 * address to NULL as well. 4754 */ 4755 regs[rd] = 0; 4756 mstate->dtms_strtok = 0; 4757 break; 4758 } 4759 4760 /* 4761 * From here on, we're copying into the destination string. 
4762 */ 4763 for (i = 0; addr < limit && i < size - 1; addr++) { 4764 if ((c = dtrace_load8(addr)) == '\0') 4765 break; 4766 4767 if (tokmap[c >> 3] & (1 << (c & 0x7))) 4768 break; 4769 4770 ASSERT(i < size); 4771 dest[i++] = c; 4772 } 4773 4774 ASSERT(i < size); 4775 dest[i] = '\0'; 4776 regs[rd] = (uintptr_t)dest; 4777 mstate->dtms_scratch_ptr += size; 4778 mstate->dtms_strtok = addr; 4779 break; 4780 } 4781 4782 case DIF_SUBR_SUBSTR: { 4783 uintptr_t s = tupregs[0].dttk_value; 4784 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 4785 char *d = (char *)mstate->dtms_scratch_ptr; 4786 int64_t index = (int64_t)tupregs[1].dttk_value; 4787 int64_t remaining = (int64_t)tupregs[2].dttk_value; 4788 size_t len = dtrace_strlen((char *)s, size); 4789 int64_t i; 4790 4791 if (!dtrace_canload(s, len + 1, mstate, vstate)) { 4792 regs[rd] = 0; 4793 break; 4794 } 4795 4796 if (!DTRACE_INSCRATCH(mstate, size)) { 4797 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4798 regs[rd] = 0; 4799 break; 4800 } 4801 4802 if (nargs <= 2) 4803 remaining = (int64_t)size; 4804 4805 if (index < 0) { 4806 index += len; 4807 4808 if (index < 0 && index + remaining > 0) { 4809 remaining += index; 4810 index = 0; 4811 } 4812 } 4813 4814 if (index >= len || index < 0) { 4815 remaining = 0; 4816 } else if (remaining < 0) { 4817 remaining += len - index; 4818 } else if (index + remaining > size) { 4819 remaining = size - index; 4820 } 4821 4822 for (i = 0; i < remaining; i++) { 4823 if ((d[i] = dtrace_load8(s + index + i)) == '\0') 4824 break; 4825 } 4826 4827 d[i] = '\0'; 4828 4829 mstate->dtms_scratch_ptr += size; 4830 regs[rd] = (uintptr_t)d; 4831 break; 4832 } 4833 4834 case DIF_SUBR_JSON: { 4835 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 4836 uintptr_t json = tupregs[0].dttk_value; 4837 size_t jsonlen = dtrace_strlen((char *)json, size); 4838 uintptr_t elem = tupregs[1].dttk_value; 4839 size_t elemlen = dtrace_strlen((char *)elem, size); 4840 4841 char *dest = (char 
*)mstate->dtms_scratch_ptr; 4842 char *elemlist = (char *)mstate->dtms_scratch_ptr + jsonlen + 1; 4843 char *ee = elemlist; 4844 int nelems = 1; 4845 uintptr_t cur; 4846 4847 if (!dtrace_canload(json, jsonlen + 1, mstate, vstate) || 4848 !dtrace_canload(elem, elemlen + 1, mstate, vstate)) { 4849 regs[rd] = 0; 4850 break; 4851 } 4852 4853 if (!DTRACE_INSCRATCH(mstate, jsonlen + 1 + elemlen + 1)) { 4854 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4855 regs[rd] = 0; 4856 break; 4857 } 4858 4859 /* 4860 * Read the element selector and split it up into a packed list 4861 * of strings. 4862 */ 4863 for (cur = elem; cur < elem + elemlen; cur++) { 4864 char cc = dtrace_load8(cur); 4865 4866 if (cur == elem && cc == '[') { 4867 /* 4868 * If the first element selector key is 4869 * actually an array index then ignore the 4870 * bracket. 4871 */ 4872 continue; 4873 } 4874 4875 if (cc == ']') 4876 continue; 4877 4878 if (cc == '.' || cc == '[') { 4879 nelems++; 4880 cc = '\0'; 4881 } 4882 4883 *ee++ = cc; 4884 } 4885 *ee++ = '\0'; 4886 4887 if ((regs[rd] = (uintptr_t)dtrace_json(size, json, elemlist, 4888 nelems, dest)) != 0) 4889 mstate->dtms_scratch_ptr += jsonlen + 1; 4890 break; 4891 } 4892 4893 case DIF_SUBR_TOUPPER: 4894 case DIF_SUBR_TOLOWER: { 4895 uintptr_t s = tupregs[0].dttk_value; 4896 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 4897 char *dest = (char *)mstate->dtms_scratch_ptr, c; 4898 size_t len = dtrace_strlen((char *)s, size); 4899 char lower, upper, convert; 4900 int64_t i; 4901 4902 if (subr == DIF_SUBR_TOUPPER) { 4903 lower = 'a'; 4904 upper = 'z'; 4905 convert = 'A'; 4906 } else { 4907 lower = 'A'; 4908 upper = 'Z'; 4909 convert = 'a'; 4910 } 4911 4912 if (!dtrace_canload(s, len + 1, mstate, vstate)) { 4913 regs[rd] = 0; 4914 break; 4915 } 4916 4917 if (!DTRACE_INSCRATCH(mstate, size)) { 4918 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4919 regs[rd] = 0; 4920 break; 4921 } 4922 4923 for (i = 0; i < size - 1; i++) { 4924 if ((c = dtrace_load8(s + i)) 
== '\0') 4925 break; 4926 4927 if (c >= lower && c <= upper) 4928 c = convert + (c - lower); 4929 4930 dest[i] = c; 4931 } 4932 4933 ASSERT(i < size); 4934 dest[i] = '\0'; 4935 regs[rd] = (uintptr_t)dest; 4936 mstate->dtms_scratch_ptr += size; 4937 break; 4938 } 4939 4940#if defined(sun) 4941 case DIF_SUBR_GETMAJOR: 4942#ifdef _LP64 4943 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64; 4944#else 4945 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & MAXMAJ; 4946#endif 4947 break; 4948 4949 case DIF_SUBR_GETMINOR: 4950#ifdef _LP64 4951 regs[rd] = tupregs[0].dttk_value & MAXMIN64; 4952#else 4953 regs[rd] = tupregs[0].dttk_value & MAXMIN; 4954#endif 4955 break; 4956 4957 case DIF_SUBR_DDI_PATHNAME: { 4958 /* 4959 * This one is a galactic mess. We are going to roughly 4960 * emulate ddi_pathname(), but it's made more complicated 4961 * by the fact that we (a) want to include the minor name and 4962 * (b) must proceed iteratively instead of recursively. 4963 */ 4964 uintptr_t dest = mstate->dtms_scratch_ptr; 4965 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 4966 char *start = (char *)dest, *end = start + size - 1; 4967 uintptr_t daddr = tupregs[0].dttk_value; 4968 int64_t minor = (int64_t)tupregs[1].dttk_value; 4969 char *s; 4970 int i, len, depth = 0; 4971 4972 /* 4973 * Due to all the pointer jumping we do and context we must 4974 * rely upon, we just mandate that the user must have kernel 4975 * read privileges to use this routine. 4976 */ 4977 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) == 0) { 4978 *flags |= CPU_DTRACE_KPRIV; 4979 *illval = daddr; 4980 regs[rd] = 0; 4981 } 4982 4983 if (!DTRACE_INSCRATCH(mstate, size)) { 4984 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 4985 regs[rd] = 0; 4986 break; 4987 } 4988 4989 *end = '\0'; 4990 4991 /* 4992 * We want to have a name for the minor. In order to do this, 4993 * we need to walk the minor list from the devinfo. 
We want 4994 * to be sure that we don't infinitely walk a circular list, 4995 * so we check for circularity by sending a scout pointer 4996 * ahead two elements for every element that we iterate over; 4997 * if the list is circular, these will ultimately point to the 4998 * same element. You may recognize this little trick as the 4999 * answer to a stupid interview question -- one that always 5000 * seems to be asked by those who had to have it laboriously 5001 * explained to them, and who can't even concisely describe 5002 * the conditions under which one would be forced to resort to 5003 * this technique. Needless to say, those conditions are 5004 * found here -- and probably only here. Is this the only use 5005 * of this infamous trick in shipping, production code? If it 5006 * isn't, it probably should be... 5007 */ 5008 if (minor != -1) { 5009 uintptr_t maddr = dtrace_loadptr(daddr + 5010 offsetof(struct dev_info, devi_minor)); 5011 5012 uintptr_t next = offsetof(struct ddi_minor_data, next); 5013 uintptr_t name = offsetof(struct ddi_minor_data, 5014 d_minor) + offsetof(struct ddi_minor, name); 5015 uintptr_t dev = offsetof(struct ddi_minor_data, 5016 d_minor) + offsetof(struct ddi_minor, dev); 5017 uintptr_t scout; 5018 5019 if (maddr != NULL) 5020 scout = dtrace_loadptr(maddr + next); 5021 5022 while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { 5023 uint64_t m; 5024#ifdef _LP64 5025 m = dtrace_load64(maddr + dev) & MAXMIN64; 5026#else 5027 m = dtrace_load32(maddr + dev) & MAXMIN; 5028#endif 5029 if (m != minor) { 5030 maddr = dtrace_loadptr(maddr + next); 5031 5032 if (scout == NULL) 5033 continue; 5034 5035 scout = dtrace_loadptr(scout + next); 5036 5037 if (scout == NULL) 5038 continue; 5039 5040 scout = dtrace_loadptr(scout + next); 5041 5042 if (scout == NULL) 5043 continue; 5044 5045 if (scout == maddr) { 5046 *flags |= CPU_DTRACE_ILLOP; 5047 break; 5048 } 5049 5050 continue; 5051 } 5052 5053 /* 5054 * We have the minor data. 
Now we need to 5055 * copy the minor's name into the end of the 5056 * pathname. 5057 */ 5058 s = (char *)dtrace_loadptr(maddr + name); 5059 len = dtrace_strlen(s, size); 5060 5061 if (*flags & CPU_DTRACE_FAULT) 5062 break; 5063 5064 if (len != 0) { 5065 if ((end -= (len + 1)) < start) 5066 break; 5067 5068 *end = ':'; 5069 } 5070 5071 for (i = 1; i <= len; i++) 5072 end[i] = dtrace_load8((uintptr_t)s++); 5073 break; 5074 } 5075 } 5076 5077 while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { 5078 ddi_node_state_t devi_state; 5079 5080 devi_state = dtrace_load32(daddr + 5081 offsetof(struct dev_info, devi_node_state)); 5082 5083 if (*flags & CPU_DTRACE_FAULT) 5084 break; 5085 5086 if (devi_state >= DS_INITIALIZED) { 5087 s = (char *)dtrace_loadptr(daddr + 5088 offsetof(struct dev_info, devi_addr)); 5089 len = dtrace_strlen(s, size); 5090 5091 if (*flags & CPU_DTRACE_FAULT) 5092 break; 5093 5094 if (len != 0) { 5095 if ((end -= (len + 1)) < start) 5096 break; 5097 5098 *end = '@'; 5099 } 5100 5101 for (i = 1; i <= len; i++) 5102 end[i] = dtrace_load8((uintptr_t)s++); 5103 } 5104 5105 /* 5106 * Now for the node name... 5107 */ 5108 s = (char *)dtrace_loadptr(daddr + 5109 offsetof(struct dev_info, devi_node_name)); 5110 5111 daddr = dtrace_loadptr(daddr + 5112 offsetof(struct dev_info, devi_parent)); 5113 5114 /* 5115 * If our parent is NULL (that is, if we're the root 5116 * node), we're going to use the special path 5117 * "devices". 
5118 */ 5119 if (daddr == 0) 5120 s = "devices"; 5121 5122 len = dtrace_strlen(s, size); 5123 if (*flags & CPU_DTRACE_FAULT) 5124 break; 5125 5126 if ((end -= (len + 1)) < start) 5127 break; 5128 5129 for (i = 1; i <= len; i++) 5130 end[i] = dtrace_load8((uintptr_t)s++); 5131 *end = '/'; 5132 5133 if (depth++ > dtrace_devdepth_max) { 5134 *flags |= CPU_DTRACE_ILLOP; 5135 break; 5136 } 5137 } 5138 5139 if (end < start) 5140 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5141 5142 if (daddr == 0) { 5143 regs[rd] = (uintptr_t)end; 5144 mstate->dtms_scratch_ptr += size; 5145 } 5146 5147 break; 5148 } 5149#endif 5150 5151 case DIF_SUBR_STRJOIN: { 5152 char *d = (char *)mstate->dtms_scratch_ptr; 5153 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 5154 uintptr_t s1 = tupregs[0].dttk_value; 5155 uintptr_t s2 = tupregs[1].dttk_value; 5156 int i = 0; 5157 5158 if (!dtrace_strcanload(s1, size, mstate, vstate) || 5159 !dtrace_strcanload(s2, size, mstate, vstate)) { 5160 regs[rd] = 0; 5161 break; 5162 } 5163 5164 if (!DTRACE_INSCRATCH(mstate, size)) { 5165 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5166 regs[rd] = 0; 5167 break; 5168 } 5169 5170 for (;;) { 5171 if (i >= size) { 5172 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5173 regs[rd] = 0; 5174 break; 5175 } 5176 5177 if ((d[i++] = dtrace_load8(s1++)) == '\0') { 5178 i--; 5179 break; 5180 } 5181 } 5182 5183 for (;;) { 5184 if (i >= size) { 5185 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5186 regs[rd] = 0; 5187 break; 5188 } 5189 5190 if ((d[i++] = dtrace_load8(s2++)) == '\0') 5191 break; 5192 } 5193 5194 if (i < size) { 5195 mstate->dtms_scratch_ptr += i; 5196 regs[rd] = (uintptr_t)d; 5197 } 5198 5199 break; 5200 } 5201 5202 case DIF_SUBR_STRTOLL: { 5203 uintptr_t s = tupregs[0].dttk_value; 5204 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 5205 int base = 10; 5206 5207 if (nargs > 1) { 5208 if ((base = tupregs[1].dttk_value) <= 1 || 5209 base > ('z' - 'a' + 1) + ('9' - '0' + 1)) { 5210 *flags |= CPU_DTRACE_ILLOP; 
5211 break; 5212 } 5213 } 5214 5215 if (!dtrace_strcanload(s, size, mstate, vstate)) { 5216 regs[rd] = INT64_MIN; 5217 break; 5218 } 5219 5220 regs[rd] = dtrace_strtoll((char *)s, base, size); 5221 break; 5222 } 5223 5224 case DIF_SUBR_LLTOSTR: { 5225 int64_t i = (int64_t)tupregs[0].dttk_value; 5226 uint64_t val, digit; 5227 uint64_t size = 65; /* enough room for 2^64 in binary */ 5228 char *end = (char *)mstate->dtms_scratch_ptr + size - 1; 5229 int base = 10; 5230 5231 if (nargs > 1) { 5232 if ((base = tupregs[1].dttk_value) <= 1 || 5233 base > ('z' - 'a' + 1) + ('9' - '0' + 1)) { 5234 *flags |= CPU_DTRACE_ILLOP; 5235 break; 5236 } 5237 } 5238 5239 val = (base == 10 && i < 0) ? i * -1 : i; 5240 5241 if (!DTRACE_INSCRATCH(mstate, size)) { 5242 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5243 regs[rd] = 0; 5244 break; 5245 } 5246 5247 for (*end-- = '\0'; val; val /= base) { 5248 if ((digit = val % base) <= '9' - '0') { 5249 *end-- = '0' + digit; 5250 } else { 5251 *end-- = 'a' + (digit - ('9' - '0') - 1); 5252 } 5253 } 5254 5255 if (i == 0 && base == 16) 5256 *end-- = '0'; 5257 5258 if (base == 16) 5259 *end-- = 'x'; 5260 5261 if (i == 0 || base == 8 || base == 16) 5262 *end-- = '0'; 5263 5264 if (i < 0 && base == 10) 5265 *end-- = '-'; 5266 5267 regs[rd] = (uintptr_t)end + 1; 5268 mstate->dtms_scratch_ptr += size; 5269 break; 5270 } 5271 5272 case DIF_SUBR_HTONS: 5273 case DIF_SUBR_NTOHS: 5274#if BYTE_ORDER == BIG_ENDIAN 5275 regs[rd] = (uint16_t)tupregs[0].dttk_value; 5276#else 5277 regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value); 5278#endif 5279 break; 5280 5281 5282 case DIF_SUBR_HTONL: 5283 case DIF_SUBR_NTOHL: 5284#if BYTE_ORDER == BIG_ENDIAN 5285 regs[rd] = (uint32_t)tupregs[0].dttk_value; 5286#else 5287 regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value); 5288#endif 5289 break; 5290 5291 5292 case DIF_SUBR_HTONLL: 5293 case DIF_SUBR_NTOHLL: 5294#if BYTE_ORDER == BIG_ENDIAN 5295 regs[rd] = (uint64_t)tupregs[0].dttk_value; 5296#else 5297 regs[rd] = 
DT_BSWAP_64((uint64_t)tupregs[0].dttk_value); 5298#endif 5299 break; 5300 5301 5302 case DIF_SUBR_DIRNAME: 5303 case DIF_SUBR_BASENAME: { 5304 char *dest = (char *)mstate->dtms_scratch_ptr; 5305 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; 5306 uintptr_t src = tupregs[0].dttk_value; 5307 int i, j, len = dtrace_strlen((char *)src, size); 5308 int lastbase = -1, firstbase = -1, lastdir = -1; 5309 int start, end; 5310 5311 if (!dtrace_canload(src, len + 1, mstate, vstate)) { 5312 regs[rd] = 0; 5313 break; 5314 } 5315 5316 if (!DTRACE_INSCRATCH(mstate, size)) { 5317 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); 5318 regs[rd] = 0; 5319 break; 5320 } 5321 5322 /* 5323 * The basename and dirname for a zero-length string is 5324 * defined to be "." 5325 */ 5326 if (len == 0) { 5327 len = 1; 5328 src = (uintptr_t)"."; 5329 } 5330 5331 /* 5332 * Start from the back of the string, moving back toward the 5333 * front until we see a character that isn't a slash. That 5334 * character is the last character in the basename. 5335 */ 5336 for (i = len - 1; i >= 0; i--) { 5337 if (dtrace_load8(src + i) != '/') 5338 break; 5339 } 5340 5341 if (i >= 0) 5342 lastbase = i; 5343 5344 /* 5345 * Starting from the last character in the basename, move 5346 * towards the front until we find a slash. The character 5347 * that we processed immediately before that is the first 5348 * character in the basename. 5349 */ 5350 for (; i >= 0; i--) { 5351 if (dtrace_load8(src + i) == '/') 5352 break; 5353 } 5354 5355 if (i >= 0) 5356 firstbase = i + 1; 5357 5358 /* 5359 * Now keep going until we find a non-slash character. That 5360 * character is the last character in the dirname. 
5361 */ 5362 for (; i >= 0; i--) { 5363 if (dtrace_load8(src + i) != '/') 5364 break; 5365 } 5366 5367 if (i >= 0) 5368 lastdir = i; 5369 5370 ASSERT(!(lastbase == -1 && firstbase != -1)); 5371 ASSERT(!(firstbase == -1 && lastdir != -1)); 5372 5373 if (lastbase == -1) { 5374 /* 5375 * We didn't find a non-slash character. We know that 5376 * the length is non-zero, so the whole string must be 5377 * slashes. In either the dirname or the basename 5378 * case, we return '/'. 5379 */ 5380 ASSERT(firstbase == -1); 5381 firstbase = lastbase = lastdir = 0; 5382 } 5383 5384 if (firstbase == -1) { 5385 /* 5386 * The entire string consists only of a basename 5387 * component. If we're looking for dirname, we need 5388 * to change our string to be just "."; if we're 5389 * looking for a basename, we'll just set the first 5390 * character of the basename to be 0. 5391 */ 5392 if (subr == DIF_SUBR_DIRNAME) { 5393 ASSERT(lastdir == -1); 5394 src = (uintptr_t)"."; 5395 lastdir = 0; 5396 } else { 5397 firstbase = 0; 5398 } 5399 } 5400 5401 if (subr == DIF_SUBR_DIRNAME) { 5402 if (lastdir == -1) { 5403 /* 5404 * We know that we have a slash in the name -- 5405 * or lastdir would be set to 0, above. And 5406 * because lastdir is -1, we know that this 5407 * slash must be the first character. (That 5408 * is, the full string must be of the form 5409 * "/basename".) In this case, the last 5410 * character of the directory name is 0. 
				 */
				lastdir = 0;
			}

			start = 0;
			end = lastdir;
		} else {
			ASSERT(subr == DIF_SUBR_BASENAME);
			ASSERT(firstbase != -1 && lastbase != -1);
			start = firstbase;
			end = lastbase;
		}

		/*
		 * Copy the selected [start, end] substring into scratch,
		 * bounded by the configured string size, and NUL-terminate.
		 */
		for (i = start, j = 0; i <= end && j < size - 1; i++, j++)
			dest[j] = dtrace_load8(src + i);

		dest[j] = '\0';
		regs[rd] = (uintptr_t)dest;
		mstate->dtms_scratch_ptr += size;
		break;
	}

	case DIF_SUBR_GETF: {
		uintptr_t fd = tupregs[0].dttk_value;
		struct filedesc *fdp;
		file_t *fp;

		if (!dtrace_priv_proc(state)) {
			regs[rd] = 0;
			break;
		}
		/*
		 * Translate the descriptor to a file_t under the shared
		 * descriptor-table lock.  The pointer (possibly NULL) is
		 * stashed in the mstate so later access checks in this ECB
		 * can consult it.
		 */
		fdp = curproc->p_fd;
		FILEDESC_SLOCK(fdp);
		fp = fget_locked(fdp, fd);
		mstate->dtms_getf = fp;
		regs[rd] = (uintptr_t)fp;
		FILEDESC_SUNLOCK(fdp);
		break;
	}

	case DIF_SUBR_CLEANPATH: {
		char *dest = (char *)mstate->dtms_scratch_ptr, c;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		uintptr_t src = tupregs[0].dttk_value;
		int i = 0, j = 0;
#if defined(sun)
		zone_t *z;
#endif

		if (!dtrace_strcanload(src, size, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		/*
		 * Move forward, loading each character.
		 */
		do {
			c = dtrace_load8(src + i++);
next:
			if (j + 5 >= size)	/* 5 = strlen("/..c\0") */
				break;

			if (c != '/') {
				dest[j++] = c;
				continue;
			}

			c = dtrace_load8(src + i++);

			if (c == '/') {
				/*
				 * We have two slashes -- we can just advance
				 * to the next character.
				 */
				goto next;
			}

			if (c != '.') {
				/*
				 * This is not "." and it's not ".." -- we can
				 * just store the "/" and this character and
				 * drive on.
				 */
				dest[j++] = '/';
				dest[j++] = c;
				continue;
			}

			c = dtrace_load8(src + i++);

			if (c == '/') {
				/*
				 * This is a "/./" component.  We're not going
				 * to store anything in the destination buffer;
				 * we're just going to go to the next component.
				 */
				goto next;
			}

			if (c != '.') {
				/*
				 * This is not ".." -- we can just store the
				 * "/." and this character and continue
				 * processing.
				 */
				dest[j++] = '/';
				dest[j++] = '.';
				dest[j++] = c;
				continue;
			}

			c = dtrace_load8(src + i++);

			if (c != '/' && c != '\0') {
				/*
				 * This is not ".." -- it's "..[mumble]".
				 * We'll store the "/.." and this character
				 * and continue processing.
				 */
				dest[j++] = '/';
				dest[j++] = '.';
				dest[j++] = '.';
				dest[j++] = c;
				continue;
			}

			/*
			 * This is "/../" or "/..\0".  We need to back up
			 * our destination pointer until we find a "/".
			 */
			i--;
			while (j != 0 && dest[--j] != '/')
				continue;

			if (c == '\0')
				dest[++j] = '/';
		} while (c != '\0');

		dest[j] = '\0';

#if defined(sun)
		if (mstate->dtms_getf != NULL &&
		    !(mstate->dtms_access & DTRACE_ACCESS_KERNEL) &&
		    (z = state->dts_cred.dcr_cred->cr_zone) != kcred->cr_zone) {
			/*
			 * If we've done a getf() as a part of this ECB and we
			 * don't have kernel access (and we're not in the global
			 * zone), check if the path we cleaned up begins with
			 * the zone's root path, and trim it off if so.  Note
			 * that this is an output cleanliness issue, not a
			 * security issue: knowing one's zone root path does
			 * not enable privilege escalation.
			 */
			if (strstr(dest, z->zone_rootpath) == dest)
				dest += strlen(z->zone_rootpath) - 1;
		}
#endif

		regs[rd] = (uintptr_t)dest;
		mstate->dtms_scratch_ptr += size;
		break;
	}

	case DIF_SUBR_INET_NTOA:
	case DIF_SUBR_INET_NTOA6:
	case DIF_SUBR_INET_NTOP: {
		size_t size;
		int af, argi, i;
		char *base, *end;

		/*
		 * inet_ntop() takes the address family as its first
		 * argument; inet_ntoa()/inet_ntoa6() imply it.
		 */
		if (subr == DIF_SUBR_INET_NTOP) {
			af = (int)tupregs[0].dttk_value;
			argi = 1;
		} else {
			af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6;
			argi = 0;
		}

		if (af == AF_INET) {
			ipaddr_t ip4;
			uint8_t *ptr8, val;

			/*
			 * Safely load the IPv4 address.
			 */
			ip4 = dtrace_load32(tupregs[argi].dttk_value);

			/*
			 * Check an IPv4 string will fit in scratch.
			 */
			size = INET_ADDRSTRLEN;
			if (!DTRACE_INSCRATCH(mstate, size)) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
				regs[rd] = 0;
				break;
			}
			base = (char *)mstate->dtms_scratch_ptr;
			end = (char *)mstate->dtms_scratch_ptr + size - 1;

			/*
			 * Stringify as a dotted decimal quad; the string is
			 * built backwards from the terminating NUL.
			 */
			*end-- = '\0';
			ptr8 = (uint8_t *)&ip4;
			for (i = 3; i >= 0; i--) {
				val = ptr8[i];

				if (val == 0) {
					*end-- = '0';
				} else {
					for (; val; val /= 10) {
						*end-- = '0' + (val % 10);
					}
				}

				if (i > 0)
					*end-- = '.';
			}
			ASSERT(end + 1 >= base);

		} else if (af == AF_INET6) {
			struct in6_addr ip6;
			int firstzero, tryzero, numzero, v6end;
			uint16_t val;
			const char digits[] = "0123456789abcdef";

			/*
			 * Stringify using RFC 1884 convention 2 - 16 bit
			 * hexadecimal values with a zero-run compression.
			 * Lower case hexadecimal digits are used.
			 *	eg, fe80::214:4fff:fe0b:76c8.
			 * The IPv4 embedded form is returned for inet_ntop,
			 * just the IPv4 string is returned for inet_ntoa6.
			 */

			/*
			 * Safely load the IPv6 address.
			 */
			dtrace_bcopy(
			    (void *)(uintptr_t)tupregs[argi].dttk_value,
			    (void *)(uintptr_t)&ip6, sizeof (struct in6_addr));

			/*
			 * Check an IPv6 string will fit in scratch.
			 */
			size = INET6_ADDRSTRLEN;
			if (!DTRACE_INSCRATCH(mstate, size)) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
				regs[rd] = 0;
				break;
			}
			base = (char *)mstate->dtms_scratch_ptr;
			end = (char *)mstate->dtms_scratch_ptr + size - 1;
			*end-- = '\0';

			/*
			 * Find the longest run of 16 bit zero values
			 * for the single allowed zero compression - "::".
			 */
			firstzero = -1;
			tryzero = -1;
			numzero = 1;
			for (i = 0; i < sizeof (struct in6_addr); i++) {
#if defined(sun)
				if (ip6._S6_un._S6_u8[i] == 0 &&
#else
				if (ip6.__u6_addr.__u6_addr8[i] == 0 &&
#endif
				    tryzero == -1 && i % 2 == 0) {
					tryzero = i;
					continue;
				}

				if (tryzero != -1 &&
#if defined(sun)
				    (ip6._S6_un._S6_u8[i] != 0 ||
#else
				    (ip6.__u6_addr.__u6_addr8[i] != 0 ||
#endif
				    i == sizeof (struct in6_addr) - 1)) {

					if (i - tryzero <= numzero) {
						tryzero = -1;
						continue;
					}

					firstzero = tryzero;
					numzero = i - i % 2 - tryzero;
					tryzero = -1;

#if defined(sun)
					if (ip6._S6_un._S6_u8[i] == 0 &&
#else
					if (ip6.__u6_addr.__u6_addr8[i] == 0 &&
#endif
					    i == sizeof (struct in6_addr) - 1)
						numzero += 2;
				}
			}
			ASSERT(firstzero + numzero <= sizeof (struct in6_addr));

			/*
			 * Check for an IPv4 embedded address.
			 */
			v6end = sizeof (struct in6_addr) - 2;
			if (IN6_IS_ADDR_V4MAPPED(&ip6) ||
			    IN6_IS_ADDR_V4COMPAT(&ip6)) {
				for (i = sizeof (struct in6_addr) - 1;
				    i >= DTRACE_V4MAPPED_OFFSET; i--) {
					ASSERT(end >= base);

#if defined(sun)
					val = ip6._S6_un._S6_u8[i];
#else
					val = ip6.__u6_addr.__u6_addr8[i];
#endif

					if (val == 0) {
						*end-- = '0';
					} else {
						for (; val; val /= 10) {
							*end-- = '0' + val % 10;
						}
					}

					if (i > DTRACE_V4MAPPED_OFFSET)
						*end-- = '.';
				}

				if (subr == DIF_SUBR_INET_NTOA6)
					goto inetout;

				/*
				 * Set v6end to skip the IPv4 address that
				 * we have already stringified.
				 */
				v6end = 10;
			}

			/*
			 * Build the IPv6 string by working through the
			 * address in reverse.
			 */
			for (i = v6end; i >= 0; i -= 2) {
				ASSERT(end >= base);

				if (i == firstzero + numzero - 2) {
					*end-- = ':';
					*end-- = ':';
					i -= numzero - 2;
					continue;
				}

				if (i < 14 && i != firstzero - 2)
					*end-- = ':';

#if defined(sun)
				val = (ip6._S6_un._S6_u8[i] << 8) +
				    ip6._S6_un._S6_u8[i + 1];
#else
				val = (ip6.__u6_addr.__u6_addr8[i] << 8) +
				    ip6.__u6_addr.__u6_addr8[i + 1];
#endif

				if (val == 0) {
					*end-- = '0';
				} else {
					for (; val; val /= 16) {
						*end-- = digits[val % 16];
					}
				}
			}
			ASSERT(end + 1 >= base);

		} else {
			/*
			 * The user didn't use AH_INET or AH_INET6.
			 */
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			regs[rd] = 0;
			break;
		}

inetout:	regs[rd] = (uintptr_t)end + 1;
		mstate->dtms_scratch_ptr += size;
		break;
	}

	case DIF_SUBR_MEMREF: {
		/*
		 * Return a pointer-aligned two-element scratch array holding
		 * the (address, length) pair from the tuple registers.
		 */
		uintptr_t size = 2 * sizeof(uintptr_t);
		uintptr_t *memref = (uintptr_t *) P2ROUNDUP(mstate->dtms_scratch_ptr, sizeof(uintptr_t));
		size_t scratch_size = ((uintptr_t) memref - mstate->dtms_scratch_ptr) + size;

		/* address and length */
		memref[0] = tupregs[0].dttk_value;
		memref[1] = tupregs[1].dttk_value;

		regs[rd] = (uintptr_t) memref;
		mstate->dtms_scratch_ptr += scratch_size;
		break;
	}

#if !defined(sun)
	case DIF_SUBR_MEMSTR: {
		/*
		 * Copy 'size - 1' bytes from 'mem' into scratch, replacing
		 * embedded NUL bytes with the separator character 'c', and
		 * NUL-terminate the result (FreeBSD-only subroutine).
		 */
		char *str = (char *)mstate->dtms_scratch_ptr;
		uintptr_t mem = tupregs[0].dttk_value;
		char c = tupregs[1].dttk_value;
		size_t size = tupregs[2].dttk_value;
		uint8_t n;
		int i;

		regs[rd] = 0;

		if (size == 0)
			break;

		if (!dtrace_canload(mem, size - 1, mstate, vstate))
			break;

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			break;
		}

		if (dtrace_memstr_max != 0 && size > dtrace_memstr_max) {
			*flags |= CPU_DTRACE_ILLOP;
			break;
		}

		for (i = 0; i < size - 1; i++) {
			n = dtrace_load8(mem++);
			str[i] = (n == 0) ? c : n;
		}
		str[size - 1] = 0;

		regs[rd] = (uintptr_t)str;
		mstate->dtms_scratch_ptr += size;
		break;
	}
#endif

	case DIF_SUBR_TYPEREF: {
		/*
		 * Return a pointer-aligned four-element scratch array
		 * describing a typed reference.
		 */
		uintptr_t size = 4 * sizeof(uintptr_t);
		uintptr_t *typeref = (uintptr_t *) P2ROUNDUP(mstate->dtms_scratch_ptr, sizeof(uintptr_t));
		size_t scratch_size = ((uintptr_t) typeref - mstate->dtms_scratch_ptr) + size;

		/* address, num_elements, type_str, type_len */
		typeref[0] = tupregs[0].dttk_value;
		typeref[1] = tupregs[1].dttk_value;
		typeref[2] = tupregs[2].dttk_value;
		typeref[3] = tupregs[3].dttk_value;

		regs[rd] = (uintptr_t) typeref;
		mstate->dtms_scratch_ptr += scratch_size;
		break;
	}
	}
}

/*
 * Emulate the execution of DTrace IR instructions specified by the given
 * DIF object.  This function is deliberately void of assertions as all of
 * the necessary checks are handled by a call to dtrace_difo_validate().
 */
static uint64_t
dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate, dtrace_state_t *state)
{
	const dif_instr_t *text = difo->dtdo_buf;
	const uint_t textlen = difo->dtdo_len;
	const char *strtab = difo->dtdo_strtab;
	const uint64_t *inttab = difo->dtdo_inttab;

	uint64_t rval = 0;
	dtrace_statvar_t *svar;
	dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
	dtrace_difv_t *v;
	volatile uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags;
	volatile uintptr_t *illval = &cpu_core[curcpu].cpuc_dtrace_illval;

	dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
	uint64_t regs[DIF_DIR_NREGS];
	uint64_t *tmp;

	uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0;
	int64_t cc_r;
	uint_t pc = 0, id, opc = 0;
	uint8_t ttop = 0;
	dif_instr_t instr;
	uint_t r1, r2, rd;

	/*
	 * We stash the current DIF object into the machine state:
	 * we need it
	 * for subsequent access checking.
	 */
	mstate->dtms_difo = difo;

	regs[DIF_REG_R0] = 0;		/* %r0 is fixed at zero */

	/*
	 * Main interpreter loop: fetch/decode/execute until we run off the
	 * end of the program or a fault flag is raised on this CPU.
	 */
	while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) {
		opc = pc;

		instr = text[pc++];
		r1 = DIF_INSTR_R1(instr);
		r2 = DIF_INSTR_R2(instr);
		rd = DIF_INSTR_RD(instr);

		switch (DIF_INSTR_OP(instr)) {
		case DIF_OP_OR:
			regs[rd] = regs[r1] | regs[r2];
			break;
		case DIF_OP_XOR:
			regs[rd] = regs[r1] ^ regs[r2];
			break;
		case DIF_OP_AND:
			regs[rd] = regs[r1] & regs[r2];
			break;
		case DIF_OP_SLL:
			/*
			 * NOTE(review): the shift count is not masked here;
			 * a count >= 64 is undefined behavior in C.
			 * Presumably bounded elsewhere -- confirm against
			 * dtrace_difo_validate().
			 */
			regs[rd] = regs[r1] << regs[r2];
			break;
		case DIF_OP_SRL:
			regs[rd] = regs[r1] >> regs[r2];
			break;
		case DIF_OP_SUB:
			regs[rd] = regs[r1] - regs[r2];
			break;
		case DIF_OP_ADD:
			regs[rd] = regs[r1] + regs[r2];
			break;
		case DIF_OP_MUL:
			regs[rd] = regs[r1] * regs[r2];
			break;
		case DIF_OP_SDIV:
			/*
			 * Division/remainder by zero is reported via the
			 * per-CPU DIVZERO flag rather than faulting.
			 */
			if (regs[r2] == 0) {
				regs[rd] = 0;
				*flags |= CPU_DTRACE_DIVZERO;
			} else {
				regs[rd] = (int64_t)regs[r1] /
				    (int64_t)regs[r2];
			}
			break;

		case DIF_OP_UDIV:
			if (regs[r2] == 0) {
				regs[rd] = 0;
				*flags |= CPU_DTRACE_DIVZERO;
			} else {
				regs[rd] = regs[r1] / regs[r2];
			}
			break;

		case DIF_OP_SREM:
			if (regs[r2] == 0) {
				regs[rd] = 0;
				*flags |= CPU_DTRACE_DIVZERO;
			} else {
				regs[rd] = (int64_t)regs[r1] %
				    (int64_t)regs[r2];
			}
			break;

		case DIF_OP_UREM:
			if (regs[r2] == 0) {
				regs[rd] = 0;
				*flags |= CPU_DTRACE_DIVZERO;
			} else {
				regs[rd] = regs[r1] % regs[r2];
			}
			break;

		case DIF_OP_NOT:
			regs[rd] = ~regs[r1];
			break;
		case DIF_OP_MOV:
			regs[rd] = regs[r1];
			break;
		case DIF_OP_CMP:
			/*
			 * Condition codes are derived from the subtraction;
			 * cc_c carries the unsigned comparison.
			 * NOTE(review): cc_n is computed from the wrapped
			 * 64-bit difference, so signed branches may mis-order
			 * operands that differ by 2^63 or more -- verify
			 * against later upstream revisions.
			 */
			cc_r = regs[r1] - regs[r2];
			cc_n = cc_r < 0;
			cc_z = cc_r == 0;
			cc_v = 0;
			cc_c = regs[r1] < regs[r2];
			break;
		case DIF_OP_TST:
			cc_n = cc_v = cc_c = 0;
			cc_z = regs[r1] == 0;
			break;
		case DIF_OP_BA:
			pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BE:
			if (cc_z)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BNE:
			if (cc_z == 0)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BG:
			if ((cc_z | (cc_n ^ cc_v)) == 0)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BGU:
			if ((cc_c | cc_z) == 0)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BGE:
			if ((cc_n ^ cc_v) == 0)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BGEU:
			if (cc_c == 0)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BL:
			if (cc_n ^ cc_v)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BLU:
			if (cc_c)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BLE:
			if (cc_z | (cc_n ^ cc_v))
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BLEU:
			if (cc_c | cc_z)
				pc = DIF_INSTR_LABEL(instr);
			break;
		/*
		 * The RLD* variants verify the load address with
		 * dtrace_canload() first, then fall through to the
		 * corresponding unchecked LD* load.
		 */
		case DIF_OP_RLDSB:
			if (!dtrace_canload(regs[r1], 1, mstate, vstate))
				break;
			/*FALLTHROUGH*/
		case DIF_OP_LDSB:
			regs[rd] = (int8_t)dtrace_load8(regs[r1]);
			break;
		case DIF_OP_RLDSH:
			if (!dtrace_canload(regs[r1], 2, mstate, vstate))
				break;
			/*FALLTHROUGH*/
		case DIF_OP_LDSH:
			regs[rd] = (int16_t)dtrace_load16(regs[r1]);
			break;
		case DIF_OP_RLDSW:
			if (!dtrace_canload(regs[r1], 4, mstate, vstate))
				break;
			/*FALLTHROUGH*/
		case DIF_OP_LDSW:
			regs[rd] = (int32_t)dtrace_load32(regs[r1]);
			break;
		case DIF_OP_RLDUB:
			if (!dtrace_canload(regs[r1], 1, mstate, vstate))
				break;
			/*FALLTHROUGH*/
		case DIF_OP_LDUB:
			regs[rd] = dtrace_load8(regs[r1]);
			break;
		case DIF_OP_RLDUH:
			if (!dtrace_canload(regs[r1], 2, mstate, vstate))
				break;
			/*FALLTHROUGH*/
		case DIF_OP_LDUH:
			regs[rd] = dtrace_load16(regs[r1]);
			break;
		case DIF_OP_RLDUW:
			if (!dtrace_canload(regs[r1], 4, mstate, vstate))
				break;
			/*FALLTHROUGH*/
		case DIF_OP_LDUW:
			regs[rd] = dtrace_load32(regs[r1]);
			break;
		case DIF_OP_RLDX:
			if (!dtrace_canload(regs[r1], 8, mstate, vstate))
				break;
			/*FALLTHROUGH*/
		case DIF_OP_LDX:
			regs[rd] = dtrace_load64(regs[r1]);
			break;
		/*
		 * User-space loads are bracketed by NOFAULT so that a bad
		 * address is reflected in the per-CPU fault flags (checked
		 * at the top of the loop) instead of trapping.
		 */
		case DIF_OP_ULDSB:
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			regs[rd] = (int8_t)
			    dtrace_fuword8((void *)(uintptr_t)regs[r1]);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			break;
		case DIF_OP_ULDSH:
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			regs[rd] = (int16_t)
			    dtrace_fuword16((void *)(uintptr_t)regs[r1]);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			break;
		case DIF_OP_ULDSW:
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			regs[rd] = (int32_t)
			    dtrace_fuword32((void *)(uintptr_t)regs[r1]);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			break;
		case DIF_OP_ULDUB:
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			regs[rd] =
			    dtrace_fuword8((void *)(uintptr_t)regs[r1]);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			break;
		case DIF_OP_ULDUH:
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			regs[rd] =
			    dtrace_fuword16((void *)(uintptr_t)regs[r1]);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			break;
		case DIF_OP_ULDUW:
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			regs[rd] =
			    dtrace_fuword32((void *)(uintptr_t)regs[r1]);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			break;
		case DIF_OP_ULDX:
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			regs[rd] =
			    dtrace_fuword64((void *)(uintptr_t)regs[r1]);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			break;
		case DIF_OP_RET:
			rval = regs[rd];
			pc = textlen;
			break;
		case DIF_OP_NOP:
			break;
		case DIF_OP_SETX:
			regs[rd] = inttab[DIF_INSTR_INTEGER(instr)];
			break;
		case DIF_OP_SETS:
			regs[rd] = (uint64_t)(uintptr_t)
			    (strtab + DIF_INSTR_STRING(instr));
			break;
		case DIF_OP_SCMP: {
			size_t sz = state->dts_options[DTRACEOPT_STRSIZE];
			uintptr_t s1 = regs[r1];
			uintptr_t s2 = regs[r2];

			if (s1 != 0 &&
			    !dtrace_strcanload(s1, sz, mstate, vstate))
				break;
			if (s2 != 0 &&
			    !dtrace_strcanload(s2, sz, mstate, vstate))
				break;

			cc_r = dtrace_strncmp((char *)s1, (char *)s2, sz);

			cc_n = cc_r < 0;
			cc_z = cc_r == 0;
			cc_v = cc_c = 0;
			break;
		}
		case DIF_OP_LDGA:
			regs[rd] = dtrace_dif_variable(mstate, state,
			    r1, regs[r2]);
			break;
		case DIF_OP_LDGS:
			id = DIF_INSTR_VAR(instr);

			if (id >= DIF_VAR_OTHER_UBASE) {
				uintptr_t a;

				id -= DIF_VAR_OTHER_UBASE;
				svar = vstate->dtvs_globals[id];
				ASSERT(svar != NULL);
				v = &svar->dtsv_var;

				if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) {
					regs[rd] = svar->dtsv_data;
					break;
				}

				a = (uintptr_t)svar->dtsv_data;

				if (*(uint8_t *)a == UINT8_MAX) {
					/*
					 * If the 0th byte is set to UINT8_MAX
					 * then this is to be treated as a
					 * reference to a NULL variable.
					 */
					regs[rd] = 0;
				} else {
					regs[rd] = a + sizeof (uint64_t);
				}

				break;
			}

			regs[rd] = dtrace_dif_variable(mstate, state, id, 0);
			break;

		case DIF_OP_STGS:
			id = DIF_INSTR_VAR(instr);

			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;

			svar = vstate->dtvs_globals[id];
			ASSERT(svar != NULL);
			v = &svar->dtsv_var;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				uintptr_t a = (uintptr_t)svar->dtsv_data;

				ASSERT(a != 0);
				ASSERT(svar->dtsv_size != 0);

				if (regs[rd] == 0) {
					/*
					 * Storing NULL marks the variable's
					 * 0th byte with the UINT8_MAX
					 * sentinel (see DIF_OP_LDGS).
					 */
					*(uint8_t *)a = UINT8_MAX;
					break;
				} else {
					*(uint8_t *)a = 0;
					a += sizeof (uint64_t);
				}
				if (!dtrace_vcanload(
				    (void *)(uintptr_t)regs[rd], &v->dtdv_type,
				    mstate, vstate))
					break;

				dtrace_vcopy((void *)(uintptr_t)regs[rd],
				    (void *)a, &v->dtdv_type);
				break;
			}

			svar->dtsv_data = regs[rd];
			break;

		case DIF_OP_LDTA:
			/*
			 * There are no DTrace built-in thread-local arrays at
			 * present.  This opcode is saved for future work.
			 */
			*flags |= CPU_DTRACE_ILLOP;
			regs[rd] = 0;
			break;

		case DIF_OP_LDLS:
			id = DIF_INSTR_VAR(instr);

			if (id < DIF_VAR_OTHER_UBASE) {
				/*
				 * For now, this has no meaning.
				 */
				regs[rd] = 0;
				break;
			}

			id -= DIF_VAR_OTHER_UBASE;

			ASSERT(id < vstate->dtvs_nlocals);
			ASSERT(vstate->dtvs_locals != NULL);

			svar = vstate->dtvs_locals[id];
			ASSERT(svar != NULL);
			v = &svar->dtsv_var;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				uintptr_t a = (uintptr_t)svar->dtsv_data;
				size_t sz = v->dtdv_type.dtdt_size;

				/*
				 * Clause-local by-ref storage is laid out as
				 * one (sentinel + data) slot per CPU.
				 */
				sz += sizeof (uint64_t);
				ASSERT(svar->dtsv_size == NCPU * sz);
				a += curcpu * sz;

				if (*(uint8_t *)a == UINT8_MAX) {
					/*
					 * If the 0th byte is set to UINT8_MAX
					 * then this is to be treated as a
					 * reference to a NULL variable.
					 */
					regs[rd] = 0;
				} else {
					regs[rd] = a + sizeof (uint64_t);
				}

				break;
			}

			ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t));
			tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
			regs[rd] = tmp[curcpu];
			break;

		case DIF_OP_STLS:
			id = DIF_INSTR_VAR(instr);

			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;
			ASSERT(id < vstate->dtvs_nlocals);

			ASSERT(vstate->dtvs_locals != NULL);
			svar = vstate->dtvs_locals[id];
			ASSERT(svar != NULL);
			v = &svar->dtsv_var;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				uintptr_t a = (uintptr_t)svar->dtsv_data;
				size_t sz = v->dtdv_type.dtdt_size;

				sz += sizeof (uint64_t);
				ASSERT(svar->dtsv_size == NCPU * sz);
				a += curcpu * sz;

				if (regs[rd] == 0) {
					*(uint8_t *)a = UINT8_MAX;
					break;
				} else {
					*(uint8_t *)a = 0;
					a += sizeof (uint64_t);
				}

				if (!dtrace_vcanload(
				    (void *)(uintptr_t)regs[rd], &v->dtdv_type,
				    mstate, vstate))
					break;

				dtrace_vcopy((void *)(uintptr_t)regs[rd],
				    (void *)a, &v->dtdv_type);
				break;
			}

			ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t));
			tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
			tmp[curcpu] = regs[rd];
			break;

		case DIF_OP_LDTS: {
			dtrace_dynvar_t *dvar;
			dtrace_key_t *key;

			id = DIF_INSTR_VAR(instr);
			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;
			v = &vstate->dtvs_tlocals[id];

			/*
			 * Thread-locals are dynamic variables keyed by
			 * (variable id, thread key).
			 */
			key = &tupregs[DIF_DTR_NREGS];
			key[0].dttk_value = (uint64_t)id;
			key[0].dttk_size = 0;
			DTRACE_TLS_THRKEY(key[1].dttk_value);
			key[1].dttk_size = 0;

			dvar = dtrace_dynvar(dstate, 2, key,
			    sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC,
			    mstate, vstate);

			if (dvar == NULL) {
				regs[rd] = 0;
				break;
			}

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
			} else {
				regs[rd] = *((uint64_t *)dvar->dtdv_data);
			}

			break;
		}

		case DIF_OP_STTS: {
			dtrace_dynvar_t *dvar;
			dtrace_key_t *key;

			id = DIF_INSTR_VAR(instr);
			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;

			key = &tupregs[DIF_DTR_NREGS];
			key[0].dttk_value = (uint64_t)id;
			key[0].dttk_size = 0;
			DTRACE_TLS_THRKEY(key[1].dttk_value);
			key[1].dttk_size = 0;
			v = &vstate->dtvs_tlocals[id];

			/* Storing zero deallocates the dynamic variable. */
			dvar = dtrace_dynvar(dstate, 2, key,
			    v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
			    v->dtdv_type.dtdt_size : sizeof (uint64_t),
			    regs[rd] ? DTRACE_DYNVAR_ALLOC :
			    DTRACE_DYNVAR_DEALLOC, mstate, vstate);

			/*
			 * Given that we're storing to thread-local data,
			 * we need to flush our predicate cache.
			 */
			curthread->t_predcache = 0;

			if (dvar == NULL)
				break;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				if (!dtrace_vcanload(
				    (void *)(uintptr_t)regs[rd],
				    &v->dtdv_type, mstate, vstate))
					break;

				dtrace_vcopy((void *)(uintptr_t)regs[rd],
				    dvar->dtdv_data, &v->dtdv_type);
			} else {
				*((uint64_t *)dvar->dtdv_data) = regs[rd];
			}

			break;
		}

		case DIF_OP_SRA:
			regs[rd] = (int64_t)regs[r1] >> regs[r2];
			break;

		case DIF_OP_CALL:
			dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd,
			    regs, tupregs, ttop, mstate, state);
			break;

		case DIF_OP_PUSHTR:
			if (ttop == DIF_DTR_NREGS) {
				*flags |= CPU_DTRACE_TUPOFLOW;
				break;
			}

			if (r1 == DIF_TYPE_STRING) {
				/*
				 * If this is a string type and the size is 0,
				 * we'll use the system-wide default string
				 * size.  Note that we are _not_ looking at
				 * the value of the DTRACEOPT_STRSIZE option;
				 * had this been set, we would expect to have
				 * a non-zero size value in the "pushtr".
				 */
				tupregs[ttop].dttk_size =
				    dtrace_strlen((char *)(uintptr_t)regs[rd],
				    regs[r2] ? regs[r2] :
				    dtrace_strsize_default) + 1;
			} else {
				tupregs[ttop].dttk_size = regs[r2];
			}

			tupregs[ttop++].dttk_value = regs[rd];
			break;

		case DIF_OP_PUSHTV:
			if (ttop == DIF_DTR_NREGS) {
				*flags |= CPU_DTRACE_TUPOFLOW;
				break;
			}

			tupregs[ttop].dttk_value = regs[rd];
			tupregs[ttop++].dttk_size = 0;
			break;

		case DIF_OP_POPTS:
			if (ttop != 0)
				ttop--;
			break;

		case DIF_OP_FLUSHTS:
			ttop = 0;
			break;

		case DIF_OP_LDGAA:
		case DIF_OP_LDTAA: {
			dtrace_dynvar_t *dvar;
			dtrace_key_t *key = tupregs;
			uint_t nkeys = ttop;

			id = DIF_INSTR_VAR(instr);
			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;

			/*
			 * The tuple stack supplies the aggregation/array
			 * keys; the variable id (and, for thread-locals,
			 * the thread key) are appended to it.
			 */
			key[nkeys].dttk_value = (uint64_t)id;
			key[nkeys++].dttk_size = 0;

			if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) {
				DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
				key[nkeys++].dttk_size = 0;
				v = &vstate->dtvs_tlocals[id];
			} else {
				v = &vstate->dtvs_globals[id]->dtsv_var;
			}

			dvar = dtrace_dynvar(dstate, nkeys, key,
			    v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
			    v->dtdv_type.dtdt_size : sizeof (uint64_t),
			    DTRACE_DYNVAR_NOALLOC, mstate, vstate);

			if (dvar == NULL) {
				regs[rd] = 0;
				break;
			}

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
			} else {
				regs[rd] = *((uint64_t *)dvar->dtdv_data);
			}

			break;
		}

		case DIF_OP_STGAA:
		case DIF_OP_STTAA: {
			dtrace_dynvar_t *dvar;
			dtrace_key_t *key = tupregs;
			uint_t nkeys = ttop;

			id = DIF_INSTR_VAR(instr);
			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;

			key[nkeys].dttk_value = (uint64_t)id;
			key[nkeys++].dttk_size = 0;

			if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) {
				DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
				key[nkeys++].dttk_size = 0;
				v = &vstate->dtvs_tlocals[id];
			} else {
				v = &vstate->dtvs_globals[id]->dtsv_var;
			}

			dvar = dtrace_dynvar(dstate, nkeys, key,
			    v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
			    v->dtdv_type.dtdt_size : sizeof (uint64_t),
			    regs[rd] ? DTRACE_DYNVAR_ALLOC :
			    DTRACE_DYNVAR_DEALLOC, mstate, vstate);

			if (dvar == NULL)
				break;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				if (!dtrace_vcanload(
				    (void *)(uintptr_t)regs[rd], &v->dtdv_type,
				    mstate, vstate))
					break;

				dtrace_vcopy((void *)(uintptr_t)regs[rd],
				    dvar->dtdv_data, &v->dtdv_type);
			} else {
				*((uint64_t *)dvar->dtdv_data) = regs[rd];
			}

			break;
		}

		case DIF_OP_ALLOCS: {
			uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
			size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1];

			/*
			 * Rounding up the user allocation size could have
			 * overflowed large, bogus allocations (like -1ULL) to
			 * 0.
			 */
			if (size < regs[r1] ||
			    !DTRACE_INSCRATCH(mstate, size)) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
				regs[rd] = 0;
				break;
			}

			dtrace_bzero((void *) mstate->dtms_scratch_ptr, size);
			mstate->dtms_scratch_ptr += size;
			regs[rd] = ptr;
			break;
		}

		case DIF_OP_COPYS:
			if (!dtrace_canstore(regs[rd], regs[r2],
			    mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = regs[rd];
				break;
			}

			if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate))
				break;

			dtrace_bcopy((void *)(uintptr_t)regs[r1],
			    (void *)(uintptr_t)regs[rd], (size_t)regs[r2]);
			break;

		/*
		 * Stores verify the destination with dtrace_canstore() and,
		 * for the multi-byte widths, require natural alignment.
		 */
		case DIF_OP_STB:
			if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = regs[rd];
				break;
			}
			*((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1];
			break;

		case DIF_OP_STH:
			if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = regs[rd];
				break;
			}
			if (regs[rd] & 1) {
				*flags |= CPU_DTRACE_BADALIGN;
				*illval = regs[rd];
				break;
			}
			*((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1];
			break;

		case DIF_OP_STW:
			if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = regs[rd];
				break;
			}
			if (regs[rd] & 3) {
				*flags |= CPU_DTRACE_BADALIGN;
				*illval = regs[rd];
				break;
			}
			*((uint32_t *)(uintptr_t)regs[rd]) = (uint32_t)regs[r1];
			break;

		case DIF_OP_STX:
			if (!dtrace_canstore(regs[rd], 8, mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = regs[rd];
				break;
			}
			if (regs[rd] & 7) {
				*flags |= CPU_DTRACE_BADALIGN;
				*illval = regs[rd];
				break;
			}
			*((uint64_t *)(uintptr_t)regs[rd]) = regs[r1];
			break;
		}
	}

	if (!(*flags & CPU_DTRACE_FAULT))
		return (rval);

	/*
	 * A fault occurred during emulation: record the offset of the
	 * faulting instruction for the consumer and return 0.
	 */
	mstate->dtms_fltoffs = opc * sizeof (dif_instr_t);
	mstate->dtms_present |= DTRACE_MSTATE_FLTOFFS;

	return (0);
}

/*
 * Destructive breakpoint() action: format "provider:module:function:name"
 * plus the ECB address into a local buffer and drop into the kernel
 * debugger.  Suppressed entirely when destructive actions are disallowed.
 */
static void
dtrace_action_breakpoint(dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;
	dtrace_provider_t *prov = probe->dtpr_provider;
	char c[DTRACE_FULLNAMELEN + 80], *str;
	char *msg = "dtrace: breakpoint action at probe ";
	char *ecbmsg = " (ecb ";
	uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4));
	uintptr_t val = (uintptr_t)ecb;
	int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0;

	if (dtrace_destructive_disallow)
		return;

	/*
	 * It's impossible to be taking action on the NULL probe.
	 */
	ASSERT(probe != NULL);

	/*
	 * This is a poor man's (destitute man's?) sprintf(): we want to
	 * print the provider name, module name, function name and name of
	 * the probe, along with the hex address of the ECB with the breakpoint
	 * action -- all of which we must place in the character buffer by
	 * hand.
	 */
	while (*msg != '\0')
		c[i++] = *msg++;

	for (str = prov->dtpv_name; *str != '\0'; str++)
		c[i++] = *str;
	c[i++] = ':';

	for (str = probe->dtpr_mod; *str != '\0'; str++)
		c[i++] = *str;
	c[i++] = ':';

	for (str = probe->dtpr_func; *str != '\0'; str++)
		c[i++] = *str;
	c[i++] = ':';

	for (str = probe->dtpr_name; *str != '\0'; str++)
		c[i++] = *str;

	while (*ecbmsg != '\0')
		c[i++] = *ecbmsg++;

	/*
	 * Emit the ECB address as hex, suppressing leading zero nibbles.
	 */
	while (shift >= 0) {
		mask = (uintptr_t)0xf << shift;

		if (val >= ((uintptr_t)1 << shift))
			c[i++] = "0123456789abcdef"[(val & mask) >> shift];
		shift -= 4;
	}

	c[i++] = ')';
	c[i] = '\0';

#if defined(sun)
	debug_enter(c);
#else
	kdb_enter(KDB_WHY_DTRACE, "breakpoint action");
#endif
}

/*
 * Destructive panic() action.  A compare-and-swap on dtrace_panicked
 * guarantees that at most one thread ever calls dtrace_panic().
 */
static void
dtrace_action_panic(dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;

	/*
	 * It's impossible to be taking action on the NULL probe.
	 */
	ASSERT(probe != NULL);

	if (dtrace_destructive_disallow)
		return;

	if (dtrace_panicked != NULL)
		return;

	if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL)
		return;

	/*
	 * We won the right to panic.  (We want to be sure that only one
	 * thread calls panic() from dtrace_probe(), and that panic() is
	 * called exactly once.)
	 */
	dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
	    probe->dtpr_provider->dtpv_name, probe->dtpr_mod,
	    probe->dtpr_func, probe->dtpr_name, (void *)ecb);
}

/*
 * Destructive raise() action: deliver signal 'sig' to the current process.
 * Signals outside the valid range set ILLOP instead.
 */
static void
dtrace_action_raise(uint64_t sig)
{
	if (dtrace_destructive_disallow)
		return;

	if (sig >= NSIG) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return;
	}

#if defined(sun)
	/*
	 * raise() has a queue depth of 1 -- we ignore all subsequent
	 * invocations of the raise() action.
	 */
	if (curthread->t_dtrace_sig == 0)
		curthread->t_dtrace_sig = (uint8_t)sig;

	curthread->t_sig_check = 1;
	aston(curthread);
#else
	struct proc *p = curproc;
	PROC_LOCK(p);
	kern_psignal(p, sig);
	PROC_UNLOCK(p);
#endif
}

/*
 * Destructive stop() action: stop the current process (SIGSTOP on
 * FreeBSD; deferred t_dtrace_stop handling on Solaris).
 */
static void
dtrace_action_stop(void)
{
	if (dtrace_destructive_disallow)
		return;

#if defined(sun)
	if (!curthread->t_dtrace_stop) {
		curthread->t_dtrace_stop = 1;
		curthread->t_sig_check = 1;
		aston(curthread);
	}
#else
	struct proc *p = curproc;
	PROC_LOCK(p);
	kern_psignal(p, SIGSTOP);
	PROC_UNLOCK(p);
#endif
}

/*
 * Destructive chill() action: spin for 'val' nanoseconds in probe context,
 * bounded per CPU by dtrace_chill_max within each dtrace_chill_interval.
 */
static void
dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val)
{
	hrtime_t now;
	volatile uint16_t *flags;
#if defined(sun)
	cpu_t *cpu = CPU;
#else
	cpu_t *cpu = &solaris_cpu[curcpu];
#endif

	if (dtrace_destructive_disallow)
		return;

	flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags;

	now = dtrace_gethrtime();

	if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) {
		/*
		 * We need to advance the mark to the current time.
		 */
		cpu->cpu_dtrace_chillmark = now;
		cpu->cpu_dtrace_chilled = 0;
	}

	/*
	 * Now check to see if the requested chill time would take us over
	 * the maximum amount of time allowed in the chill interval.  (Or
	 * worse, if the calculation itself induces overflow.)
	 */
	if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max ||
	    cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) {
		*flags |= CPU_DTRACE_ILLOP;
		return;
	}

	while (dtrace_gethrtime() - now < val)
		continue;

	/*
	 * Normally, we assure that the value of the variable "timestamp" does
	 * not change within an ECB.  The presence of chill() represents an
	 * exception to this rule, however.
	 */
	mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP;
	cpu->cpu_dtrace_chilled += val;
}

/*
 * ustack()/jstack() helper-assisted user stack action: capture up to
 * 'nframes' PCs (and frame pointers) into 'buf', then run the USTACK
 * helper per frame to fill in up to 'strsize' bytes of symbol strings.
 * (Continues past the end of this chunk.)
 */
static void
dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state,
    uint64_t *buf, uint64_t arg)
{
	int nframes = DTRACE_USTACK_NFRAMES(arg);
	int strsize = DTRACE_USTACK_STRSIZE(arg);
	uint64_t *pcs = &buf[1], *fps;
	char *str = (char *)&pcs[nframes];
	int size, offs = 0, i, j;
	uintptr_t old = mstate->dtms_scratch_ptr, saved;
	uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags;
	char *sym;

	/*
	 * Should be taking a faster path if string space has not been
	 * allocated.
	 */
	ASSERT(strsize != 0);

	/*
	 * We will first allocate some temporary space for the frame pointers.
	 */
	fps = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
	size = (uintptr_t)fps - mstate->dtms_scratch_ptr +
	    (nframes * sizeof (uint64_t));

	if (!DTRACE_INSCRATCH(mstate, size)) {
		/*
		 * Not enough room for our frame pointers -- need to indicate
		 * that we ran out of scratch space.
		 */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
		return;
	}

	mstate->dtms_scratch_ptr += size;
	saved = mstate->dtms_scratch_ptr;

	/*
	 * Now get a stack with both program counters and frame pointers.
	 */
	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	dtrace_getufpstack(buf, fps, nframes + 1);
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

	/*
	 * If that faulted, we're cooked.
	 */
	if (*flags & CPU_DTRACE_FAULT)
		goto out;

	/*
	 * Now we want to walk up the stack, calling the USTACK helper.  For
	 * each iteration, we restore the scratch pointer.
	 */
	for (i = 0; i < nframes; i++) {
		mstate->dtms_scratch_ptr = saved;

		if (offs >= strsize)
			break;

		sym = (char *)(uintptr_t)dtrace_helper(
		    DTRACE_HELPER_ACTION_USTACK,
		    mstate, state, pcs[i], fps[i]);

		/*
		 * If we faulted while running the helper, we're going to
		 * clear the fault and null out the corresponding string.
		 */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			str[offs++] = '\0';
			continue;
		}

		if (sym == NULL) {
			str[offs++] = '\0';
			continue;
		}

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);

		/*
		 * Now copy in the string that the helper returned to us.
		 */
		for (j = 0; offs + j < strsize; j++) {
			if ((str[offs + j] = sym[j]) == '\0')
				break;
		}

		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

		offs += j + 1;
	}

	if (offs >= strsize) {
		/*
		 * If we didn't have room for all of the strings, we don't
		 * abort processing -- this needn't be a fatal error -- but we
		 * still want to increment a counter (dts_stkstroverflows) to
		 * allow this condition to be warned about.  (If this is from
		 * a jstack() action, it is easily tuned via jstackstrsize.)
6956 */ 6957 dtrace_error(&state->dts_stkstroverflows); 6958 } 6959 6960 while (offs < strsize) 6961 str[offs++] = '\0'; 6962 6963out: 6964 mstate->dtms_scratch_ptr = old; 6965} 6966 6967static void 6968dtrace_store_by_ref(dtrace_difo_t *dp, caddr_t tomax, size_t size, 6969 size_t *valoffsp, uint64_t *valp, uint64_t end, int intuple, int dtkind) 6970{ 6971 volatile uint16_t *flags; 6972 uint64_t val = *valp; 6973 size_t valoffs = *valoffsp; 6974 6975 flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; 6976 ASSERT(dtkind == DIF_TF_BYREF || dtkind == DIF_TF_BYUREF); 6977 6978 /* 6979 * If this is a string, we're going to only load until we find the zero 6980 * byte -- after which we'll store zero bytes. 6981 */ 6982 if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) { 6983 char c = '\0' + 1; 6984 size_t s; 6985 6986 for (s = 0; s < size; s++) { 6987 if (c != '\0' && dtkind == DIF_TF_BYREF) { 6988 c = dtrace_load8(val++); 6989 } else if (c != '\0' && dtkind == DIF_TF_BYUREF) { 6990 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 6991 c = dtrace_fuword8((void *)(uintptr_t)val++); 6992 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 6993 if (*flags & CPU_DTRACE_FAULT) 6994 break; 6995 } 6996 6997 DTRACE_STORE(uint8_t, tomax, valoffs++, c); 6998 6999 if (c == '\0' && intuple) 7000 break; 7001 } 7002 } else { 7003 uint8_t c; 7004 while (valoffs < end) { 7005 if (dtkind == DIF_TF_BYREF) { 7006 c = dtrace_load8(val++); 7007 } else if (dtkind == DIF_TF_BYUREF) { 7008 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 7009 c = dtrace_fuword8((void *)(uintptr_t)val++); 7010 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 7011 if (*flags & CPU_DTRACE_FAULT) 7012 break; 7013 } 7014 7015 DTRACE_STORE(uint8_t, tomax, 7016 valoffs++, c); 7017 } 7018 } 7019 7020 *valp = val; 7021 *valoffsp = valoffs; 7022} 7023 7024/* 7025 * If you're looking for the epicenter of DTrace, you just found it. 
 * This is the function called by the provider to fire a probe -- from which
 * all subsequent probe-context DTrace activity emanates.
 */
void
dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
    uintptr_t arg2, uintptr_t arg3, uintptr_t arg4)
{
    processorid_t cpuid;
    dtrace_icookie_t cookie;
    dtrace_probe_t *probe;
    dtrace_mstate_t mstate;
    dtrace_ecb_t *ecb;
    dtrace_action_t *act;
    intptr_t offs;
    size_t size;
    int vtime, onintr;
    volatile uint16_t *flags;
    hrtime_t now;

    if (panicstr != NULL)
        return;

#if defined(sun)
    /*
     * Kick out immediately if this CPU is still being born (in which case
     * curthread will be set to -1) or the current thread can't allow
     * probes in its current context.
     */
    if (((uintptr_t)curthread & 1) || (curthread->t_flag & T_DONTDTRACE))
        return;
#endif

    /* Everything below runs with interrupts disabled on this CPU. */
    cookie = dtrace_interrupt_disable();
    probe = dtrace_probes[id - 1];
    cpuid = curcpu;
    onintr = CPU_ON_INTR(CPU);

    if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE &&
        probe->dtpr_predcache == curthread->t_predcache) {
        /*
         * We have hit in the predicate cache; we know that
         * this predicate would evaluate to be false.
         */
        dtrace_interrupt_enable(cookie);
        return;
    }

#if defined(sun)
    if (panic_quiesce) {
#else
    if (panicstr != NULL) {
#endif
        /*
         * We don't trace anything if we're panicking.
         */
        dtrace_interrupt_enable(cookie);
        return;
    }

    now = dtrace_gethrtime();
    vtime = dtrace_vtime_references != 0;

    if (vtime && curthread->t_dtrace_start)
        curthread->t_dtrace_vtime += now - curthread->t_dtrace_start;

    mstate.dtms_difo = NULL;
    mstate.dtms_probe = probe;
    mstate.dtms_strtok = 0;
    mstate.dtms_arg[0] = arg0;
    mstate.dtms_arg[1] = arg1;
    mstate.dtms_arg[2] = arg2;
    mstate.dtms_arg[3] = arg3;
    mstate.dtms_arg[4] = arg4;

    flags = (volatile uint16_t *)&cpu_core[cpuid].cpuc_dtrace_flags;

    /* Process every ECB (enabling) attached to this probe. */
    for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
        dtrace_predicate_t *pred = ecb->dte_predicate;
        dtrace_state_t *state = ecb->dte_state;
        dtrace_buffer_t *buf = &state->dts_buffer[cpuid];
        dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid];
        dtrace_vstate_t *vstate = &state->dts_vstate;
        dtrace_provider_t *prov = probe->dtpr_provider;
        uint64_t tracememsize = 0;
        int committed = 0;
        caddr_t tomax;

        /*
         * A little subtlety with the following (seemingly innocuous)
         * declaration of the automatic 'val': by looking at the
         * code, you might think that it could be declared in the
         * action processing loop, below.  (That is, it's only used in
         * the action processing loop.)  However, it must be declared
         * out of that scope because in the case of DIF expression
         * arguments to aggregating actions, one iteration of the
         * action loop will use the last iteration's value.
         */
        uint64_t val = 0;

        mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
        mstate.dtms_getf = NULL;

        *flags &= ~CPU_DTRACE_ERROR;

        if (prov == dtrace_provider) {
            /*
             * If dtrace itself is the provider of this probe,
             * we're only going to continue processing the ECB if
             * arg0 (the dtrace_state_t) is equal to the ECB's
             * creating state.  (This prevents disjoint consumers
             * from seeing one another's metaprobes.)
             */
            if (arg0 != (uint64_t)(uintptr_t)state)
                continue;
        }

        if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) {
            /*
             * We're not currently active.  If our provider isn't
             * the dtrace pseudo provider, we're not interested.
             */
            if (prov != dtrace_provider)
                continue;

            /*
             * Now we must further check if we are in the BEGIN
             * probe.  If we are, we will only continue processing
             * if we're still in WARMUP -- if one BEGIN enabling
             * has invoked the exit() action, we don't want to
             * evaluate subsequent BEGIN enablings.
             */
            if (probe->dtpr_id == dtrace_probeid_begin &&
                state->dts_activity != DTRACE_ACTIVITY_WARMUP) {
                ASSERT(state->dts_activity ==
                    DTRACE_ACTIVITY_DRAINING);
                continue;
            }
        }

        if (ecb->dte_cond) {
            /*
             * If the dte_cond bits indicate that this
             * consumer is only allowed to see user-mode firings
             * of this probe, call the provider's dtps_usermode()
             * entry point to check that the probe was fired
             * while in a user context.  Skip this ECB if that's
             * not the case.
             */
            if ((ecb->dte_cond & DTRACE_COND_USERMODE) &&
                prov->dtpv_pops.dtps_usermode(prov->dtpv_arg,
                probe->dtpr_id, probe->dtpr_arg) == 0)
                continue;

#if defined(sun)
            /*
             * This is more subtle than it looks.  We have to be
             * absolutely certain that CRED() isn't going to
             * change out from under us so it's only legit to
             * examine that structure if we're in constrained
             * situations.  Currently, the only time we'll do this
             * check is if a non-super-user has enabled the
             * profile or syscall providers -- providers that
             * allow visibility of all processes.  For the
             * profile case, the check above will ensure that
             * we're examining a user context.
             */
            if (ecb->dte_cond & DTRACE_COND_OWNER) {
                cred_t *cr;
                cred_t *s_cr =
                    ecb->dte_state->dts_cred.dcr_cred;
                proc_t *proc;

                ASSERT(s_cr != NULL);

                if ((cr = CRED()) == NULL ||
                    s_cr->cr_uid != cr->cr_uid ||
                    s_cr->cr_uid != cr->cr_ruid ||
                    s_cr->cr_uid != cr->cr_suid ||
                    s_cr->cr_gid != cr->cr_gid ||
                    s_cr->cr_gid != cr->cr_rgid ||
                    s_cr->cr_gid != cr->cr_sgid ||
                    (proc = ttoproc(curthread)) == NULL ||
                    (proc->p_flag & SNOCD))
                    continue;
            }

            if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
                cred_t *cr;
                cred_t *s_cr =
                    ecb->dte_state->dts_cred.dcr_cred;

                ASSERT(s_cr != NULL);

                if ((cr = CRED()) == NULL ||
                    s_cr->cr_zone->zone_id !=
                    cr->cr_zone->zone_id)
                    continue;
            }
#endif
        }

        if (now - state->dts_alive > dtrace_deadman_timeout) {
            /*
             * We seem to be dead.  Unless we (a) have kernel
             * destructive permissions (b) have explicitly enabled
             * destructive actions and (c) destructive actions have
             * not been disabled, we're going to transition into
             * the KILLED state, from which no further processing
             * on this state will be performed.
             */
            if (!dtrace_priv_kernel_destructive(state) ||
                !state->dts_cred.dcr_destructive ||
                dtrace_destructive_disallow) {
                void *activity = &state->dts_activity;
                dtrace_activity_t current;

                do {
                    current = state->dts_activity;
                } while (dtrace_cas32(activity, current,
                    DTRACE_ACTIVITY_KILLED) != current);

                continue;
            }
        }

        if ((offs = dtrace_buffer_reserve(buf, ecb->dte_needed,
            ecb->dte_alignment, state, &mstate)) < 0)
            continue;

        tomax = buf->dtb_tomax;
        ASSERT(tomax != NULL);

        /*
         * Write the record header (EPID + timestamp) at the start of
         * the space we just reserved.
         */
        if (ecb->dte_size != 0) {
            dtrace_rechdr_t dtrh;
            if (!(mstate.dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
                mstate.dtms_timestamp = dtrace_gethrtime();
                mstate.dtms_present |= DTRACE_MSTATE_TIMESTAMP;
            }
            ASSERT3U(ecb->dte_size, >=, sizeof (dtrace_rechdr_t));
            dtrh.dtrh_epid = ecb->dte_epid;
            DTRACE_RECORD_STORE_TIMESTAMP(&dtrh,
                mstate.dtms_timestamp);
            *((dtrace_rechdr_t *)(tomax + offs)) = dtrh;
        }

        mstate.dtms_epid = ecb->dte_epid;
        mstate.dtms_present |= DTRACE_MSTATE_EPID;

        if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)
            mstate.dtms_access = DTRACE_ACCESS_KERNEL;
        else
            mstate.dtms_access = 0;

        if (pred != NULL) {
            dtrace_difo_t *dp = pred->dtp_difo;
            int rval;

            rval = dtrace_dif_emulate(dp, &mstate, vstate, state);

            if (!(*flags & CPU_DTRACE_ERROR) && !rval) {
                dtrace_cacheid_t cid = probe->dtpr_predcache;

                if (cid != DTRACE_CACHEIDNONE && !onintr) {
                    /*
                     * Update the predicate cache...
                     */
                    ASSERT(cid == pred->dtp_cacheid);
                    curthread->t_predcache = cid;
                }

                continue;
            }
        }

        /* Run each action of this ECB, stopping on DIF error. */
        for (act = ecb->dte_action; !(*flags & CPU_DTRACE_ERROR) &&
            act != NULL; act = act->dta_next) {
            size_t valoffs;
            dtrace_difo_t *dp;
            dtrace_recdesc_t *rec = &act->dta_rec;

            size = rec->dtrd_size;
            valoffs = offs + rec->dtrd_offset;

            if (DTRACEACT_ISAGG(act->dta_kind)) {
                uint64_t v = 0xbad;
                dtrace_aggregation_t *agg;

                agg = (dtrace_aggregation_t *)act;

                if ((dp = act->dta_difo) != NULL)
                    v = dtrace_dif_emulate(dp,
                        &mstate, vstate, state);

                if (*flags & CPU_DTRACE_ERROR)
                    continue;

                /*
                 * Note that we always pass the expression
                 * value from the previous iteration of the
                 * action loop.  This value will only be used
                 * if there is an expression argument to the
                 * aggregating action, denoted by the
                 * dtag_hasarg field.
                 */
                dtrace_aggregate(agg, buf,
                    offs, aggbuf, v, val);
                continue;
            }

            switch (act->dta_kind) {
            case DTRACEACT_STOP:
                if (dtrace_priv_proc_destructive(state))
                    dtrace_action_stop();
                continue;

            case DTRACEACT_BREAKPOINT:
                if (dtrace_priv_kernel_destructive(state))
                    dtrace_action_breakpoint(ecb);
                continue;

            case DTRACEACT_PANIC:
                if (dtrace_priv_kernel_destructive(state))
                    dtrace_action_panic(ecb);
                continue;

            case DTRACEACT_STACK:
                if (!dtrace_priv_kernel(state))
                    continue;

                dtrace_getpcstack((pc_t *)(tomax + valoffs),
                    size / sizeof (pc_t), probe->dtpr_aframes,
                    DTRACE_ANCHORED(probe) ? NULL :
                    (uint32_t *)arg0);
                continue;

            case DTRACEACT_JSTACK:
            case DTRACEACT_USTACK:
                if (!dtrace_priv_proc(state))
                    continue;

                /*
                 * See comment in DIF_VAR_PID.
                 */
                if (DTRACE_ANCHORED(mstate.dtms_probe) &&
                    CPU_ON_INTR(CPU)) {
                    int depth = DTRACE_USTACK_NFRAMES(
                        rec->dtrd_arg) + 1;

                    dtrace_bzero((void *)(tomax + valoffs),
                        DTRACE_USTACK_STRSIZE(rec->dtrd_arg)
                        + depth * sizeof (uint64_t));

                    continue;
                }

                if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0 &&
                    curproc->p_dtrace_helpers != NULL) {
                    /*
                     * This is the slow path -- we have
                     * allocated string space, and we're
                     * getting the stack of a process that
                     * has helpers.  Call into a separate
                     * routine to perform this processing.
                     */
                    dtrace_action_ustack(&mstate, state,
                        (uint64_t *)(tomax + valoffs),
                        rec->dtrd_arg);
                    continue;
                }

                DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
                dtrace_getupcstack((uint64_t *)
                    (tomax + valoffs),
                    DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + 1);
                DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
                continue;

            default:
                break;
            }

            /*
             * All remaining action kinds carry a DIF object whose
             * value we must evaluate first.
             */
            dp = act->dta_difo;
            ASSERT(dp != NULL);

            val = dtrace_dif_emulate(dp, &mstate, vstate, state);

            if (*flags & CPU_DTRACE_ERROR)
                continue;

            switch (act->dta_kind) {
            case DTRACEACT_SPECULATE: {
                dtrace_rechdr_t *dtrh;

                ASSERT(buf == &state->dts_buffer[cpuid]);
                buf = dtrace_speculation_buffer(state,
                    cpuid, val);

                if (buf == NULL) {
                    *flags |= CPU_DTRACE_DROP;
                    continue;
                }

                offs = dtrace_buffer_reserve(buf,
                    ecb->dte_needed, ecb->dte_alignment,
                    state, NULL);

                if (offs < 0) {
                    *flags |= CPU_DTRACE_DROP;
                    continue;
                }

                tomax = buf->dtb_tomax;
                ASSERT(tomax != NULL);

                if (ecb->dte_size == 0)
                    continue;

                ASSERT3U(ecb->dte_size, >=,
                    sizeof (dtrace_rechdr_t));
                dtrh = ((void *)(tomax + offs));
                dtrh->dtrh_epid = ecb->dte_epid;
                /*
                 * When the speculation is committed, all of
                 * the records in the speculative buffer will
                 * have their timestamps set to the commit
                 * time.  Until then, it is set to a sentinel
                 * value, for debuggability.
                 */
                DTRACE_RECORD_STORE_TIMESTAMP(dtrh, UINT64_MAX);
                continue;
            }

            case DTRACEACT_PRINTM: {
                /* The DIF returns a 'memref'. */
                uintptr_t *memref = (uintptr_t *)(uintptr_t) val;

                /* Get the size from the memref. */
                size = memref[1];

                /*
                 * Check if the size exceeds the allocated
                 * buffer size.
                 */
                if (size + sizeof(uintptr_t) > dp->dtdo_rtype.dtdt_size) {
                    /* Flag a drop! */
                    *flags |= CPU_DTRACE_DROP;
                    continue;
                }

                /* Store the size in the buffer first. */
                DTRACE_STORE(uintptr_t, tomax,
                    valoffs, size);

                /*
                 * Offset the buffer address to the start
                 * of the data.
                 */
                valoffs += sizeof(uintptr_t);

                /*
                 * Reset to the memory address rather than
                 * the memref array, then let the BYREF
                 * code below do the work to store the
                 * memory data in the buffer.
                 */
                val = memref[0];
                break;
            }

            case DTRACEACT_PRINTT: {
                /* The DIF returns a 'typeref'. */
                uintptr_t *typeref = (uintptr_t *)(uintptr_t) val;
                char c = '\0' + 1;
                size_t s;

                /*
                 * Get the type string length and round it
                 * up so that the data that follows is
                 * aligned for easy access.
                 */
                size_t typs = strlen((char *) typeref[2]) + 1;
                typs = roundup(typs,  sizeof(uintptr_t));

                /*
                 * Get the size from the typeref using the
                 * number of elements and the type size.
                 */
                size = typeref[1] * typeref[3];

                /*
                 * Check if the size exceeds the allocated
                 * buffer size.
                 */
                if (size + typs + 2 * sizeof(uintptr_t) > dp->dtdo_rtype.dtdt_size) {
                    /* Flag a drop! */
                    *flags |= CPU_DTRACE_DROP;
                    /*
                     * NOTE(review): unlike DTRACEACT_PRINTM
                     * above, this overflow path does not
                     * 'continue', so the stores below still
                     * execute with CPU_DTRACE_DROP set --
                     * confirm they cannot overrun the space
                     * reserved for this record.
                     */
                }

                /* Store the size in the buffer first. */
                DTRACE_STORE(uintptr_t, tomax,
                    valoffs, size);
                valoffs += sizeof(uintptr_t);

                /* Store the type size in the buffer. */
                DTRACE_STORE(uintptr_t, tomax,
                    valoffs, typeref[3]);
                valoffs += sizeof(uintptr_t);

                val = typeref[2];

                /* Copy the NUL-terminated type string itself. */
                for (s = 0; s < typs; s++) {
                    if (c != '\0')
                        c = dtrace_load8(val++);

                    DTRACE_STORE(uint8_t, tomax,
                        valoffs++, c);
                }

                /*
                 * Reset to the memory address rather than
                 * the typeref array, then let the BYREF
                 * code below do the work to store the
                 * memory data in the buffer.
                 */
                val = typeref[0];
                break;
            }

            case DTRACEACT_CHILL:
                if (dtrace_priv_kernel_destructive(state))
                    dtrace_action_chill(&mstate, val);
                continue;

            case DTRACEACT_RAISE:
                if (dtrace_priv_proc_destructive(state))
                    dtrace_action_raise(val);
                continue;

            case DTRACEACT_COMMIT:
                ASSERT(!committed);

                /*
                 * We need to commit our buffer state.
                 */
                if (ecb->dte_size)
                    buf->dtb_offset = offs + ecb->dte_size;
                buf = &state->dts_buffer[cpuid];
                dtrace_speculation_commit(state, cpuid, val);
                committed = 1;
                continue;

            case DTRACEACT_DISCARD:
                dtrace_speculation_discard(state, cpuid, val);
                continue;

            case DTRACEACT_DIFEXPR:
            case DTRACEACT_LIBACT:
            case DTRACEACT_PRINTF:
            case DTRACEACT_PRINTA:
            case DTRACEACT_SYSTEM:
            case DTRACEACT_FREOPEN:
            case DTRACEACT_TRACEMEM:
                break;

            case DTRACEACT_TRACEMEM_DYNSIZE:
                tracememsize = val;
                break;

            case DTRACEACT_SYM:
            case DTRACEACT_MOD:
                if (!dtrace_priv_kernel(state))
                    continue;
                break;

            case DTRACEACT_USYM:
            case DTRACEACT_UMOD:
            case DTRACEACT_UADDR: {
#if defined(sun)
                struct pid *pid = curthread->t_procp->p_pidp;
#endif

                if (!dtrace_priv_proc(state))
                    continue;

                /* Record the pid alongside the address value. */
                DTRACE_STORE(uint64_t, tomax,
#if defined(sun)
                    valoffs, (uint64_t)pid->pid_id);
#else
                    valoffs, (uint64_t) curproc->p_pid);
#endif
                DTRACE_STORE(uint64_t, tomax,
                    valoffs + sizeof (uint64_t), val);

                continue;
            }

            case DTRACEACT_EXIT: {
                /*
                 * For the exit action, we are going to attempt
                 * to atomically set our activity to be
                 * draining.  If this fails (either because
                 * another CPU has beat us to the exit action,
                 * or because our current activity is something
                 * other than ACTIVE or WARMUP), we will
                 * continue.  This assures that the exit action
                 * can be successfully recorded at most once
                 * when we're in the ACTIVE state.  If we're
                 * encountering the exit() action while in
                 * COOLDOWN, however, we want to honor the new
                 * status code.  (We know that we're the only
                 * thread in COOLDOWN, so there is no race.)
                 */
                void *activity = &state->dts_activity;
                dtrace_activity_t current = state->dts_activity;

                if (current == DTRACE_ACTIVITY_COOLDOWN)
                    break;

                if (current != DTRACE_ACTIVITY_WARMUP)
                    current = DTRACE_ACTIVITY_ACTIVE;

                if (dtrace_cas32(activity, current,
                    DTRACE_ACTIVITY_DRAINING) != current) {
                    *flags |= CPU_DTRACE_DROP;
                    continue;
                }

                break;
            }

            default:
                ASSERT(0);
            }

            if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF ||
                dp->dtdo_rtype.dtdt_flags & DIF_TF_BYUREF) {
                uintptr_t end = valoffs + size;

                if (tracememsize != 0 &&
                    valoffs + tracememsize < end) {
                    end = valoffs + tracememsize;
                    tracememsize = 0;
                }

                if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF &&
                    !dtrace_vcanload((void *)(uintptr_t)val,
                    &dp->dtdo_rtype, &mstate, vstate))
                    continue;

                dtrace_store_by_ref(dp, tomax, size, &valoffs,
                    &val, end, act->dta_intuple,
                    dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF ?
                    DIF_TF_BYREF: DIF_TF_BYUREF);
                continue;
            }

            switch (size) {
            case 0:
                break;

            case sizeof (uint8_t):
                DTRACE_STORE(uint8_t, tomax, valoffs, val);
                break;
            case sizeof (uint16_t):
                DTRACE_STORE(uint16_t, tomax, valoffs, val);
                break;
            case sizeof (uint32_t):
                DTRACE_STORE(uint32_t, tomax, valoffs, val);
                break;
            case sizeof (uint64_t):
                DTRACE_STORE(uint64_t, tomax, valoffs, val);
                break;
            default:
                /*
                 * Any other size should have been returned by
                 * reference, not by value.
                 */
                ASSERT(0);
                break;
            }
        }

        if (*flags & CPU_DTRACE_DROP)
            continue;

        if (*flags & CPU_DTRACE_FAULT) {
            int ndx;
            dtrace_action_t *err;

            buf->dtb_errors++;

            if (probe->dtpr_id == dtrace_probeid_error) {
                /*
                 * There's nothing we can do -- we had an
                 * error on the error probe.  We bump an
                 * error counter to at least indicate that
                 * this condition happened.
                 */
                dtrace_error(&state->dts_dblerrors);
                continue;
            }

            if (vtime) {
                /*
                 * Before recursing on dtrace_probe(), we
                 * need to explicitly clear out our start
                 * time to prevent it from being accumulated
                 * into t_dtrace_vtime.
                 */
                curthread->t_dtrace_start = 0;
            }

            /*
             * Iterate over the actions to figure out which action
             * we were processing when we experienced the error.
             * Note that act points _past_ the faulting action; if
             * act is ecb->dte_action, the fault was in the
             * predicate, if it's ecb->dte_action->dta_next it's
             * in action #1, and so on.
             */
            for (err = ecb->dte_action, ndx = 0;
                err != act; err = err->dta_next, ndx++)
                continue;

            dtrace_probe_error(state, ecb->dte_epid, ndx,
                (mstate.dtms_present & DTRACE_MSTATE_FLTOFFS) ?
                mstate.dtms_fltoffs : -1, DTRACE_FLAGS2FLT(*flags),
                cpu_core[cpuid].cpuc_dtrace_illval);

            continue;
        }

        if (!committed)
            buf->dtb_offset = offs + ecb->dte_size;
    }

    if (vtime)
        curthread->t_dtrace_start = dtrace_gethrtime();

    dtrace_interrupt_enable(cookie);
}

/*
 * DTrace Probe Hashing Functions
 *
 * The functions in this section (and indeed, the functions in remaining
 * sections) are not _called_ from probe context.  (Any exceptions to this are
 * marked with a "Note:".)  Rather, they are called from elsewhere in the
 * DTrace framework to look-up probes in, add probes to and remove probes from
 * the DTrace probe hashes.  (Each probe is hashed by each element of the
 * probe tuple -- allowing for fast lookups, regardless of what was
 * specified.)
 */

/*
 * PJW/ELF-style string hash used to key the probe hash tables.
 */
static uint_t
dtrace_hash_str(const char *p)
{
    unsigned int g;
    uint_t hval = 0;

    while (*p) {
        hval = (hval << 4) + *p++;
        if ((g = (hval & 0xf0000000)) != 0)
            hval ^= g >> 24;
        hval &= ~g;
    }
    return (hval);
}

/*
 * Create a probe hash keyed on the string located at offset 'stroffs'
 * within each dtrace_probe_t; 'nextoffs'/'prevoffs' locate the intrusive
 * chain links within the probe.  The table starts with a single bucket and
 * grows via dtrace_hash_resize().
 */
static dtrace_hash_t *
dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs)
{
    dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP);

    hash->dth_stroffs = stroffs;
    hash->dth_nextoffs = nextoffs;
    hash->dth_prevoffs = prevoffs;

    hash->dth_size = 1;
    hash->dth_mask = hash->dth_size - 1;

    hash->dth_tab = kmem_zalloc(hash->dth_size *
        sizeof (dtrace_hashbucket_t *), KM_SLEEP);

    return (hash);
}

/*
 * Free a hash created by dtrace_hash_create(); all buckets must already
 * have been emptied.
 */
static void
dtrace_hash_destroy(dtrace_hash_t *hash)
{
#ifdef DEBUG
    int i;

    for (i = 0; i < hash->dth_size; i++)
        ASSERT(hash->dth_tab[i] == NULL);
#endif

    kmem_free(hash->dth_tab,
        hash->dth_size * sizeof (dtrace_hashbucket_t *));
    kmem_free(hash, sizeof (dtrace_hash_t));
}

/*
 * Double the bucket array and redistribute the existing buckets into it.
 * (Buckets themselves are moved; the probe chains hanging off each bucket
 * are untouched.)
 */
static void
dtrace_hash_resize(dtrace_hash_t *hash)
{
    int size = hash->dth_size, i, ndx;
    int new_size = hash->dth_size << 1;
    int new_mask = new_size - 1;
    dtrace_hashbucket_t **new_tab, *bucket, *next;

    ASSERT((new_size & new_mask) == 0);

    new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP);

    for (i = 0; i < size; i++) {
        for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) {
            dtrace_probe_t *probe = bucket->dthb_chain;

            ASSERT(probe != NULL);
            ndx = DTRACE_HASHSTR(hash, probe) & new_mask;

            next = bucket->dthb_next;
            bucket->dthb_next = new_tab[ndx];
            new_tab[ndx] = bucket;
        }
    }

    kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *));
    hash->dth_tab = new_tab;
    hash->dth_size = new_size;
    hash->dth_mask = new_mask;
}

/*
 * Insert 'new' into the hash.  Probes with equal keys share a bucket and
 * are chained through their intrusive next/prev links; a new bucket is
 * created (resizing the table if warranted) only for a previously-unseen
 * key.
 */
static void
dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new)
{
    int hashval = DTRACE_HASHSTR(hash, new);
    int ndx = hashval & hash->dth_mask;
    dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
    dtrace_probe_t **nextp, **prevp;

    for (; bucket != NULL; bucket = bucket->dthb_next) {
        if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new))
            goto add;
    }

    if ((hash->dth_nbuckets >> 1) > hash->dth_size) {
        dtrace_hash_resize(hash);
        dtrace_hash_add(hash, new);
        return;
    }

    bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP);
    bucket->dthb_next = hash->dth_tab[ndx];
    hash->dth_tab[ndx] = bucket;
    hash->dth_nbuckets++;

add:
    nextp = DTRACE_HASHNEXT(hash, new);
    ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL);
    *nextp = bucket->dthb_chain;

    if (bucket->dthb_chain != NULL) {
        prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain);
        ASSERT(*prevp == NULL);
        *prevp = new;
    }

    bucket->dthb_chain = new;
    bucket->dthb_len++;
}

/*
 * Return the first probe whose key matches 'template', or NULL if no probe
 * with that key is present.
 */
static dtrace_probe_t *
dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template)
{
    int hashval = DTRACE_HASHSTR(hash, template);
    int ndx = hashval & hash->dth_mask;
    dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

    for (; bucket != NULL; bucket = bucket->dthb_next) {
        if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
            return (bucket->dthb_chain);
    }

    return (NULL);
}

/*
 * Return the number of probes chained under 'template''s key (0 if the key
 * is absent).
 */
static int
dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template)
{
    int hashval = DTRACE_HASHSTR(hash, template);
    int ndx = hashval & hash->dth_mask;
    dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

    for (; bucket != NULL; bucket = bucket->dthb_next) {
        if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
            return (bucket->dthb_len);
    }

    return (0);
}

/*
 * Unlink 'probe' from its chain, freeing the bucket if this was the last
 * probe with that key.
 */
static void
dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe)
{
    int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask;
    dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

    dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe);
    dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe);

    /*
     * Find the bucket that we're removing this probe from.
     */
    for (; bucket != NULL; bucket = bucket->dthb_next) {
        if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe))
            break;
    }

    ASSERT(bucket != NULL);

    if (*prevp == NULL) {
        if (*nextp == NULL) {
            /*
             * The removed probe was the only probe on this
             * bucket; we need to remove the bucket.
             */
            dtrace_hashbucket_t *b = hash->dth_tab[ndx];

            ASSERT(bucket->dthb_chain == probe);
            ASSERT(b != NULL);

            if (b == bucket) {
                hash->dth_tab[ndx] = bucket->dthb_next;
            } else {
                while (b->dthb_next != bucket)
                    b = b->dthb_next;
                b->dthb_next = bucket->dthb_next;
            }

            ASSERT(hash->dth_nbuckets > 0);
            hash->dth_nbuckets--;
            kmem_free(bucket, sizeof (dtrace_hashbucket_t));
            return;
        }

        bucket->dthb_chain = *nextp;
    } else {
        *(DTRACE_HASHNEXT(hash, *prevp)) = *nextp;
    }

    if (*nextp != NULL)
        *(DTRACE_HASHPREV(hash, *nextp)) = *prevp;
}

/*
 * DTrace Utility Functions
 *
 * These are random utility functions that are _not_ called from probe context.
 */

/*
 * Return non-zero if any field of the attribute is out of range.
 */
static int
dtrace_badattr(const dtrace_attribute_t *a)
{
    return (a->dtat_name > DTRACE_STABILITY_MAX ||
        a->dtat_data > DTRACE_STABILITY_MAX ||
        a->dtat_class > DTRACE_CLASS_MAX);
}

/*
 * Return a duplicate copy of a string.  If the specified string is NULL,
 * this function returns a zero-length string.
 */
static char *
dtrace_strdup(const char *str)
{
	/* The extra byte covers the terminating NUL of the copy. */
	char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP);

	if (str != NULL)
		(void) strcpy(new, str);

	return (new);
}

#define	DTRACE_ISALPHA(c)	\
	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))

/*
 * Return non-zero if the specified name is syntactically invalid.  A NULL or
 * empty name is accepted; otherwise the first character must be alphabetic or
 * one of '-', '_', '.', and subsequent characters must be alphanumeric or one
 * of '-', '_', '.', '`'.
 */
static int
dtrace_badname(const char *s)
{
	char c;

	if (s == NULL || (c = *s++) == '\0')
		return (0);

	if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.')
		return (1);

	while ((c = *s++) != '\0') {
		if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') &&
		    c != '-' && c != '_' && c != '.' && c != '`')
			return (1);
	}

	return (0);
}

/*
 * Derive the DTRACE_PRIV_* flags from the given credential; when the
 * credential is not fully privileged, also report its uid and zoneid for
 * finer-grained matching.  A NULL credential implies all privileges; on
 * FreeBSD all privileges are always granted.
 */
static void
dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp)
{
	uint32_t priv;

#if defined(sun)
	if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
		/*
		 * For DTRACE_PRIV_ALL, the uid and zoneid don't matter.
		 */
		priv = DTRACE_PRIV_ALL;
	} else {
		*uidp = crgetuid(cr);
		*zoneidp = crgetzoneid(cr);

		priv = 0;
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE))
			priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER;
		else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE))
			priv |= DTRACE_PRIV_USER;
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE))
			priv |= DTRACE_PRIV_PROC;
		if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
			priv |= DTRACE_PRIV_OWNER;
		if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
			priv |= DTRACE_PRIV_ZONEOWNER;
	}
#else
	priv = DTRACE_PRIV_ALL;
#endif

	*privp = priv;
}

#ifdef DTRACE_ERRDEBUG
/*
 * Record the specified error message in the error hash.  Entries are keyed
 * on the message's string hash but matched by pointer identity, with
 * collisions resolved by linear probing.
 */
static void
dtrace_errdebug(const char *str)
{
	int hval = dtrace_hash_str(str) % DTRACE_ERRHASHSZ;
	int occupied = 0;

	mutex_enter(&dtrace_errlock);
	dtrace_errlast = str;
	dtrace_errthread = curthread;

	while (occupied++ < DTRACE_ERRHASHSZ) {
		if (dtrace_errhash[hval].dter_msg == str) {
			dtrace_errhash[hval].dter_count++;
			goto out;
		}

		if (dtrace_errhash[hval].dter_msg != NULL) {
			hval = (hval + 1) % DTRACE_ERRHASHSZ;
			continue;
		}

		dtrace_errhash[hval].dter_msg = str;
		dtrace_errhash[hval].dter_count = 1;
		goto out;
	}

	panic("dtrace: undersized error hash");
out:
	mutex_exit(&dtrace_errlock);
}
#endif

/*
 * DTrace Matching Functions
 *
 * These functions are used to match groups of probes, given some elements of
 * a probe tuple, or some globbed expressions for elements of a probe tuple.
 */

/*
 * Return non-zero if the given privileges (and, where relevant, uid and
 * zoneid) suffice to observe the specified probe, per the probe's provider
 * privilege requirements.
 */
static int
dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid,
    zoneid_t zoneid)
{
	if (priv != DTRACE_PRIV_ALL) {
		uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags;
		uint32_t match = priv & ppriv;

		/*
		 * No PRIV_DTRACE_* privileges...
		 */
		if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER |
		    DTRACE_PRIV_KERNEL)) == 0)
			return (0);

		/*
		 * No matching bits, but there were bits to match...
		 */
		if (match == 0 && ppriv != 0)
			return (0);

		/*
		 * Need to have permissions to the process, but don't...
		 */
		if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 &&
		    uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) {
			return (0);
		}

		/*
		 * Need to be in the same zone unless we possess the
		 * privilege to examine all zones.
		 */
		if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 &&
		    zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) {
			return (0);
		}
	}

	return (1);
}

/*
 * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which
 * consists of input pattern strings and an ops-vector to evaluate them.
 * This function returns >0 for match, 0 for no match, and <0 for error.
 */
static int
dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp,
    uint32_t priv, uid_t uid, zoneid_t zoneid)
{
	dtrace_provider_t *pvp = prp->dtpr_provider;
	int rv;

	/* A defunct provider's probes never match. */
	if (pvp->dtpv_defunct)
		return (0);

	if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0)
		return (rv);

	if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0)
		return (rv);

	if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0)
		return (rv);

	if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0)
		return (rv);

	if (dtrace_match_priv(prp, priv, uid, zoneid) == 0)
		return (0);

	return (rv);
}

/*
 * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN)
 * interface for matching a glob pattern 'p' to an input string 's'.
 * Unlike
 * libc's version, the kernel version only applies to 8-bit ASCII strings.
 * In addition, all of the recursion cases except for '*' matching have been
 * unwound.  For '*', we still implement recursive evaluation, but a depth
 * counter is maintained and matching is aborted if we recurse too deep.
 * The function returns 0 if no match, >0 if match, and <0 if recursion error.
 */
static int
dtrace_match_glob(const char *s, const char *p, int depth)
{
	const char *olds;
	char s1, c;
	int gs;

	if (depth > DTRACE_PROBEKEY_MAXDEPTH)
		return (-1);

	if (s == NULL)
		s = "";	/* treat NULL as empty string */

top:
	olds = s;
	s1 = *s++;

	if (p == NULL)
		return (0);

	if ((c = *p++) == '\0')
		return (s1 == '\0');

	switch (c) {
	case '[': {
		int ok = 0, notflag = 0;
		char lc = '\0';

		if (s1 == '\0')
			return (0);

		if (*p == '!') {
			notflag = 1;
			p++;
		}

		if ((c = *p++) == '\0')
			return (0);

		do {
			if (c == '-' && lc != '\0' && *p != ']') {
				/* character range, e.g. [a-z] */
				if ((c = *p++) == '\0')
					return (0);
				if (c == '\\' && (c = *p++) == '\0')
					return (0);

				if (notflag) {
					if (s1 < lc || s1 > c)
						ok++;
					else
						return (0);
				} else if (lc <= s1 && s1 <= c)
					ok++;

			} else if (c == '\\' && (c = *p++) == '\0')
				return (0);

			lc = c; /* save left-hand 'c' for next iteration */

			if (notflag) {
				if (s1 != c)
					ok++;
				else
					return (0);
			} else if (s1 == c)
				ok++;

			if ((c = *p++) == '\0')
				return (0);

		} while (c != ']');

		if (ok)
			goto top;

		return (0);
	}

	case '\\':
		if ((c = *p++) == '\0')
			return (0);
		/*FALLTHRU*/

	default:
		if (c != s1)
			return (0);
		/*FALLTHRU*/

	case '?':
		if (s1 != '\0')
			goto top;
		return (0);

	case '*':
		while (*p == '*')
			p++; /* consecutive *'s are identical to a single one */

		if (*p == '\0')
			return (1);

		/*
		 * Recursively try to match the remainder of the pattern at
		 * each suffix of the input; depth bounds the recursion.
		 */
		for (s = olds; *s != '\0'; s++) {
			if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0)
				return (gs);
		}

		return (0);
	}
}

/*ARGSUSED*/
static int
dtrace_match_string(const char *s, const char *p, int depth)
{
	return (s != NULL && strcmp(s, p) == 0);
}

/*ARGSUSED*/
static int
dtrace_match_nul(const char *s, const char *p, int depth)
{
	return (1); /* always match the empty pattern */
}

/*ARGSUSED*/
static int
dtrace_match_nonzero(const char *s, const char *p, int depth)
{
	return (s != NULL && s[0] != '\0');
}

/*
 * Match probes against the given pre-compiled key, invoking the 'matched'
 * callback for each matching probe until the callback returns something other
 * than DTRACE_MATCH_NEXT; returns the number of probes that matched.
 */
static int
dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
    zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg)
{
	dtrace_probe_t template, *probe;
	dtrace_hash_t *hash = NULL;
	int len, best = INT_MAX, nmatched = 0;
	dtrace_id_t i;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	/*
	 * If the probe ID is specified in the key, just lookup by ID and
	 * invoke the match callback once if a matching probe is found.
	 */
	if (pkp->dtpk_id != DTRACE_IDNONE) {
		if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
		    dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
			(void) (*matched)(probe, arg);
			nmatched++;
		}
		return (nmatched);
	}

	template.dtpr_mod = (char *)pkp->dtpk_mod;
	template.dtpr_func = (char *)pkp->dtpk_func;
	template.dtpr_name = (char *)pkp->dtpk_name;

	/*
	 * We want to find the most distinct of the module name, function
	 * name, and name.
	 * So for each one that is not a glob pattern or
	 * empty string, we perform a lookup in the corresponding hash and
	 * use the hash table with the fewest collisions to do our search.
	 */
	if (pkp->dtpk_mmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) {
		best = len;
		hash = dtrace_bymod;
	}

	if (pkp->dtpk_fmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) {
		best = len;
		hash = dtrace_byfunc;
	}

	if (pkp->dtpk_nmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) {
		best = len;
		hash = dtrace_byname;
	}

	/*
	 * If we did not select a hash table, iterate over every probe and
	 * invoke our callback for each one that matches our input probe key.
	 */
	if (hash == NULL) {
		for (i = 0; i < dtrace_nprobes; i++) {
			if ((probe = dtrace_probes[i]) == NULL ||
			    dtrace_match_probe(probe, pkp, priv, uid,
			    zoneid) <= 0)
				continue;

			nmatched++;

			if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT)
				break;
		}

		return (nmatched);
	}

	/*
	 * If we selected a hash table, iterate over each probe of the same key
	 * name and invoke the callback for every probe that matches the other
	 * attributes of our input probe key.
	 */
	for (probe = dtrace_hash_lookup(hash, &template); probe != NULL;
	    probe = *(DTRACE_HASHNEXT(hash, probe))) {

		if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0)
			continue;

		nmatched++;

		if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT)
			break;
	}

	return (nmatched);
}

/*
 * Return the function pointer dtrace_probecmp() should use to compare the
 * specified pattern with a string.  For NULL or empty patterns, we select
 * dtrace_match_nul().  For glob pattern strings, we use dtrace_match_glob().
 * For non-empty non-glob strings, we use dtrace_match_string().
 */
static dtrace_probekey_f *
dtrace_probekey_func(const char *p)
{
	char c;

	if (p == NULL || *p == '\0')
		return (&dtrace_match_nul);

	/* Any glob metacharacter in the pattern forces glob matching. */
	while ((c = *p++) != '\0') {
		if (c == '[' || c == '?' || c == '*' || c == '\\')
			return (&dtrace_match_glob);
	}

	return (&dtrace_match_string);
}

/*
 * Build a probe comparison key for use with dtrace_match_probe() from the
 * given probe description.  By convention, a null key only matches anchored
 * probes: if each field is the empty string, reset dtpk_fmatch to
 * dtrace_match_nonzero().
 */
static void
dtrace_probekey(dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp)
{
	pkp->dtpk_prov = pdp->dtpd_provider;
	pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider);

	pkp->dtpk_mod = pdp->dtpd_mod;
	pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod);

	pkp->dtpk_func = pdp->dtpd_func;
	pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func);

	pkp->dtpk_name = pdp->dtpd_name;
	pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name);

	pkp->dtpk_id = pdp->dtpd_id;

	if (pkp->dtpk_id == DTRACE_IDNONE &&
	    pkp->dtpk_pmatch == &dtrace_match_nul &&
	    pkp->dtpk_mmatch == &dtrace_match_nul &&
	    pkp->dtpk_fmatch == &dtrace_match_nul &&
	    pkp->dtpk_nmatch == &dtrace_match_nul)
		pkp->dtpk_fmatch = &dtrace_match_nonzero;
}

/*
 * DTrace Provider-to-Framework API Functions
 *
 * These functions implement much of the Provider-to-Framework API, as
 * described in <sys/dtrace.h>.  The parts of the API not in this section are
 * the functions in the API for probe management (found below), and
 * dtrace_probe() itself (found above).
 */

/*
 * Register the calling provider with the DTrace framework.  This should
 * generally be called by DTrace providers in their attach(9E) entry point.
 */
int
dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
    cred_t *cr, const dtrace_pops_t *pops, void *arg, dtrace_provider_id_t *idp)
{
	dtrace_provider_t *provider;

	if (name == NULL || pap == NULL || pops == NULL || idp == NULL) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "arguments", name ? name : "<NULL>");
		return (EINVAL);
	}

	if (name[0] == '\0' || dtrace_badname(name)) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider name", name);
		return (EINVAL);
	}

	/*
	 * At least one of dtps_provide/dtps_provide_module must be supplied;
	 * enable, disable and destroy are mandatory; resume and suspend must
	 * be supplied together or not at all.
	 */
	if ((pops->dtps_provide == NULL && pops->dtps_provide_module == NULL) ||
	    pops->dtps_enable == NULL || pops->dtps_disable == NULL ||
	    pops->dtps_destroy == NULL ||
	    ((pops->dtps_resume == NULL) != (pops->dtps_suspend == NULL))) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider ops", name);
		return (EINVAL);
	}

	if (dtrace_badattr(&pap->dtpa_provider) ||
	    dtrace_badattr(&pap->dtpa_mod) ||
	    dtrace_badattr(&pap->dtpa_func) ||
	    dtrace_badattr(&pap->dtpa_name) ||
	    dtrace_badattr(&pap->dtpa_args)) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider attributes", name);
		return (EINVAL);
	}

	if (priv & ~DTRACE_PRIV_ALL) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "privilege attributes", name);
		return (EINVAL);
	}

	if ((priv & DTRACE_PRIV_KERNEL) &&
	    (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) &&
	    pops->dtps_usermode == NULL) {
		cmn_err(CE_WARN, "failed to register provider '%s': need "
		    "dtps_usermode() op for given privilege attributes", name);
		return (EINVAL);
	}

	provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP);
	provider->dtpv_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(provider->dtpv_name, name);

	provider->dtpv_attr = *pap;
	provider->dtpv_priv.dtpp_flags = priv;
	if (cr != NULL) {
		provider->dtpv_priv.dtpp_uid = crgetuid(cr);
		provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr);
	}
	provider->dtpv_pops = *pops;

	/*
	 * Fill in any missing optional ops with a no-op so callers of the
	 * ops vector never need to check for NULL.
	 */
	if (pops->dtps_provide == NULL) {
		ASSERT(pops->dtps_provide_module != NULL);
		provider->dtpv_pops.dtps_provide =
		    (void (*)(void *, dtrace_probedesc_t *))dtrace_nullop;
	}

	if (pops->dtps_provide_module == NULL) {
		ASSERT(pops->dtps_provide != NULL);
		provider->dtpv_pops.dtps_provide_module =
		    (void (*)(void *, modctl_t *))dtrace_nullop;
	}

	if (pops->dtps_suspend == NULL) {
		ASSERT(pops->dtps_resume == NULL);
		provider->dtpv_pops.dtps_suspend =
		    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
		provider->dtpv_pops.dtps_resume =
		    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
	}

	provider->dtpv_arg = arg;
	*idp = (dtrace_provider_id_t)provider;

	if (pops == &dtrace_provider_ops) {
		ASSERT(MUTEX_HELD(&dtrace_provider_lock));
		ASSERT(MUTEX_HELD(&dtrace_lock));
		ASSERT(dtrace_anon.dta_enabling == NULL);

		/*
		 * We make sure that the DTrace provider is at the head of
		 * the provider chain.
		 */
		provider->dtpv_next = dtrace_provider;
		dtrace_provider = provider;
		return (0);
	}

	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&dtrace_lock);

	/*
	 * If there is at least one provider registered, we'll add this
	 * provider after the first provider.
	 */
	if (dtrace_provider != NULL) {
		provider->dtpv_next = dtrace_provider->dtpv_next;
		dtrace_provider->dtpv_next = provider;
	} else {
		dtrace_provider = provider;
	}

	if (dtrace_retained != NULL) {
		dtrace_enabling_provide(provider);

		/*
		 * Now we need to call dtrace_enabling_matchall() -- which
		 * will acquire cpu_lock and dtrace_lock.  We therefore need
		 * to drop all of our locks before calling into it...
		 */
		mutex_exit(&dtrace_lock);
		mutex_exit(&dtrace_provider_lock);
		dtrace_enabling_matchall();

		return (0);
	}

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_provider_lock);

	return (0);
}

/*
 * Unregister the specified provider from the DTrace framework.  This should
 * generally be called by DTrace providers in their detach(9E) entry point.
 */
int
dtrace_unregister(dtrace_provider_id_t id)
{
	dtrace_provider_t *old = (dtrace_provider_t *)id;
	dtrace_provider_t *prev = NULL;
	int i, self = 0, noreap = 0;
	dtrace_probe_t *probe, *first = NULL;

	if (old->dtpv_pops.dtps_enable ==
	    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop) {
		/*
		 * If DTrace itself is the provider, we're called with locks
		 * already held.
		 */
		ASSERT(old == dtrace_provider);
#if defined(sun)
		ASSERT(dtrace_devi != NULL);
#endif
		ASSERT(MUTEX_HELD(&dtrace_provider_lock));
		ASSERT(MUTEX_HELD(&dtrace_lock));
		self = 1;

		if (dtrace_provider->dtpv_next != NULL) {
			/*
			 * There's another provider here; return failure.
			 */
			return (EBUSY);
		}
	} else {
		mutex_enter(&dtrace_provider_lock);
#if defined(sun)
		mutex_enter(&mod_lock);
#endif
		mutex_enter(&dtrace_lock);
	}

	/*
	 * If anyone has /dev/dtrace open, or if there are anonymous enabled
	 * probes, we refuse to let providers slither away, unless this
	 * provider has already been explicitly invalidated.
	 */
	if (!old->dtpv_defunct &&
	    (dtrace_opens || (dtrace_anon.dta_state != NULL &&
	    dtrace_anon.dta_state->dts_necbs > 0))) {
		if (!self) {
			mutex_exit(&dtrace_lock);
#if defined(sun)
			mutex_exit(&mod_lock);
#endif
			mutex_exit(&dtrace_provider_lock);
		}
		return (EBUSY);
	}

	/*
	 * Attempt to destroy the probes associated with this provider.
	 */
	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_provider != old)
			continue;

		if (probe->dtpr_ecb == NULL)
			continue;

		/*
		 * We found an enabled probe, so the provider cannot be
		 * removed yet.
		 *
		 * If we are trying to unregister a defunct provider, and the
		 * provider was made defunct within the interval dictated by
		 * dtrace_unregister_defunct_reap, we'll (asynchronously)
		 * attempt to reap our enablings.  To denote that the provider
		 * should reattempt to unregister itself at some point in the
		 * future, we will return a differentiable error code (EAGAIN
		 * instead of EBUSY) in this case.
		 */
		if (dtrace_gethrtime() - old->dtpv_defunct >
		    dtrace_unregister_defunct_reap)
			noreap = 1;	/* too old -- don't redispatch reaper */

		if (!self) {
			mutex_exit(&dtrace_lock);
#if defined(sun)
			mutex_exit(&mod_lock);
#endif
			mutex_exit(&dtrace_provider_lock);
		}

		if (noreap)
			return (EBUSY);

		(void) taskq_dispatch(dtrace_taskq,
		    (task_func_t *)dtrace_enabling_reap, NULL, TQ_SLEEP);

		return (EAGAIN);
	}

	/*
	 * All of the probes for this provider are disabled; we can safely
	 * remove all of them from their hash chains and from the probe array.
	 */
	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_provider != old)
			continue;

		dtrace_probes[i] = NULL;

		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		/* Chain the removed probes together via dtpr_nextmod. */
		if (first == NULL) {
			first = probe;
			probe->dtpr_nextmod = NULL;
		} else {
			probe->dtpr_nextmod = first;
			first = probe;
		}
	}

	/*
	 * The provider's probes have been removed from the hash chains and
	 * from the probe array.  Now issue a dtrace_sync() to be sure that
	 * everyone has cleared out from any probe array processing.
	 */
	dtrace_sync();

	for (probe = first; probe != NULL; probe = first) {
		first = probe->dtpr_nextmod;

		old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id,
		    probe->dtpr_arg);
		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
#if defined(sun)
		vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1);
#else
		free_unr(dtrace_arena, probe->dtpr_id);
#endif
		kmem_free(probe, sizeof (dtrace_probe_t));
	}

	/*
	 * Unlink the provider from the provider list.
	 */
	if ((prev = dtrace_provider) == old) {
#if defined(sun)
		ASSERT(self || dtrace_devi == NULL);
		ASSERT(old->dtpv_next == NULL || dtrace_devi == NULL);
#endif
		dtrace_provider = old->dtpv_next;
	} else {
		while (prev != NULL && prev->dtpv_next != old)
			prev = prev->dtpv_next;

		if (prev == NULL) {
			panic("attempt to unregister non-existent "
			    "dtrace provider %p\n", (void *)id);
		}

		prev->dtpv_next = old->dtpv_next;
	}

	if (!self) {
		mutex_exit(&dtrace_lock);
#if defined(sun)
		mutex_exit(&mod_lock);
#endif
		mutex_exit(&dtrace_provider_lock);
	}

	kmem_free(old->dtpv_name, strlen(old->dtpv_name) + 1);
	kmem_free(old, sizeof (dtrace_provider_t));

	return (0);
}

/*
 * Invalidate the specified provider.  All subsequent probe lookups for the
 * specified provider will fail, but its probes will not be removed.
 */
void
dtrace_invalidate(dtrace_provider_id_t id)
{
	dtrace_provider_t *pvp = (dtrace_provider_t *)id;

	ASSERT(pvp->dtpv_pops.dtps_enable !=
	    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop);

	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&dtrace_lock);

	/*
	 * dtpv_defunct doubles as the time of invalidation; a non-zero value
	 * marks the provider defunct.
	 */
	pvp->dtpv_defunct = dtrace_gethrtime();

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_provider_lock);
}

/*
 * Indicate whether or not DTrace has attached.
 */
int
dtrace_attached(void)
{
	/*
	 * dtrace_provider will be non-NULL iff the DTrace driver has
	 * attached.  (It's non-NULL because DTrace is always itself a
	 * provider.)
	 */
	return (dtrace_provider != NULL);
}

/*
 * Remove all the unenabled probes for the given provider.  This function is
 * not unlike dtrace_unregister(), except that it doesn't remove the provider
 * -- just as many of its associated probes as it can.
 */
int
dtrace_condense(dtrace_provider_id_t id)
{
	dtrace_provider_t *prov = (dtrace_provider_t *)id;
	int i;
	dtrace_probe_t *probe;

	/*
	 * Make sure this isn't the dtrace provider itself.
	 */
	ASSERT(prov->dtpv_pops.dtps_enable !=
	    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop);

	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&dtrace_lock);

	/*
	 * Attempt to destroy the probes associated with this provider.
	 */
	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_provider != prov)
			continue;

		/* Enabled probes are left alone. */
		if (probe->dtpr_ecb != NULL)
			continue;

		dtrace_probes[i] = NULL;

		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1,
		    probe->dtpr_arg);
		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
		kmem_free(probe, sizeof (dtrace_probe_t));
#if defined(sun)
		vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1);
#else
		free_unr(dtrace_arena, i + 1);
#endif
	}

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_provider_lock);

	return (0);
}

/*
 * DTrace Probe Management Functions
 *
 * The functions in this section perform the DTrace probe management,
 * including functions to create probes, look-up probes, and call into the
 * providers to request that probes be provided.  Some of these functions are
 * in the Provider-to-Framework API; these functions can be identified by the
 * fact that they are not declared "static".
 */

/*
 * Create a probe with the specified module name, function name, and name.
 */
dtrace_id_t
dtrace_probe_create(dtrace_provider_id_t prov, const char *mod,
    const char *func, const char *name, int aframes, void *arg)
{
	dtrace_probe_t *probe, **probes;
	dtrace_provider_t *provider = (dtrace_provider_t *)prov;
	dtrace_id_t id;

	if (provider == dtrace_provider) {
		ASSERT(MUTEX_HELD(&dtrace_lock));
	} else {
		mutex_enter(&dtrace_lock);
	}

	/* Allocate a probe ID; IDs start at 1 and index dtrace_probes[id - 1]. */
#if defined(sun)
	id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1,
	    VM_BESTFIT | VM_SLEEP);
#else
	id = alloc_unr(dtrace_arena);
#endif
	probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP);

	probe->dtpr_id = id;
	probe->dtpr_gen = dtrace_probegen++;
	probe->dtpr_mod = dtrace_strdup(mod);
	probe->dtpr_func = dtrace_strdup(func);
	probe->dtpr_name = dtrace_strdup(name);
	probe->dtpr_arg = arg;
	probe->dtpr_aframes = aframes;
	probe->dtpr_provider = provider;

	dtrace_hash_add(dtrace_bymod, probe);
	dtrace_hash_add(dtrace_byfunc, probe);
	dtrace_hash_add(dtrace_byname, probe);

	if (id - 1 >= dtrace_nprobes) {
		/*
		 * Grow the probes array by doubling its size.
		 */
		size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *);
		size_t nsize = osize << 1;

		if (nsize == 0) {
			ASSERT(osize == 0);
			ASSERT(dtrace_probes == NULL);
			nsize = sizeof (dtrace_probe_t *);
		}

		probes = kmem_zalloc(nsize, KM_SLEEP);

		if (dtrace_probes == NULL) {
			ASSERT(osize == 0);
			dtrace_probes = probes;
			dtrace_nprobes = 1;
		} else {
			dtrace_probe_t **oprobes = dtrace_probes;

			bcopy(oprobes, probes, osize);
			dtrace_membar_producer();
			dtrace_probes = probes;

			dtrace_sync();

			/*
			 * All CPUs are now seeing the new probes array; we can
			 * safely free the old array.
			 */
			kmem_free(oprobes, osize);
			dtrace_nprobes <<= 1;
		}

		ASSERT(id - 1 < dtrace_nprobes);
	}

	ASSERT(dtrace_probes[id - 1] == NULL);
	dtrace_probes[id - 1] = probe;

	if (provider != dtrace_provider)
		mutex_exit(&dtrace_lock);

	return (id);
}

/*
 * Return the probe with the given ID, or NULL if the ID is out of range.
 */
static dtrace_probe_t *
dtrace_probe_lookup_id(dtrace_id_t id)
{
	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (id == 0 || id > dtrace_nprobes)
		return (NULL);

	return (dtrace_probes[id - 1]);
}

/*
 * dtrace_match() callback used by dtrace_probe_lookup(): record the matched
 * probe's ID and stop matching.
 */
static int
dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg)
{
	*((dtrace_id_t *)arg) = probe->dtpr_id;

	return (DTRACE_MATCH_DONE);
}

/*
 * Look up a probe based on provider and one or more of module name, function
 * name and probe name.
 */
dtrace_id_t
dtrace_probe_lookup(dtrace_provider_id_t prid, char *mod,
    char *func, char *name)
{
	dtrace_probekey_t pkey;
	dtrace_id_t id;
	int match;

	pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name;
	pkey.dtpk_pmatch = &dtrace_match_string;
	pkey.dtpk_mod = mod;
	pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_func = func;
	pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_name = name;
	pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_id = DTRACE_IDNONE;

	mutex_enter(&dtrace_lock);
	match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0,
	    dtrace_probe_lookup_match, &id);
	mutex_exit(&dtrace_lock);

	/* DTRACE_MATCH_DONE limits us to at most one match. */
	ASSERT(match == 1 || match == 0);
	return (match ? id : 0);
}

/*
 * Returns the probe argument associated with the specified probe.
 */
void *
dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid)
{
	dtrace_probe_t *probe;
	void *rval = NULL;

	mutex_enter(&dtrace_lock);

	/* Only return the argument if the probe belongs to this provider. */
	if ((probe = dtrace_probe_lookup_id(pid)) != NULL &&
	    probe->dtpr_provider == (dtrace_provider_t *)id)
		rval = probe->dtpr_arg;

	mutex_exit(&dtrace_lock);

	return (rval);
}

/*
 * Copy a probe into a probe description.
 */
static void
dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp)
{
	bzero(pdp, sizeof (dtrace_probedesc_t));
	pdp->dtpd_id = prp->dtpr_id;

	(void) strncpy(pdp->dtpd_provider,
	    prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN - 1);

	(void) strncpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN - 1);
	(void) strncpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN - 1);
	(void) strncpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN - 1);
}

/*
 * Called to indicate that a probe -- or probes -- should be provided by a
 * specified provider.  If the specified description is NULL, the provider will
 * be told to provide all of its probes.  (This is done whenever a new
 * consumer comes along, or whenever a retained enabling is to be matched.) If
 * the specified description is non-NULL, the provider is given the
 * opportunity to dynamically provide the specified probe, allowing providers
 * to support the creation of probes on-the-fly.  (So-called _autocreated_
 * probes.)  If the provider is NULL, the operations will be applied to all
 * providers; if the provider is non-NULL the operations will only be applied
 * to the specified provider.  The dtrace_provider_lock must be held, and the
 * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation
 * will need to grab the dtrace_lock when it reenters the framework through
 * dtrace_probe_lookup(), dtrace_probe_create(), etc.
 */
static void
dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv)
{
#if defined(sun)
	modctl_t *ctl;
#endif
	int all = 0;

	ASSERT(MUTEX_HELD(&dtrace_provider_lock));

	if (prv == NULL) {
		all = 1;
		prv = dtrace_provider;
	}

	do {
		/*
		 * First, call the blanket provide operation.
		 */
		prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc);

#if defined(sun)
		/*
		 * Now call the per-module provide operation.  We will grab
		 * mod_lock to prevent the list from being modified.  Note
		 * that this also prevents the mod_busy bits from changing.
		 * (mod_busy can only be changed with mod_lock held.)
		 */
		mutex_enter(&mod_lock);

		ctl = &modules;
		do {
			if (ctl->mod_busy || ctl->mod_mp == NULL)
				continue;

			prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);

		} while ((ctl = ctl->mod_next) != &modules);

		mutex_exit(&mod_lock);
#endif
	} while (all && (prv = prv->dtpv_next) != NULL);
}

#if defined(sun)
/*
 * Iterate over each probe, and call the Framework-to-Provider API function
 * denoted by offs.
 */
static void
dtrace_probe_foreach(uintptr_t offs)
{
	dtrace_provider_t *prov;
	void (*func)(void *, dtrace_id_t, void *);
	dtrace_probe_t *probe;
	dtrace_icookie_t cookie;
	int i;

	/*
	 * We disable interrupts to walk through the probe array.  This is
	 * safe -- the dtrace_sync() in dtrace_unregister() assures that we
	 * won't see stale data.
	 */
	cookie = dtrace_interrupt_disable();

	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_ecb == NULL) {
			/*
			 * This probe isn't enabled -- don't call the function.
			 */
			continue;
		}

		/*
		 * 'offs' is the byte offset of the desired op within the
		 * provider's ops vector.
		 */
		prov = probe->dtpr_provider;
		func = *((void(**)(void *, dtrace_id_t, void *))
		    ((uintptr_t)&prov->dtpv_pops + offs));

		func(prov->dtpv_arg, i + 1, probe->dtpr_arg);
	}

	dtrace_interrupt_enable(cookie);
}
#endif

/*
 * Create enablings for all probes matching the given description -- or a
 * single NULL-probe ECB when the description is NULL -- using the consumer
 * state's credentials to restrict matching.
 */
static int
dtrace_probe_enable(dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
{
	dtrace_probekey_t pkey;
	uint32_t priv;
	uid_t uid;
	zoneid_t zoneid;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	dtrace_ecb_create_cache = NULL;

	if (desc == NULL) {
		/*
		 * If we're passed a NULL description, we're being asked to
		 * create an ECB with a NULL probe.
		 */
		(void) dtrace_ecb_create_enable(NULL, enab);
		return (0);
	}

	dtrace_probekey(desc, &pkey);
	dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred,
	    &priv, &uid, &zoneid);

	return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable,
	    enab));
}

/*
 * DTrace Helper Provider Functions
 */

/*
 * Unpack a DOF-encoded attribute into a dtrace_attribute_t.
 */
static void
dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr)
{
	attr->dtat_name = DOF_ATTR_NAME(dofattr);
	attr->dtat_data = DOF_ATTR_DATA(dofattr);
	attr->dtat_class = DOF_ATTR_CLASS(dofattr);
}

/*
 * Translate a DOF provider description into a helper provider description;
 * string offsets are resolved against the given string table.
 */
static void
dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov,
    const dof_provider_t *dofprov, char *strtab)
{
	hprov->dthpv_provname = strtab + dofprov->dofpv_name;
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider,
	    dofprov->dofpv_provattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod,
	    dofprov->dofpv_modattr);

dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func, 9219 dofprov->dofpv_funcattr); 9220 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name, 9221 dofprov->dofpv_nameattr); 9222 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args, 9223 dofprov->dofpv_argsattr); 9224} 9225 9226static void 9227dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) 9228{ 9229 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; 9230 dof_hdr_t *dof = (dof_hdr_t *)daddr; 9231 dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec; 9232 dof_provider_t *provider; 9233 dof_probe_t *probe; 9234 uint32_t *off, *enoff; 9235 uint8_t *arg; 9236 char *strtab; 9237 uint_t i, nprobes; 9238 dtrace_helper_provdesc_t dhpv; 9239 dtrace_helper_probedesc_t dhpb; 9240 dtrace_meta_t *meta = dtrace_meta_pid; 9241 dtrace_mops_t *mops = &meta->dtm_mops; 9242 void *parg; 9243 9244 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset); 9245 str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + 9246 provider->dofpv_strtab * dof->dofh_secsize); 9247 prb_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + 9248 provider->dofpv_probes * dof->dofh_secsize); 9249 arg_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + 9250 provider->dofpv_prargs * dof->dofh_secsize); 9251 off_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + 9252 provider->dofpv_proffs * dof->dofh_secsize); 9253 9254 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); 9255 off = (uint32_t *)(uintptr_t)(daddr + off_sec->dofs_offset); 9256 arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset); 9257 enoff = NULL; 9258 9259 /* 9260 * See dtrace_helper_provider_validate(). 
9261 */ 9262 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && 9263 provider->dofpv_prenoffs != DOF_SECT_NONE) { 9264 enoff_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + 9265 provider->dofpv_prenoffs * dof->dofh_secsize); 9266 enoff = (uint32_t *)(uintptr_t)(daddr + enoff_sec->dofs_offset); 9267 } 9268 9269 nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize; 9270 9271 /* 9272 * Create the provider. 9273 */ 9274 dtrace_dofprov2hprov(&dhpv, provider, strtab); 9275 9276 if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL) 9277 return; 9278 9279 meta->dtm_count++; 9280 9281 /* 9282 * Create the probes. 9283 */ 9284 for (i = 0; i < nprobes; i++) { 9285 probe = (dof_probe_t *)(uintptr_t)(daddr + 9286 prb_sec->dofs_offset + i * prb_sec->dofs_entsize); 9287 9288 dhpb.dthpb_mod = dhp->dofhp_mod; 9289 dhpb.dthpb_func = strtab + probe->dofpr_func; 9290 dhpb.dthpb_name = strtab + probe->dofpr_name; 9291 dhpb.dthpb_base = probe->dofpr_addr; 9292 dhpb.dthpb_offs = off + probe->dofpr_offidx; 9293 dhpb.dthpb_noffs = probe->dofpr_noffs; 9294 if (enoff != NULL) { 9295 dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx; 9296 dhpb.dthpb_nenoffs = probe->dofpr_nenoffs; 9297 } else { 9298 dhpb.dthpb_enoffs = NULL; 9299 dhpb.dthpb_nenoffs = 0; 9300 } 9301 dhpb.dthpb_args = arg + probe->dofpr_argidx; 9302 dhpb.dthpb_nargc = probe->dofpr_nargc; 9303 dhpb.dthpb_xargc = probe->dofpr_xargc; 9304 dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv; 9305 dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv; 9306 9307 mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb); 9308 } 9309} 9310 9311static void 9312dtrace_helper_provide(dof_helper_t *dhp, pid_t pid) 9313{ 9314 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; 9315 dof_hdr_t *dof = (dof_hdr_t *)daddr; 9316 int i; 9317 9318 ASSERT(MUTEX_HELD(&dtrace_meta_lock)); 9319 9320 for (i = 0; i < dof->dofh_secnum; i++) { 9321 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + 9322 dof->dofh_secoff + i * dof->dofh_secsize); 
9323 9324 if (sec->dofs_type != DOF_SECT_PROVIDER) 9325 continue; 9326 9327 dtrace_helper_provide_one(dhp, sec, pid); 9328 } 9329 9330 /* 9331 * We may have just created probes, so we must now rematch against 9332 * any retained enablings. Note that this call will acquire both 9333 * cpu_lock and dtrace_lock; the fact that we are holding 9334 * dtrace_meta_lock now is what defines the ordering with respect to 9335 * these three locks. 9336 */ 9337 dtrace_enabling_matchall(); 9338} 9339 9340static void 9341dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) 9342{ 9343 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; 9344 dof_hdr_t *dof = (dof_hdr_t *)daddr; 9345 dof_sec_t *str_sec; 9346 dof_provider_t *provider; 9347 char *strtab; 9348 dtrace_helper_provdesc_t dhpv; 9349 dtrace_meta_t *meta = dtrace_meta_pid; 9350 dtrace_mops_t *mops = &meta->dtm_mops; 9351 9352 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset); 9353 str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + 9354 provider->dofpv_strtab * dof->dofh_secsize); 9355 9356 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); 9357 9358 /* 9359 * Create the provider. 
9360 */ 9361 dtrace_dofprov2hprov(&dhpv, provider, strtab); 9362 9363 mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid); 9364 9365 meta->dtm_count--; 9366} 9367 9368static void 9369dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid) 9370{ 9371 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; 9372 dof_hdr_t *dof = (dof_hdr_t *)daddr; 9373 int i; 9374 9375 ASSERT(MUTEX_HELD(&dtrace_meta_lock)); 9376 9377 for (i = 0; i < dof->dofh_secnum; i++) { 9378 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + 9379 dof->dofh_secoff + i * dof->dofh_secsize); 9380 9381 if (sec->dofs_type != DOF_SECT_PROVIDER) 9382 continue; 9383 9384 dtrace_helper_provider_remove_one(dhp, sec, pid); 9385 } 9386} 9387 9388/* 9389 * DTrace Meta Provider-to-Framework API Functions 9390 * 9391 * These functions implement the Meta Provider-to-Framework API, as described 9392 * in <sys/dtrace.h>. 9393 */ 9394int 9395dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, 9396 dtrace_meta_provider_id_t *idp) 9397{ 9398 dtrace_meta_t *meta; 9399 dtrace_helpers_t *help, *next; 9400 int i; 9401 9402 *idp = DTRACE_METAPROVNONE; 9403 9404 /* 9405 * We strictly don't need the name, but we hold onto it for 9406 * debuggability. All hail error queues! 
9407 */ 9408 if (name == NULL) { 9409 cmn_err(CE_WARN, "failed to register meta-provider: " 9410 "invalid name"); 9411 return (EINVAL); 9412 } 9413 9414 if (mops == NULL || 9415 mops->dtms_create_probe == NULL || 9416 mops->dtms_provide_pid == NULL || 9417 mops->dtms_remove_pid == NULL) { 9418 cmn_err(CE_WARN, "failed to register meta-register %s: " 9419 "invalid ops", name); 9420 return (EINVAL); 9421 } 9422 9423 meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP); 9424 meta->dtm_mops = *mops; 9425 meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP); 9426 (void) strcpy(meta->dtm_name, name); 9427 meta->dtm_arg = arg; 9428 9429 mutex_enter(&dtrace_meta_lock); 9430 mutex_enter(&dtrace_lock); 9431 9432 if (dtrace_meta_pid != NULL) { 9433 mutex_exit(&dtrace_lock); 9434 mutex_exit(&dtrace_meta_lock); 9435 cmn_err(CE_WARN, "failed to register meta-register %s: " 9436 "user-land meta-provider exists", name); 9437 kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1); 9438 kmem_free(meta, sizeof (dtrace_meta_t)); 9439 return (EINVAL); 9440 } 9441 9442 dtrace_meta_pid = meta; 9443 *idp = (dtrace_meta_provider_id_t)meta; 9444 9445 /* 9446 * If there are providers and probes ready to go, pass them 9447 * off to the new meta provider now. 
9448 */ 9449 9450 help = dtrace_deferred_pid; 9451 dtrace_deferred_pid = NULL; 9452 9453 mutex_exit(&dtrace_lock); 9454 9455 while (help != NULL) { 9456 for (i = 0; i < help->dthps_nprovs; i++) { 9457 dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, 9458 help->dthps_pid); 9459 } 9460 9461 next = help->dthps_next; 9462 help->dthps_next = NULL; 9463 help->dthps_prev = NULL; 9464 help->dthps_deferred = 0; 9465 help = next; 9466 } 9467 9468 mutex_exit(&dtrace_meta_lock); 9469 9470 return (0); 9471} 9472 9473int 9474dtrace_meta_unregister(dtrace_meta_provider_id_t id) 9475{ 9476 dtrace_meta_t **pp, *old = (dtrace_meta_t *)id; 9477 9478 mutex_enter(&dtrace_meta_lock); 9479 mutex_enter(&dtrace_lock); 9480 9481 if (old == dtrace_meta_pid) { 9482 pp = &dtrace_meta_pid; 9483 } else { 9484 panic("attempt to unregister non-existent " 9485 "dtrace meta-provider %p\n", (void *)old); 9486 } 9487 9488 if (old->dtm_count != 0) { 9489 mutex_exit(&dtrace_lock); 9490 mutex_exit(&dtrace_meta_lock); 9491 return (EBUSY); 9492 } 9493 9494 *pp = NULL; 9495 9496 mutex_exit(&dtrace_lock); 9497 mutex_exit(&dtrace_meta_lock); 9498 9499 kmem_free(old->dtm_name, strlen(old->dtm_name) + 1); 9500 kmem_free(old, sizeof (dtrace_meta_t)); 9501 9502 return (0); 9503} 9504 9505 9506/* 9507 * DTrace DIF Object Functions 9508 */ 9509static int 9510dtrace_difo_err(uint_t pc, const char *format, ...) 9511{ 9512 if (dtrace_err_verbose) { 9513 va_list alist; 9514 9515 (void) uprintf("dtrace DIF object error: [%u]: ", pc); 9516 va_start(alist, format); 9517 (void) vuprintf(format, alist); 9518 va_end(alist); 9519 } 9520 9521#ifdef DTRACE_ERRDEBUG 9522 dtrace_errdebug(format); 9523#endif 9524 return (1); 9525} 9526 9527/* 9528 * Validate a DTrace DIF object by checking the IR instructions. The following 9529 * rules are currently enforced by dtrace_difo_validate(): 9530 * 9531 * 1. Each instruction must have a valid opcode 9532 * 2. 
Each register, string, variable, or subroutine reference must be valid 9533 * 3. No instruction can modify register %r0 (must be zero) 9534 * 4. All instruction reserved bits must be set to zero 9535 * 5. The last instruction must be a "ret" instruction 9536 * 6. All branch targets must reference a valid instruction _after_ the branch 9537 */ 9538static int 9539dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, 9540 cred_t *cr) 9541{ 9542 int err = 0, i; 9543 int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err; 9544 int kcheckload; 9545 uint_t pc; 9546 9547 kcheckload = cr == NULL || 9548 (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0; 9549 9550 dp->dtdo_destructive = 0; 9551 9552 for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) { 9553 dif_instr_t instr = dp->dtdo_buf[pc]; 9554 9555 uint_t r1 = DIF_INSTR_R1(instr); 9556 uint_t r2 = DIF_INSTR_R2(instr); 9557 uint_t rd = DIF_INSTR_RD(instr); 9558 uint_t rs = DIF_INSTR_RS(instr); 9559 uint_t label = DIF_INSTR_LABEL(instr); 9560 uint_t v = DIF_INSTR_VAR(instr); 9561 uint_t subr = DIF_INSTR_SUBR(instr); 9562 uint_t type = DIF_INSTR_TYPE(instr); 9563 uint_t op = DIF_INSTR_OP(instr); 9564 9565 switch (op) { 9566 case DIF_OP_OR: 9567 case DIF_OP_XOR: 9568 case DIF_OP_AND: 9569 case DIF_OP_SLL: 9570 case DIF_OP_SRL: 9571 case DIF_OP_SRA: 9572 case DIF_OP_SUB: 9573 case DIF_OP_ADD: 9574 case DIF_OP_MUL: 9575 case DIF_OP_SDIV: 9576 case DIF_OP_UDIV: 9577 case DIF_OP_SREM: 9578 case DIF_OP_UREM: 9579 case DIF_OP_COPYS: 9580 if (r1 >= nregs) 9581 err += efunc(pc, "invalid register %u\n", r1); 9582 if (r2 >= nregs) 9583 err += efunc(pc, "invalid register %u\n", r2); 9584 if (rd >= nregs) 9585 err += efunc(pc, "invalid register %u\n", rd); 9586 if (rd == 0) 9587 err += efunc(pc, "cannot write to %r0\n"); 9588 break; 9589 case DIF_OP_NOT: 9590 case DIF_OP_MOV: 9591 case DIF_OP_ALLOCS: 9592 if (r1 >= nregs) 9593 err += efunc(pc, "invalid register %u\n", r1); 9594 if (r2 != 
0) 9595 err += efunc(pc, "non-zero reserved bits\n"); 9596 if (rd >= nregs) 9597 err += efunc(pc, "invalid register %u\n", rd); 9598 if (rd == 0) 9599 err += efunc(pc, "cannot write to %r0\n"); 9600 break; 9601 case DIF_OP_LDSB: 9602 case DIF_OP_LDSH: 9603 case DIF_OP_LDSW: 9604 case DIF_OP_LDUB: 9605 case DIF_OP_LDUH: 9606 case DIF_OP_LDUW: 9607 case DIF_OP_LDX: 9608 if (r1 >= nregs) 9609 err += efunc(pc, "invalid register %u\n", r1); 9610 if (r2 != 0) 9611 err += efunc(pc, "non-zero reserved bits\n"); 9612 if (rd >= nregs) 9613 err += efunc(pc, "invalid register %u\n", rd); 9614 if (rd == 0) 9615 err += efunc(pc, "cannot write to %r0\n"); 9616 if (kcheckload) 9617 dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op + 9618 DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd); 9619 break; 9620 case DIF_OP_RLDSB: 9621 case DIF_OP_RLDSH: 9622 case DIF_OP_RLDSW: 9623 case DIF_OP_RLDUB: 9624 case DIF_OP_RLDUH: 9625 case DIF_OP_RLDUW: 9626 case DIF_OP_RLDX: 9627 if (r1 >= nregs) 9628 err += efunc(pc, "invalid register %u\n", r1); 9629 if (r2 != 0) 9630 err += efunc(pc, "non-zero reserved bits\n"); 9631 if (rd >= nregs) 9632 err += efunc(pc, "invalid register %u\n", rd); 9633 if (rd == 0) 9634 err += efunc(pc, "cannot write to %r0\n"); 9635 break; 9636 case DIF_OP_ULDSB: 9637 case DIF_OP_ULDSH: 9638 case DIF_OP_ULDSW: 9639 case DIF_OP_ULDUB: 9640 case DIF_OP_ULDUH: 9641 case DIF_OP_ULDUW: 9642 case DIF_OP_ULDX: 9643 if (r1 >= nregs) 9644 err += efunc(pc, "invalid register %u\n", r1); 9645 if (r2 != 0) 9646 err += efunc(pc, "non-zero reserved bits\n"); 9647 if (rd >= nregs) 9648 err += efunc(pc, "invalid register %u\n", rd); 9649 if (rd == 0) 9650 err += efunc(pc, "cannot write to %r0\n"); 9651 break; 9652 case DIF_OP_STB: 9653 case DIF_OP_STH: 9654 case DIF_OP_STW: 9655 case DIF_OP_STX: 9656 if (r1 >= nregs) 9657 err += efunc(pc, "invalid register %u\n", r1); 9658 if (r2 != 0) 9659 err += efunc(pc, "non-zero reserved bits\n"); 9660 if (rd >= nregs) 9661 err += efunc(pc, "invalid register %u\n", rd); 
9662 if (rd == 0) 9663 err += efunc(pc, "cannot write to 0 address\n"); 9664 break; 9665 case DIF_OP_CMP: 9666 case DIF_OP_SCMP: 9667 if (r1 >= nregs) 9668 err += efunc(pc, "invalid register %u\n", r1); 9669 if (r2 >= nregs) 9670 err += efunc(pc, "invalid register %u\n", r2); 9671 if (rd != 0) 9672 err += efunc(pc, "non-zero reserved bits\n"); 9673 break; 9674 case DIF_OP_TST: 9675 if (r1 >= nregs) 9676 err += efunc(pc, "invalid register %u\n", r1); 9677 if (r2 != 0 || rd != 0) 9678 err += efunc(pc, "non-zero reserved bits\n"); 9679 break; 9680 case DIF_OP_BA: 9681 case DIF_OP_BE: 9682 case DIF_OP_BNE: 9683 case DIF_OP_BG: 9684 case DIF_OP_BGU: 9685 case DIF_OP_BGE: 9686 case DIF_OP_BGEU: 9687 case DIF_OP_BL: 9688 case DIF_OP_BLU: 9689 case DIF_OP_BLE: 9690 case DIF_OP_BLEU: 9691 if (label >= dp->dtdo_len) { 9692 err += efunc(pc, "invalid branch target %u\n", 9693 label); 9694 } 9695 if (label <= pc) { 9696 err += efunc(pc, "backward branch to %u\n", 9697 label); 9698 } 9699 break; 9700 case DIF_OP_RET: 9701 if (r1 != 0 || r2 != 0) 9702 err += efunc(pc, "non-zero reserved bits\n"); 9703 if (rd >= nregs) 9704 err += efunc(pc, "invalid register %u\n", rd); 9705 break; 9706 case DIF_OP_NOP: 9707 case DIF_OP_POPTS: 9708 case DIF_OP_FLUSHTS: 9709 if (r1 != 0 || r2 != 0 || rd != 0) 9710 err += efunc(pc, "non-zero reserved bits\n"); 9711 break; 9712 case DIF_OP_SETX: 9713 if (DIF_INSTR_INTEGER(instr) >= dp->dtdo_intlen) { 9714 err += efunc(pc, "invalid integer ref %u\n", 9715 DIF_INSTR_INTEGER(instr)); 9716 } 9717 if (rd >= nregs) 9718 err += efunc(pc, "invalid register %u\n", rd); 9719 if (rd == 0) 9720 err += efunc(pc, "cannot write to %r0\n"); 9721 break; 9722 case DIF_OP_SETS: 9723 if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) { 9724 err += efunc(pc, "invalid string ref %u\n", 9725 DIF_INSTR_STRING(instr)); 9726 } 9727 if (rd >= nregs) 9728 err += efunc(pc, "invalid register %u\n", rd); 9729 if (rd == 0) 9730 err += efunc(pc, "cannot write to %r0\n"); 9731 break; 
9732 case DIF_OP_LDGA: 9733 case DIF_OP_LDTA: 9734 if (r1 > DIF_VAR_ARRAY_MAX) 9735 err += efunc(pc, "invalid array %u\n", r1); 9736 if (r2 >= nregs) 9737 err += efunc(pc, "invalid register %u\n", r2); 9738 if (rd >= nregs) 9739 err += efunc(pc, "invalid register %u\n", rd); 9740 if (rd == 0) 9741 err += efunc(pc, "cannot write to %r0\n"); 9742 break; 9743 case DIF_OP_LDGS: 9744 case DIF_OP_LDTS: 9745 case DIF_OP_LDLS: 9746 case DIF_OP_LDGAA: 9747 case DIF_OP_LDTAA: 9748 if (v < DIF_VAR_OTHER_MIN || v > DIF_VAR_OTHER_MAX) 9749 err += efunc(pc, "invalid variable %u\n", v); 9750 if (rd >= nregs) 9751 err += efunc(pc, "invalid register %u\n", rd); 9752 if (rd == 0) 9753 err += efunc(pc, "cannot write to %r0\n"); 9754 break; 9755 case DIF_OP_STGS: 9756 case DIF_OP_STTS: 9757 case DIF_OP_STLS: 9758 case DIF_OP_STGAA: 9759 case DIF_OP_STTAA: 9760 if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX) 9761 err += efunc(pc, "invalid variable %u\n", v); 9762 if (rs >= nregs) 9763 err += efunc(pc, "invalid register %u\n", rd); 9764 break; 9765 case DIF_OP_CALL: 9766 if (subr > DIF_SUBR_MAX) 9767 err += efunc(pc, "invalid subr %u\n", subr); 9768 if (rd >= nregs) 9769 err += efunc(pc, "invalid register %u\n", rd); 9770 if (rd == 0) 9771 err += efunc(pc, "cannot write to %r0\n"); 9772 9773 if (subr == DIF_SUBR_COPYOUT || 9774 subr == DIF_SUBR_COPYOUTSTR) { 9775 dp->dtdo_destructive = 1; 9776 } 9777 9778 if (subr == DIF_SUBR_GETF) { 9779 /* 9780 * If we have a getf() we need to record that 9781 * in our state. Note that our state can be 9782 * NULL if this is a helper -- but in that 9783 * case, the call to getf() is itself illegal, 9784 * and will be caught (slightly later) when 9785 * the helper is validated. 
9786 */ 9787 if (vstate->dtvs_state != NULL) 9788 vstate->dtvs_state->dts_getf++; 9789 } 9790 9791 break; 9792 case DIF_OP_PUSHTR: 9793 if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF) 9794 err += efunc(pc, "invalid ref type %u\n", type); 9795 if (r2 >= nregs) 9796 err += efunc(pc, "invalid register %u\n", r2); 9797 if (rs >= nregs) 9798 err += efunc(pc, "invalid register %u\n", rs); 9799 break; 9800 case DIF_OP_PUSHTV: 9801 if (type != DIF_TYPE_CTF) 9802 err += efunc(pc, "invalid val type %u\n", type); 9803 if (r2 >= nregs) 9804 err += efunc(pc, "invalid register %u\n", r2); 9805 if (rs >= nregs) 9806 err += efunc(pc, "invalid register %u\n", rs); 9807 break; 9808 default: 9809 err += efunc(pc, "invalid opcode %u\n", 9810 DIF_INSTR_OP(instr)); 9811 } 9812 } 9813 9814 if (dp->dtdo_len != 0 && 9815 DIF_INSTR_OP(dp->dtdo_buf[dp->dtdo_len - 1]) != DIF_OP_RET) { 9816 err += efunc(dp->dtdo_len - 1, 9817 "expected 'ret' as last DIF instruction\n"); 9818 } 9819 9820 if (!(dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF))) { 9821 /* 9822 * If we're not returning by reference, the size must be either 9823 * 0 or the size of one of the base types. 
9824 */ 9825 switch (dp->dtdo_rtype.dtdt_size) { 9826 case 0: 9827 case sizeof (uint8_t): 9828 case sizeof (uint16_t): 9829 case sizeof (uint32_t): 9830 case sizeof (uint64_t): 9831 break; 9832 9833 default: 9834 err += efunc(dp->dtdo_len - 1, "bad return size\n"); 9835 } 9836 } 9837 9838 for (i = 0; i < dp->dtdo_varlen && err == 0; i++) { 9839 dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL; 9840 dtrace_diftype_t *vt, *et; 9841 uint_t id, ndx; 9842 9843 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL && 9844 v->dtdv_scope != DIFV_SCOPE_THREAD && 9845 v->dtdv_scope != DIFV_SCOPE_LOCAL) { 9846 err += efunc(i, "unrecognized variable scope %d\n", 9847 v->dtdv_scope); 9848 break; 9849 } 9850 9851 if (v->dtdv_kind != DIFV_KIND_ARRAY && 9852 v->dtdv_kind != DIFV_KIND_SCALAR) { 9853 err += efunc(i, "unrecognized variable type %d\n", 9854 v->dtdv_kind); 9855 break; 9856 } 9857 9858 if ((id = v->dtdv_id) > DIF_VARIABLE_MAX) { 9859 err += efunc(i, "%d exceeds variable id limit\n", id); 9860 break; 9861 } 9862 9863 if (id < DIF_VAR_OTHER_UBASE) 9864 continue; 9865 9866 /* 9867 * For user-defined variables, we need to check that this 9868 * definition is identical to any previous definition that we 9869 * encountered. 
9870 */ 9871 ndx = id - DIF_VAR_OTHER_UBASE; 9872 9873 switch (v->dtdv_scope) { 9874 case DIFV_SCOPE_GLOBAL: 9875 if (ndx < vstate->dtvs_nglobals) { 9876 dtrace_statvar_t *svar; 9877 9878 if ((svar = vstate->dtvs_globals[ndx]) != NULL) 9879 existing = &svar->dtsv_var; 9880 } 9881 9882 break; 9883 9884 case DIFV_SCOPE_THREAD: 9885 if (ndx < vstate->dtvs_ntlocals) 9886 existing = &vstate->dtvs_tlocals[ndx]; 9887 break; 9888 9889 case DIFV_SCOPE_LOCAL: 9890 if (ndx < vstate->dtvs_nlocals) { 9891 dtrace_statvar_t *svar; 9892 9893 if ((svar = vstate->dtvs_locals[ndx]) != NULL) 9894 existing = &svar->dtsv_var; 9895 } 9896 9897 break; 9898 } 9899 9900 vt = &v->dtdv_type; 9901 9902 if (vt->dtdt_flags & DIF_TF_BYREF) { 9903 if (vt->dtdt_size == 0) { 9904 err += efunc(i, "zero-sized variable\n"); 9905 break; 9906 } 9907 9908 if (v->dtdv_scope == DIFV_SCOPE_GLOBAL && 9909 vt->dtdt_size > dtrace_global_maxsize) { 9910 err += efunc(i, "oversized by-ref global\n"); 9911 break; 9912 } 9913 } 9914 9915 if (existing == NULL || existing->dtdv_id == 0) 9916 continue; 9917 9918 ASSERT(existing->dtdv_id == v->dtdv_id); 9919 ASSERT(existing->dtdv_scope == v->dtdv_scope); 9920 9921 if (existing->dtdv_kind != v->dtdv_kind) 9922 err += efunc(i, "%d changed variable kind\n", id); 9923 9924 et = &existing->dtdv_type; 9925 9926 if (vt->dtdt_flags != et->dtdt_flags) { 9927 err += efunc(i, "%d changed variable type flags\n", id); 9928 break; 9929 } 9930 9931 if (vt->dtdt_size != 0 && vt->dtdt_size != et->dtdt_size) { 9932 err += efunc(i, "%d changed variable type size\n", id); 9933 break; 9934 } 9935 } 9936 9937 return (err); 9938} 9939 9940/* 9941 * Validate a DTrace DIF object that it is to be used as a helper. Helpers 9942 * are much more constrained than normal DIFOs. Specifically, they may 9943 * not: 9944 * 9945 * 1. Make calls to subroutines other than copyin(), copyinstr() or 9946 * miscellaneous string routines 9947 * 2. 
Access DTrace variables other than the args[] array, and the 9948 * curthread, pid, ppid, tid, execname, zonename, uid and gid variables. 9949 * 3. Have thread-local variables. 9950 * 4. Have dynamic variables. 9951 */ 9952static int 9953dtrace_difo_validate_helper(dtrace_difo_t *dp) 9954{ 9955 int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err; 9956 int err = 0; 9957 uint_t pc; 9958 9959 for (pc = 0; pc < dp->dtdo_len; pc++) { 9960 dif_instr_t instr = dp->dtdo_buf[pc]; 9961 9962 uint_t v = DIF_INSTR_VAR(instr); 9963 uint_t subr = DIF_INSTR_SUBR(instr); 9964 uint_t op = DIF_INSTR_OP(instr); 9965 9966 switch (op) { 9967 case DIF_OP_OR: 9968 case DIF_OP_XOR: 9969 case DIF_OP_AND: 9970 case DIF_OP_SLL: 9971 case DIF_OP_SRL: 9972 case DIF_OP_SRA: 9973 case DIF_OP_SUB: 9974 case DIF_OP_ADD: 9975 case DIF_OP_MUL: 9976 case DIF_OP_SDIV: 9977 case DIF_OP_UDIV: 9978 case DIF_OP_SREM: 9979 case DIF_OP_UREM: 9980 case DIF_OP_COPYS: 9981 case DIF_OP_NOT: 9982 case DIF_OP_MOV: 9983 case DIF_OP_RLDSB: 9984 case DIF_OP_RLDSH: 9985 case DIF_OP_RLDSW: 9986 case DIF_OP_RLDUB: 9987 case DIF_OP_RLDUH: 9988 case DIF_OP_RLDUW: 9989 case DIF_OP_RLDX: 9990 case DIF_OP_ULDSB: 9991 case DIF_OP_ULDSH: 9992 case DIF_OP_ULDSW: 9993 case DIF_OP_ULDUB: 9994 case DIF_OP_ULDUH: 9995 case DIF_OP_ULDUW: 9996 case DIF_OP_ULDX: 9997 case DIF_OP_STB: 9998 case DIF_OP_STH: 9999 case DIF_OP_STW: 10000 case DIF_OP_STX: 10001 case DIF_OP_ALLOCS: 10002 case DIF_OP_CMP: 10003 case DIF_OP_SCMP: 10004 case DIF_OP_TST: 10005 case DIF_OP_BA: 10006 case DIF_OP_BE: 10007 case DIF_OP_BNE: 10008 case DIF_OP_BG: 10009 case DIF_OP_BGU: 10010 case DIF_OP_BGE: 10011 case DIF_OP_BGEU: 10012 case DIF_OP_BL: 10013 case DIF_OP_BLU: 10014 case DIF_OP_BLE: 10015 case DIF_OP_BLEU: 10016 case DIF_OP_RET: 10017 case DIF_OP_NOP: 10018 case DIF_OP_POPTS: 10019 case DIF_OP_FLUSHTS: 10020 case DIF_OP_SETX: 10021 case DIF_OP_SETS: 10022 case DIF_OP_LDGA: 10023 case DIF_OP_LDLS: 10024 case DIF_OP_STGS: 10025 case 
DIF_OP_STLS: 10026 case DIF_OP_PUSHTR: 10027 case DIF_OP_PUSHTV: 10028 break; 10029 10030 case DIF_OP_LDGS: 10031 if (v >= DIF_VAR_OTHER_UBASE) 10032 break; 10033 10034 if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) 10035 break; 10036 10037 if (v == DIF_VAR_CURTHREAD || v == DIF_VAR_PID || 10038 v == DIF_VAR_PPID || v == DIF_VAR_TID || 10039 v == DIF_VAR_EXECARGS || 10040 v == DIF_VAR_EXECNAME || v == DIF_VAR_ZONENAME || 10041 v == DIF_VAR_UID || v == DIF_VAR_GID) 10042 break; 10043 10044 err += efunc(pc, "illegal variable %u\n", v); 10045 break; 10046 10047 case DIF_OP_LDTA: 10048 case DIF_OP_LDTS: 10049 case DIF_OP_LDGAA: 10050 case DIF_OP_LDTAA: 10051 err += efunc(pc, "illegal dynamic variable load\n"); 10052 break; 10053 10054 case DIF_OP_STTS: 10055 case DIF_OP_STGAA: 10056 case DIF_OP_STTAA: 10057 err += efunc(pc, "illegal dynamic variable store\n"); 10058 break; 10059 10060 case DIF_OP_CALL: 10061 if (subr == DIF_SUBR_ALLOCA || 10062 subr == DIF_SUBR_BCOPY || 10063 subr == DIF_SUBR_COPYIN || 10064 subr == DIF_SUBR_COPYINTO || 10065 subr == DIF_SUBR_COPYINSTR || 10066 subr == DIF_SUBR_INDEX || 10067 subr == DIF_SUBR_INET_NTOA || 10068 subr == DIF_SUBR_INET_NTOA6 || 10069 subr == DIF_SUBR_INET_NTOP || 10070 subr == DIF_SUBR_JSON || 10071 subr == DIF_SUBR_LLTOSTR || 10072 subr == DIF_SUBR_STRTOLL || 10073 subr == DIF_SUBR_RINDEX || 10074 subr == DIF_SUBR_STRCHR || 10075 subr == DIF_SUBR_STRJOIN || 10076 subr == DIF_SUBR_STRRCHR || 10077 subr == DIF_SUBR_STRSTR || 10078 subr == DIF_SUBR_HTONS || 10079 subr == DIF_SUBR_HTONL || 10080 subr == DIF_SUBR_HTONLL || 10081 subr == DIF_SUBR_NTOHS || 10082 subr == DIF_SUBR_NTOHL || 10083 subr == DIF_SUBR_NTOHLL || 10084 subr == DIF_SUBR_MEMREF || 10085#if !defined(sun) 10086 subr == DIF_SUBR_MEMSTR || 10087#endif 10088 subr == DIF_SUBR_TYPEREF) 10089 break; 10090 10091 err += efunc(pc, "invalid subr %u\n", subr); 10092 break; 10093 10094 default: 10095 err += efunc(pc, "invalid opcode %u\n", 10096 DIF_INSTR_OP(instr)); 10097 
} 10098 } 10099 10100 return (err); 10101} 10102 10103/* 10104 * Returns 1 if the expression in the DIF object can be cached on a per-thread 10105 * basis; 0 if not. 10106 */ 10107static int 10108dtrace_difo_cacheable(dtrace_difo_t *dp) 10109{ 10110 int i; 10111 10112 if (dp == NULL) 10113 return (0); 10114 10115 for (i = 0; i < dp->dtdo_varlen; i++) { 10116 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 10117 10118 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL) 10119 continue; 10120 10121 switch (v->dtdv_id) { 10122 case DIF_VAR_CURTHREAD: 10123 case DIF_VAR_PID: 10124 case DIF_VAR_TID: 10125 case DIF_VAR_EXECARGS: 10126 case DIF_VAR_EXECNAME: 10127 case DIF_VAR_ZONENAME: 10128 break; 10129 10130 default: 10131 return (0); 10132 } 10133 } 10134 10135 /* 10136 * This DIF object may be cacheable. Now we need to look for any 10137 * array loading instructions, any memory loading instructions, or 10138 * any stores to thread-local variables. 10139 */ 10140 for (i = 0; i < dp->dtdo_len; i++) { 10141 uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]); 10142 10143 if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) || 10144 (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) || 10145 (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) || 10146 op == DIF_OP_LDGA || op == DIF_OP_STTS) 10147 return (0); 10148 } 10149 10150 return (1); 10151} 10152 10153static void 10154dtrace_difo_hold(dtrace_difo_t *dp) 10155{ 10156 int i; 10157 10158 ASSERT(MUTEX_HELD(&dtrace_lock)); 10159 10160 dp->dtdo_refcnt++; 10161 ASSERT(dp->dtdo_refcnt != 0); 10162 10163 /* 10164 * We need to check this DIF object for references to the variable 10165 * DIF_VAR_VTIMESTAMP. 10166 */ 10167 for (i = 0; i < dp->dtdo_varlen; i++) { 10168 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 10169 10170 if (v->dtdv_id != DIF_VAR_VTIMESTAMP) 10171 continue; 10172 10173 if (dtrace_vtime_references++ == 0) 10174 dtrace_vtime_enable(); 10175 } 10176} 10177 10178/* 10179 * This routine calculates the dynamic variable chunksize for a given DIF 10180 * object. 
The calculation is not fool-proof, and can probably be tricked by 10181 * malicious DIF -- but it works for all compiler-generated DIF. Because this 10182 * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail 10183 * if a dynamic variable size exceeds the chunksize. 10184 */ 10185static void 10186dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 10187{ 10188 uint64_t sval = 0; 10189 dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */ 10190 const dif_instr_t *text = dp->dtdo_buf; 10191 uint_t pc, srd = 0; 10192 uint_t ttop = 0; 10193 size_t size, ksize; 10194 uint_t id, i; 10195 10196 for (pc = 0; pc < dp->dtdo_len; pc++) { 10197 dif_instr_t instr = text[pc]; 10198 uint_t op = DIF_INSTR_OP(instr); 10199 uint_t rd = DIF_INSTR_RD(instr); 10200 uint_t r1 = DIF_INSTR_R1(instr); 10201 uint_t nkeys = 0; 10202 uchar_t scope = 0; 10203 10204 dtrace_key_t *key = tupregs; 10205 10206 switch (op) { 10207 case DIF_OP_SETX: 10208 sval = dp->dtdo_inttab[DIF_INSTR_INTEGER(instr)]; 10209 srd = rd; 10210 continue; 10211 10212 case DIF_OP_STTS: 10213 key = &tupregs[DIF_DTR_NREGS]; 10214 key[0].dttk_size = 0; 10215 key[1].dttk_size = 0; 10216 nkeys = 2; 10217 scope = DIFV_SCOPE_THREAD; 10218 break; 10219 10220 case DIF_OP_STGAA: 10221 case DIF_OP_STTAA: 10222 nkeys = ttop; 10223 10224 if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) 10225 key[nkeys++].dttk_size = 0; 10226 10227 key[nkeys++].dttk_size = 0; 10228 10229 if (op == DIF_OP_STTAA) { 10230 scope = DIFV_SCOPE_THREAD; 10231 } else { 10232 scope = DIFV_SCOPE_GLOBAL; 10233 } 10234 10235 break; 10236 10237 case DIF_OP_PUSHTR: 10238 if (ttop == DIF_DTR_NREGS) 10239 return; 10240 10241 if ((srd == 0 || sval == 0) && r1 == DIF_TYPE_STRING) { 10242 /* 10243 * If the register for the size of the "pushtr" 10244 * is %r0 (or the value is 0) and the type is 10245 * a string, we'll use the system-wide default 10246 * string size. 
10247 */ 10248 tupregs[ttop++].dttk_size = 10249 dtrace_strsize_default; 10250 } else { 10251 if (srd == 0) 10252 return; 10253 10254 tupregs[ttop++].dttk_size = sval; 10255 } 10256 10257 break; 10258 10259 case DIF_OP_PUSHTV: 10260 if (ttop == DIF_DTR_NREGS) 10261 return; 10262 10263 tupregs[ttop++].dttk_size = 0; 10264 break; 10265 10266 case DIF_OP_FLUSHTS: 10267 ttop = 0; 10268 break; 10269 10270 case DIF_OP_POPTS: 10271 if (ttop != 0) 10272 ttop--; 10273 break; 10274 } 10275 10276 sval = 0; 10277 srd = 0; 10278 10279 if (nkeys == 0) 10280 continue; 10281 10282 /* 10283 * We have a dynamic variable allocation; calculate its size. 10284 */ 10285 for (ksize = 0, i = 0; i < nkeys; i++) 10286 ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t)); 10287 10288 size = sizeof (dtrace_dynvar_t); 10289 size += sizeof (dtrace_key_t) * (nkeys - 1); 10290 size += ksize; 10291 10292 /* 10293 * Now we need to determine the size of the stored data. 10294 */ 10295 id = DIF_INSTR_VAR(instr); 10296 10297 for (i = 0; i < dp->dtdo_varlen; i++) { 10298 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 10299 10300 if (v->dtdv_id == id && v->dtdv_scope == scope) { 10301 size += v->dtdv_type.dtdt_size; 10302 break; 10303 } 10304 } 10305 10306 if (i == dp->dtdo_varlen) 10307 return; 10308 10309 /* 10310 * We have the size. If this is larger than the chunk size 10311 * for our dynamic variable state, reset the chunk size. 
10312 */ 10313 size = P2ROUNDUP(size, sizeof (uint64_t)); 10314 10315 if (size > vstate->dtvs_dynvars.dtds_chunksize) 10316 vstate->dtvs_dynvars.dtds_chunksize = size; 10317 } 10318} 10319 10320static void 10321dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 10322{ 10323 int i, oldsvars, osz, nsz, otlocals, ntlocals; 10324 uint_t id; 10325 10326 ASSERT(MUTEX_HELD(&dtrace_lock)); 10327 ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0); 10328 10329 for (i = 0; i < dp->dtdo_varlen; i++) { 10330 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 10331 dtrace_statvar_t *svar, ***svarp = NULL; 10332 size_t dsize = 0; 10333 uint8_t scope = v->dtdv_scope; 10334 int *np = NULL; 10335 10336 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE) 10337 continue; 10338 10339 id -= DIF_VAR_OTHER_UBASE; 10340 10341 switch (scope) { 10342 case DIFV_SCOPE_THREAD: 10343 while (id >= (otlocals = vstate->dtvs_ntlocals)) { 10344 dtrace_difv_t *tlocals; 10345 10346 if ((ntlocals = (otlocals << 1)) == 0) 10347 ntlocals = 1; 10348 10349 osz = otlocals * sizeof (dtrace_difv_t); 10350 nsz = ntlocals * sizeof (dtrace_difv_t); 10351 10352 tlocals = kmem_zalloc(nsz, KM_SLEEP); 10353 10354 if (osz != 0) { 10355 bcopy(vstate->dtvs_tlocals, 10356 tlocals, osz); 10357 kmem_free(vstate->dtvs_tlocals, osz); 10358 } 10359 10360 vstate->dtvs_tlocals = tlocals; 10361 vstate->dtvs_ntlocals = ntlocals; 10362 } 10363 10364 vstate->dtvs_tlocals[id] = *v; 10365 continue; 10366 10367 case DIFV_SCOPE_LOCAL: 10368 np = &vstate->dtvs_nlocals; 10369 svarp = &vstate->dtvs_locals; 10370 10371 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) 10372 dsize = NCPU * (v->dtdv_type.dtdt_size + 10373 sizeof (uint64_t)); 10374 else 10375 dsize = NCPU * sizeof (uint64_t); 10376 10377 break; 10378 10379 case DIFV_SCOPE_GLOBAL: 10380 np = &vstate->dtvs_nglobals; 10381 svarp = &vstate->dtvs_globals; 10382 10383 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) 10384 dsize = v->dtdv_type.dtdt_size + 10385 sizeof (uint64_t); 10386 10387 break; 
10388 10389 default: 10390 ASSERT(0); 10391 } 10392 10393 while (id >= (oldsvars = *np)) { 10394 dtrace_statvar_t **statics; 10395 int newsvars, oldsize, newsize; 10396 10397 if ((newsvars = (oldsvars << 1)) == 0) 10398 newsvars = 1; 10399 10400 oldsize = oldsvars * sizeof (dtrace_statvar_t *); 10401 newsize = newsvars * sizeof (dtrace_statvar_t *); 10402 10403 statics = kmem_zalloc(newsize, KM_SLEEP); 10404 10405 if (oldsize != 0) { 10406 bcopy(*svarp, statics, oldsize); 10407 kmem_free(*svarp, oldsize); 10408 } 10409 10410 *svarp = statics; 10411 *np = newsvars; 10412 } 10413 10414 if ((svar = (*svarp)[id]) == NULL) { 10415 svar = kmem_zalloc(sizeof (dtrace_statvar_t), KM_SLEEP); 10416 svar->dtsv_var = *v; 10417 10418 if ((svar->dtsv_size = dsize) != 0) { 10419 svar->dtsv_data = (uint64_t)(uintptr_t) 10420 kmem_zalloc(dsize, KM_SLEEP); 10421 } 10422 10423 (*svarp)[id] = svar; 10424 } 10425 10426 svar->dtsv_refcnt++; 10427 } 10428 10429 dtrace_difo_chunksize(dp, vstate); 10430 dtrace_difo_hold(dp); 10431} 10432 10433static dtrace_difo_t * 10434dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 10435{ 10436 dtrace_difo_t *new; 10437 size_t sz; 10438 10439 ASSERT(dp->dtdo_buf != NULL); 10440 ASSERT(dp->dtdo_refcnt != 0); 10441 10442 new = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP); 10443 10444 ASSERT(dp->dtdo_buf != NULL); 10445 sz = dp->dtdo_len * sizeof (dif_instr_t); 10446 new->dtdo_buf = kmem_alloc(sz, KM_SLEEP); 10447 bcopy(dp->dtdo_buf, new->dtdo_buf, sz); 10448 new->dtdo_len = dp->dtdo_len; 10449 10450 if (dp->dtdo_strtab != NULL) { 10451 ASSERT(dp->dtdo_strlen != 0); 10452 new->dtdo_strtab = kmem_alloc(dp->dtdo_strlen, KM_SLEEP); 10453 bcopy(dp->dtdo_strtab, new->dtdo_strtab, dp->dtdo_strlen); 10454 new->dtdo_strlen = dp->dtdo_strlen; 10455 } 10456 10457 if (dp->dtdo_inttab != NULL) { 10458 ASSERT(dp->dtdo_intlen != 0); 10459 sz = dp->dtdo_intlen * sizeof (uint64_t); 10460 new->dtdo_inttab = kmem_alloc(sz, KM_SLEEP); 10461 
bcopy(dp->dtdo_inttab, new->dtdo_inttab, sz); 10462 new->dtdo_intlen = dp->dtdo_intlen; 10463 } 10464 10465 if (dp->dtdo_vartab != NULL) { 10466 ASSERT(dp->dtdo_varlen != 0); 10467 sz = dp->dtdo_varlen * sizeof (dtrace_difv_t); 10468 new->dtdo_vartab = kmem_alloc(sz, KM_SLEEP); 10469 bcopy(dp->dtdo_vartab, new->dtdo_vartab, sz); 10470 new->dtdo_varlen = dp->dtdo_varlen; 10471 } 10472 10473 dtrace_difo_init(new, vstate); 10474 return (new); 10475} 10476 10477static void 10478dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 10479{ 10480 int i; 10481 10482 ASSERT(dp->dtdo_refcnt == 0); 10483 10484 for (i = 0; i < dp->dtdo_varlen; i++) { 10485 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 10486 dtrace_statvar_t *svar, **svarp = NULL; 10487 uint_t id; 10488 uint8_t scope = v->dtdv_scope; 10489 int *np = NULL; 10490 10491 switch (scope) { 10492 case DIFV_SCOPE_THREAD: 10493 continue; 10494 10495 case DIFV_SCOPE_LOCAL: 10496 np = &vstate->dtvs_nlocals; 10497 svarp = vstate->dtvs_locals; 10498 break; 10499 10500 case DIFV_SCOPE_GLOBAL: 10501 np = &vstate->dtvs_nglobals; 10502 svarp = vstate->dtvs_globals; 10503 break; 10504 10505 default: 10506 ASSERT(0); 10507 } 10508 10509 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE) 10510 continue; 10511 10512 id -= DIF_VAR_OTHER_UBASE; 10513 ASSERT(id < *np); 10514 10515 svar = svarp[id]; 10516 ASSERT(svar != NULL); 10517 ASSERT(svar->dtsv_refcnt > 0); 10518 10519 if (--svar->dtsv_refcnt > 0) 10520 continue; 10521 10522 if (svar->dtsv_size != 0) { 10523 ASSERT(svar->dtsv_data != 0); 10524 kmem_free((void *)(uintptr_t)svar->dtsv_data, 10525 svar->dtsv_size); 10526 } 10527 10528 kmem_free(svar, sizeof (dtrace_statvar_t)); 10529 svarp[id] = NULL; 10530 } 10531 10532 if (dp->dtdo_buf != NULL) 10533 kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t)); 10534 if (dp->dtdo_inttab != NULL) 10535 kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t)); 10536 if (dp->dtdo_strtab != NULL) 10537 kmem_free(dp->dtdo_strtab, 
dp->dtdo_strlen); 10538 if (dp->dtdo_vartab != NULL) 10539 kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t)); 10540 10541 kmem_free(dp, sizeof (dtrace_difo_t)); 10542} 10543 10544static void 10545dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate) 10546{ 10547 int i; 10548 10549 ASSERT(MUTEX_HELD(&dtrace_lock)); 10550 ASSERT(dp->dtdo_refcnt != 0); 10551 10552 for (i = 0; i < dp->dtdo_varlen; i++) { 10553 dtrace_difv_t *v = &dp->dtdo_vartab[i]; 10554 10555 if (v->dtdv_id != DIF_VAR_VTIMESTAMP) 10556 continue; 10557 10558 ASSERT(dtrace_vtime_references > 0); 10559 if (--dtrace_vtime_references == 0) 10560 dtrace_vtime_disable(); 10561 } 10562 10563 if (--dp->dtdo_refcnt == 0) 10564 dtrace_difo_destroy(dp, vstate); 10565} 10566 10567/* 10568 * DTrace Format Functions 10569 */ 10570static uint16_t 10571dtrace_format_add(dtrace_state_t *state, char *str) 10572{ 10573 char *fmt, **new; 10574 uint16_t ndx, len = strlen(str) + 1; 10575 10576 fmt = kmem_zalloc(len, KM_SLEEP); 10577 bcopy(str, fmt, len); 10578 10579 for (ndx = 0; ndx < state->dts_nformats; ndx++) { 10580 if (state->dts_formats[ndx] == NULL) { 10581 state->dts_formats[ndx] = fmt; 10582 return (ndx + 1); 10583 } 10584 } 10585 10586 if (state->dts_nformats == USHRT_MAX) { 10587 /* 10588 * This is only likely if a denial-of-service attack is being 10589 * attempted. As such, it's okay to fail silently here. 10590 */ 10591 kmem_free(fmt, len); 10592 return (0); 10593 } 10594 10595 /* 10596 * For simplicity, we always resize the formats array to be exactly the 10597 * number of formats. 
10598 */ 10599 ndx = state->dts_nformats++; 10600 new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP); 10601 10602 if (state->dts_formats != NULL) { 10603 ASSERT(ndx != 0); 10604 bcopy(state->dts_formats, new, ndx * sizeof (char *)); 10605 kmem_free(state->dts_formats, ndx * sizeof (char *)); 10606 } 10607 10608 state->dts_formats = new; 10609 state->dts_formats[ndx] = fmt; 10610 10611 return (ndx + 1); 10612} 10613 10614static void 10615dtrace_format_remove(dtrace_state_t *state, uint16_t format) 10616{ 10617 char *fmt; 10618 10619 ASSERT(state->dts_formats != NULL); 10620 ASSERT(format <= state->dts_nformats); 10621 ASSERT(state->dts_formats[format - 1] != NULL); 10622 10623 fmt = state->dts_formats[format - 1]; 10624 kmem_free(fmt, strlen(fmt) + 1); 10625 state->dts_formats[format - 1] = NULL; 10626} 10627 10628static void 10629dtrace_format_destroy(dtrace_state_t *state) 10630{ 10631 int i; 10632 10633 if (state->dts_nformats == 0) { 10634 ASSERT(state->dts_formats == NULL); 10635 return; 10636 } 10637 10638 ASSERT(state->dts_formats != NULL); 10639 10640 for (i = 0; i < state->dts_nformats; i++) { 10641 char *fmt = state->dts_formats[i]; 10642 10643 if (fmt == NULL) 10644 continue; 10645 10646 kmem_free(fmt, strlen(fmt) + 1); 10647 } 10648 10649 kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *)); 10650 state->dts_nformats = 0; 10651 state->dts_formats = NULL; 10652} 10653 10654/* 10655 * DTrace Predicate Functions 10656 */ 10657static dtrace_predicate_t * 10658dtrace_predicate_create(dtrace_difo_t *dp) 10659{ 10660 dtrace_predicate_t *pred; 10661 10662 ASSERT(MUTEX_HELD(&dtrace_lock)); 10663 ASSERT(dp->dtdo_refcnt != 0); 10664 10665 pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP); 10666 pred->dtp_difo = dp; 10667 pred->dtp_refcnt = 1; 10668 10669 if (!dtrace_difo_cacheable(dp)) 10670 return (pred); 10671 10672 if (dtrace_predcache_id == DTRACE_CACHEIDNONE) { 10673 /* 10674 * This is only theoretically possible -- we have had 
2^32 10675 * cacheable predicates on this machine. We cannot allow any 10676 * more predicates to become cacheable: as unlikely as it is, 10677 * there may be a thread caching a (now stale) predicate cache 10678 * ID. (N.B.: the temptation is being successfully resisted to 10679 * have this cmn_err() "Holy shit -- we executed this code!") 10680 */ 10681 return (pred); 10682 } 10683 10684 pred->dtp_cacheid = dtrace_predcache_id++; 10685 10686 return (pred); 10687} 10688 10689static void 10690dtrace_predicate_hold(dtrace_predicate_t *pred) 10691{ 10692 ASSERT(MUTEX_HELD(&dtrace_lock)); 10693 ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0); 10694 ASSERT(pred->dtp_refcnt > 0); 10695 10696 pred->dtp_refcnt++; 10697} 10698 10699static void 10700dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate) 10701{ 10702 dtrace_difo_t *dp = pred->dtp_difo; 10703 10704 ASSERT(MUTEX_HELD(&dtrace_lock)); 10705 ASSERT(dp != NULL && dp->dtdo_refcnt != 0); 10706 ASSERT(pred->dtp_refcnt > 0); 10707 10708 if (--pred->dtp_refcnt == 0) { 10709 dtrace_difo_release(pred->dtp_difo, vstate); 10710 kmem_free(pred, sizeof (dtrace_predicate_t)); 10711 } 10712} 10713 10714/* 10715 * DTrace Action Description Functions 10716 */ 10717static dtrace_actdesc_t * 10718dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple, 10719 uint64_t uarg, uint64_t arg) 10720{ 10721 dtrace_actdesc_t *act; 10722 10723#if defined(sun) 10724 ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL && 10725 arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA)); 10726#endif 10727 10728 act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP); 10729 act->dtad_kind = kind; 10730 act->dtad_ntuple = ntuple; 10731 act->dtad_uarg = uarg; 10732 act->dtad_arg = arg; 10733 act->dtad_refcnt = 1; 10734 10735 return (act); 10736} 10737 10738static void 10739dtrace_actdesc_hold(dtrace_actdesc_t *act) 10740{ 10741 ASSERT(act->dtad_refcnt >= 1); 10742 act->dtad_refcnt++; 10743} 10744 
10745static void 10746dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate) 10747{ 10748 dtrace_actkind_t kind = act->dtad_kind; 10749 dtrace_difo_t *dp; 10750 10751 ASSERT(act->dtad_refcnt >= 1); 10752 10753 if (--act->dtad_refcnt != 0) 10754 return; 10755 10756 if ((dp = act->dtad_difo) != NULL) 10757 dtrace_difo_release(dp, vstate); 10758 10759 if (DTRACEACT_ISPRINTFLIKE(kind)) { 10760 char *str = (char *)(uintptr_t)act->dtad_arg; 10761 10762#if defined(sun) 10763 ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) || 10764 (str == NULL && act->dtad_kind == DTRACEACT_PRINTA)); 10765#endif 10766 10767 if (str != NULL) 10768 kmem_free(str, strlen(str) + 1); 10769 } 10770 10771 kmem_free(act, sizeof (dtrace_actdesc_t)); 10772} 10773 10774/* 10775 * DTrace ECB Functions 10776 */ 10777static dtrace_ecb_t * 10778dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe) 10779{ 10780 dtrace_ecb_t *ecb; 10781 dtrace_epid_t epid; 10782 10783 ASSERT(MUTEX_HELD(&dtrace_lock)); 10784 10785 ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP); 10786 ecb->dte_predicate = NULL; 10787 ecb->dte_probe = probe; 10788 10789 /* 10790 * The default size is the size of the default action: recording 10791 * the header. 
10792 */ 10793 ecb->dte_size = ecb->dte_needed = sizeof (dtrace_rechdr_t); 10794 ecb->dte_alignment = sizeof (dtrace_epid_t); 10795 10796 epid = state->dts_epid++; 10797 10798 if (epid - 1 >= state->dts_necbs) { 10799 dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs; 10800 int necbs = state->dts_necbs << 1; 10801 10802 ASSERT(epid == state->dts_necbs + 1); 10803 10804 if (necbs == 0) { 10805 ASSERT(oecbs == NULL); 10806 necbs = 1; 10807 } 10808 10809 ecbs = kmem_zalloc(necbs * sizeof (*ecbs), KM_SLEEP); 10810 10811 if (oecbs != NULL) 10812 bcopy(oecbs, ecbs, state->dts_necbs * sizeof (*ecbs)); 10813 10814 dtrace_membar_producer(); 10815 state->dts_ecbs = ecbs; 10816 10817 if (oecbs != NULL) { 10818 /* 10819 * If this state is active, we must dtrace_sync() 10820 * before we can free the old dts_ecbs array: we're 10821 * coming in hot, and there may be active ring 10822 * buffer processing (which indexes into the dts_ecbs 10823 * array) on another CPU. 10824 */ 10825 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) 10826 dtrace_sync(); 10827 10828 kmem_free(oecbs, state->dts_necbs * sizeof (*ecbs)); 10829 } 10830 10831 dtrace_membar_producer(); 10832 state->dts_necbs = necbs; 10833 } 10834 10835 ecb->dte_state = state; 10836 10837 ASSERT(state->dts_ecbs[epid - 1] == NULL); 10838 dtrace_membar_producer(); 10839 state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb; 10840 10841 return (ecb); 10842} 10843 10844static void 10845dtrace_ecb_enable(dtrace_ecb_t *ecb) 10846{ 10847 dtrace_probe_t *probe = ecb->dte_probe; 10848 10849 ASSERT(MUTEX_HELD(&cpu_lock)); 10850 ASSERT(MUTEX_HELD(&dtrace_lock)); 10851 ASSERT(ecb->dte_next == NULL); 10852 10853 if (probe == NULL) { 10854 /* 10855 * This is the NULL probe -- there's nothing to do. 10856 */ 10857 return; 10858 } 10859 10860 if (probe->dtpr_ecb == NULL) { 10861 dtrace_provider_t *prov = probe->dtpr_provider; 10862 10863 /* 10864 * We're the first ECB on this probe. 
10865 */ 10866 probe->dtpr_ecb = probe->dtpr_ecb_last = ecb; 10867 10868 if (ecb->dte_predicate != NULL) 10869 probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid; 10870 10871 prov->dtpv_pops.dtps_enable(prov->dtpv_arg, 10872 probe->dtpr_id, probe->dtpr_arg); 10873 } else { 10874 /* 10875 * This probe is already active. Swing the last pointer to 10876 * point to the new ECB, and issue a dtrace_sync() to assure 10877 * that all CPUs have seen the change. 10878 */ 10879 ASSERT(probe->dtpr_ecb_last != NULL); 10880 probe->dtpr_ecb_last->dte_next = ecb; 10881 probe->dtpr_ecb_last = ecb; 10882 probe->dtpr_predcache = 0; 10883 10884 dtrace_sync(); 10885 } 10886} 10887 10888static void 10889dtrace_ecb_resize(dtrace_ecb_t *ecb) 10890{ 10891 dtrace_action_t *act; 10892 uint32_t curneeded = UINT32_MAX; 10893 uint32_t aggbase = UINT32_MAX; 10894 10895 /* 10896 * If we record anything, we always record the dtrace_rechdr_t. (And 10897 * we always record it first.) 10898 */ 10899 ecb->dte_size = sizeof (dtrace_rechdr_t); 10900 ecb->dte_alignment = sizeof (dtrace_epid_t); 10901 10902 for (act = ecb->dte_action; act != NULL; act = act->dta_next) { 10903 dtrace_recdesc_t *rec = &act->dta_rec; 10904 ASSERT(rec->dtrd_size > 0 || rec->dtrd_alignment == 1); 10905 10906 ecb->dte_alignment = MAX(ecb->dte_alignment, 10907 rec->dtrd_alignment); 10908 10909 if (DTRACEACT_ISAGG(act->dta_kind)) { 10910 dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act; 10911 10912 ASSERT(rec->dtrd_size != 0); 10913 ASSERT(agg->dtag_first != NULL); 10914 ASSERT(act->dta_prev->dta_intuple); 10915 ASSERT(aggbase != UINT32_MAX); 10916 ASSERT(curneeded != UINT32_MAX); 10917 10918 agg->dtag_base = aggbase; 10919 10920 curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment); 10921 rec->dtrd_offset = curneeded; 10922 curneeded += rec->dtrd_size; 10923 ecb->dte_needed = MAX(ecb->dte_needed, curneeded); 10924 10925 aggbase = UINT32_MAX; 10926 curneeded = UINT32_MAX; 10927 } else if (act->dta_intuple) { 10928 if 
(curneeded == UINT32_MAX) { 10929 /* 10930 * This is the first record in a tuple. Align 10931 * curneeded to be at offset 4 in an 8-byte 10932 * aligned block. 10933 */ 10934 ASSERT(act->dta_prev == NULL || 10935 !act->dta_prev->dta_intuple); 10936 ASSERT3U(aggbase, ==, UINT32_MAX); 10937 curneeded = P2PHASEUP(ecb->dte_size, 10938 sizeof (uint64_t), sizeof (dtrace_aggid_t)); 10939 10940 aggbase = curneeded - sizeof (dtrace_aggid_t); 10941 ASSERT(IS_P2ALIGNED(aggbase, 10942 sizeof (uint64_t))); 10943 } 10944 curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment); 10945 rec->dtrd_offset = curneeded; 10946 curneeded += rec->dtrd_size; 10947 } else { 10948 /* tuples must be followed by an aggregation */ 10949 ASSERT(act->dta_prev == NULL || 10950 !act->dta_prev->dta_intuple); 10951 10952 ecb->dte_size = P2ROUNDUP(ecb->dte_size, 10953 rec->dtrd_alignment); 10954 rec->dtrd_offset = ecb->dte_size; 10955 ecb->dte_size += rec->dtrd_size; 10956 ecb->dte_needed = MAX(ecb->dte_needed, ecb->dte_size); 10957 } 10958 } 10959 10960 if ((act = ecb->dte_action) != NULL && 10961 !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) && 10962 ecb->dte_size == sizeof (dtrace_rechdr_t)) { 10963 /* 10964 * If the size is still sizeof (dtrace_rechdr_t), then all 10965 * actions store no data; set the size to 0. 
10966 */ 10967 ecb->dte_size = 0; 10968 } 10969 10970 ecb->dte_size = P2ROUNDUP(ecb->dte_size, sizeof (dtrace_epid_t)); 10971 ecb->dte_needed = P2ROUNDUP(ecb->dte_needed, (sizeof (dtrace_epid_t))); 10972 ecb->dte_state->dts_needed = MAX(ecb->dte_state->dts_needed, 10973 ecb->dte_needed); 10974} 10975 10976static dtrace_action_t * 10977dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) 10978{ 10979 dtrace_aggregation_t *agg; 10980 size_t size = sizeof (uint64_t); 10981 int ntuple = desc->dtad_ntuple; 10982 dtrace_action_t *act; 10983 dtrace_recdesc_t *frec; 10984 dtrace_aggid_t aggid; 10985 dtrace_state_t *state = ecb->dte_state; 10986 10987 agg = kmem_zalloc(sizeof (dtrace_aggregation_t), KM_SLEEP); 10988 agg->dtag_ecb = ecb; 10989 10990 ASSERT(DTRACEACT_ISAGG(desc->dtad_kind)); 10991 10992 switch (desc->dtad_kind) { 10993 case DTRACEAGG_MIN: 10994 agg->dtag_initial = INT64_MAX; 10995 agg->dtag_aggregate = dtrace_aggregate_min; 10996 break; 10997 10998 case DTRACEAGG_MAX: 10999 agg->dtag_initial = INT64_MIN; 11000 agg->dtag_aggregate = dtrace_aggregate_max; 11001 break; 11002 11003 case DTRACEAGG_COUNT: 11004 agg->dtag_aggregate = dtrace_aggregate_count; 11005 break; 11006 11007 case DTRACEAGG_QUANTIZE: 11008 agg->dtag_aggregate = dtrace_aggregate_quantize; 11009 size = (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) * 11010 sizeof (uint64_t); 11011 break; 11012 11013 case DTRACEAGG_LQUANTIZE: { 11014 uint16_t step = DTRACE_LQUANTIZE_STEP(desc->dtad_arg); 11015 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(desc->dtad_arg); 11016 11017 agg->dtag_initial = desc->dtad_arg; 11018 agg->dtag_aggregate = dtrace_aggregate_lquantize; 11019 11020 if (step == 0 || levels == 0) 11021 goto err; 11022 11023 size = levels * sizeof (uint64_t) + 3 * sizeof (uint64_t); 11024 break; 11025 } 11026 11027 case DTRACEAGG_LLQUANTIZE: { 11028 uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg); 11029 uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg); 11030 uint16_t 
high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg); 11031 uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg); 11032 int64_t v; 11033 11034 agg->dtag_initial = desc->dtad_arg; 11035 agg->dtag_aggregate = dtrace_aggregate_llquantize; 11036 11037 if (factor < 2 || low >= high || nsteps < factor) 11038 goto err; 11039 11040 /* 11041 * Now check that the number of steps evenly divides a power 11042 * of the factor. (This assures both integer bucket size and 11043 * linearity within each magnitude.) 11044 */ 11045 for (v = factor; v < nsteps; v *= factor) 11046 continue; 11047 11048 if ((v % nsteps) || (nsteps % factor)) 11049 goto err; 11050 11051 size = (dtrace_aggregate_llquantize_bucket(factor, 11052 low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t); 11053 break; 11054 } 11055 11056 case DTRACEAGG_AVG: 11057 agg->dtag_aggregate = dtrace_aggregate_avg; 11058 size = sizeof (uint64_t) * 2; 11059 break; 11060 11061 case DTRACEAGG_STDDEV: 11062 agg->dtag_aggregate = dtrace_aggregate_stddev; 11063 size = sizeof (uint64_t) * 4; 11064 break; 11065 11066 case DTRACEAGG_SUM: 11067 agg->dtag_aggregate = dtrace_aggregate_sum; 11068 break; 11069 11070 default: 11071 goto err; 11072 } 11073 11074 agg->dtag_action.dta_rec.dtrd_size = size; 11075 11076 if (ntuple == 0) 11077 goto err; 11078 11079 /* 11080 * We must make sure that we have enough actions for the n-tuple. 11081 */ 11082 for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) { 11083 if (DTRACEACT_ISAGG(act->dta_kind)) 11084 break; 11085 11086 if (--ntuple == 0) { 11087 /* 11088 * This is the action with which our n-tuple begins. 11089 */ 11090 agg->dtag_first = act; 11091 goto success; 11092 } 11093 } 11094 11095 /* 11096 * This n-tuple is short by ntuple elements. Return failure. 
11097 */ 11098 ASSERT(ntuple != 0); 11099err: 11100 kmem_free(agg, sizeof (dtrace_aggregation_t)); 11101 return (NULL); 11102 11103success: 11104 /* 11105 * If the last action in the tuple has a size of zero, it's actually 11106 * an expression argument for the aggregating action. 11107 */ 11108 ASSERT(ecb->dte_action_last != NULL); 11109 act = ecb->dte_action_last; 11110 11111 if (act->dta_kind == DTRACEACT_DIFEXPR) { 11112 ASSERT(act->dta_difo != NULL); 11113 11114 if (act->dta_difo->dtdo_rtype.dtdt_size == 0) 11115 agg->dtag_hasarg = 1; 11116 } 11117 11118 /* 11119 * We need to allocate an id for this aggregation. 11120 */ 11121#if defined(sun) 11122 aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1, 11123 VM_BESTFIT | VM_SLEEP); 11124#else 11125 aggid = alloc_unr(state->dts_aggid_arena); 11126#endif 11127 11128 if (aggid - 1 >= state->dts_naggregations) { 11129 dtrace_aggregation_t **oaggs = state->dts_aggregations; 11130 dtrace_aggregation_t **aggs; 11131 int naggs = state->dts_naggregations << 1; 11132 int onaggs = state->dts_naggregations; 11133 11134 ASSERT(aggid == state->dts_naggregations + 1); 11135 11136 if (naggs == 0) { 11137 ASSERT(oaggs == NULL); 11138 naggs = 1; 11139 } 11140 11141 aggs = kmem_zalloc(naggs * sizeof (*aggs), KM_SLEEP); 11142 11143 if (oaggs != NULL) { 11144 bcopy(oaggs, aggs, onaggs * sizeof (*aggs)); 11145 kmem_free(oaggs, onaggs * sizeof (*aggs)); 11146 } 11147 11148 state->dts_aggregations = aggs; 11149 state->dts_naggregations = naggs; 11150 } 11151 11152 ASSERT(state->dts_aggregations[aggid - 1] == NULL); 11153 state->dts_aggregations[(agg->dtag_id = aggid) - 1] = agg; 11154 11155 frec = &agg->dtag_first->dta_rec; 11156 if (frec->dtrd_alignment < sizeof (dtrace_aggid_t)) 11157 frec->dtrd_alignment = sizeof (dtrace_aggid_t); 11158 11159 for (act = agg->dtag_first; act != NULL; act = act->dta_next) { 11160 ASSERT(!act->dta_intuple); 11161 act->dta_intuple = 1; 11162 } 11163 11164 return (&agg->dtag_action); 
11165} 11166 11167static void 11168dtrace_ecb_aggregation_destroy(dtrace_ecb_t *ecb, dtrace_action_t *act) 11169{ 11170 dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act; 11171 dtrace_state_t *state = ecb->dte_state; 11172 dtrace_aggid_t aggid = agg->dtag_id; 11173 11174 ASSERT(DTRACEACT_ISAGG(act->dta_kind)); 11175#if defined(sun) 11176 vmem_free(state->dts_aggid_arena, (void *)(uintptr_t)aggid, 1); 11177#else 11178 free_unr(state->dts_aggid_arena, aggid); 11179#endif 11180 11181 ASSERT(state->dts_aggregations[aggid - 1] == agg); 11182 state->dts_aggregations[aggid - 1] = NULL; 11183 11184 kmem_free(agg, sizeof (dtrace_aggregation_t)); 11185} 11186 11187static int 11188dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) 11189{ 11190 dtrace_action_t *action, *last; 11191 dtrace_difo_t *dp = desc->dtad_difo; 11192 uint32_t size = 0, align = sizeof (uint8_t), mask; 11193 uint16_t format = 0; 11194 dtrace_recdesc_t *rec; 11195 dtrace_state_t *state = ecb->dte_state; 11196 dtrace_optval_t *opt = state->dts_options, nframes = 0, strsize; 11197 uint64_t arg = desc->dtad_arg; 11198 11199 ASSERT(MUTEX_HELD(&dtrace_lock)); 11200 ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1); 11201 11202 if (DTRACEACT_ISAGG(desc->dtad_kind)) { 11203 /* 11204 * If this is an aggregating action, there must be neither 11205 * a speculate nor a commit on the action chain. 
11206 */ 11207 dtrace_action_t *act; 11208 11209 for (act = ecb->dte_action; act != NULL; act = act->dta_next) { 11210 if (act->dta_kind == DTRACEACT_COMMIT) 11211 return (EINVAL); 11212 11213 if (act->dta_kind == DTRACEACT_SPECULATE) 11214 return (EINVAL); 11215 } 11216 11217 action = dtrace_ecb_aggregation_create(ecb, desc); 11218 11219 if (action == NULL) 11220 return (EINVAL); 11221 } else { 11222 if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) || 11223 (desc->dtad_kind == DTRACEACT_DIFEXPR && 11224 dp != NULL && dp->dtdo_destructive)) { 11225 state->dts_destructive = 1; 11226 } 11227 11228 switch (desc->dtad_kind) { 11229 case DTRACEACT_PRINTF: 11230 case DTRACEACT_PRINTA: 11231 case DTRACEACT_SYSTEM: 11232 case DTRACEACT_FREOPEN: 11233 case DTRACEACT_DIFEXPR: 11234 /* 11235 * We know that our arg is a string -- turn it into a 11236 * format. 11237 */ 11238 if (arg == 0) { 11239 ASSERT(desc->dtad_kind == DTRACEACT_PRINTA || 11240 desc->dtad_kind == DTRACEACT_DIFEXPR); 11241 format = 0; 11242 } else { 11243 ASSERT(arg != 0); 11244#if defined(sun) 11245 ASSERT(arg > KERNELBASE); 11246#endif 11247 format = dtrace_format_add(state, 11248 (char *)(uintptr_t)arg); 11249 } 11250 11251 /*FALLTHROUGH*/ 11252 case DTRACEACT_LIBACT: 11253 case DTRACEACT_TRACEMEM: 11254 case DTRACEACT_TRACEMEM_DYNSIZE: 11255 if (dp == NULL) 11256 return (EINVAL); 11257 11258 if ((size = dp->dtdo_rtype.dtdt_size) != 0) 11259 break; 11260 11261 if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) { 11262 if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) 11263 return (EINVAL); 11264 11265 size = opt[DTRACEOPT_STRSIZE]; 11266 } 11267 11268 break; 11269 11270 case DTRACEACT_STACK: 11271 if ((nframes = arg) == 0) { 11272 nframes = opt[DTRACEOPT_STACKFRAMES]; 11273 ASSERT(nframes > 0); 11274 arg = nframes; 11275 } 11276 11277 size = nframes * sizeof (pc_t); 11278 break; 11279 11280 case DTRACEACT_JSTACK: 11281 if ((strsize = DTRACE_USTACK_STRSIZE(arg)) == 0) 11282 strsize = opt[DTRACEOPT_JSTACKSTRSIZE]; 
11283 11284 if ((nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) 11285 nframes = opt[DTRACEOPT_JSTACKFRAMES]; 11286 11287 arg = DTRACE_USTACK_ARG(nframes, strsize); 11288 11289 /*FALLTHROUGH*/ 11290 case DTRACEACT_USTACK: 11291 if (desc->dtad_kind != DTRACEACT_JSTACK && 11292 (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) { 11293 strsize = DTRACE_USTACK_STRSIZE(arg); 11294 nframes = opt[DTRACEOPT_USTACKFRAMES]; 11295 ASSERT(nframes > 0); 11296 arg = DTRACE_USTACK_ARG(nframes, strsize); 11297 } 11298 11299 /* 11300 * Save a slot for the pid. 11301 */ 11302 size = (nframes + 1) * sizeof (uint64_t); 11303 size += DTRACE_USTACK_STRSIZE(arg); 11304 size = P2ROUNDUP(size, (uint32_t)(sizeof (uintptr_t))); 11305 11306 break; 11307 11308 case DTRACEACT_SYM: 11309 case DTRACEACT_MOD: 11310 if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) != 11311 sizeof (uint64_t)) || 11312 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) 11313 return (EINVAL); 11314 break; 11315 11316 case DTRACEACT_USYM: 11317 case DTRACEACT_UMOD: 11318 case DTRACEACT_UADDR: 11319 if (dp == NULL || 11320 (dp->dtdo_rtype.dtdt_size != sizeof (uint64_t)) || 11321 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) 11322 return (EINVAL); 11323 11324 /* 11325 * We have a slot for the pid, plus a slot for the 11326 * argument. To keep things simple (aligned with 11327 * bitness-neutral sizing), we store each as a 64-bit 11328 * quantity. 
11329 */ 11330 size = 2 * sizeof (uint64_t); 11331 break; 11332 11333 case DTRACEACT_STOP: 11334 case DTRACEACT_BREAKPOINT: 11335 case DTRACEACT_PANIC: 11336 break; 11337 11338 case DTRACEACT_CHILL: 11339 case DTRACEACT_DISCARD: 11340 case DTRACEACT_RAISE: 11341 if (dp == NULL) 11342 return (EINVAL); 11343 break; 11344 11345 case DTRACEACT_EXIT: 11346 if (dp == NULL || 11347 (size = dp->dtdo_rtype.dtdt_size) != sizeof (int) || 11348 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) 11349 return (EINVAL); 11350 break; 11351 11352 case DTRACEACT_SPECULATE: 11353 if (ecb->dte_size > sizeof (dtrace_rechdr_t)) 11354 return (EINVAL); 11355 11356 if (dp == NULL) 11357 return (EINVAL); 11358 11359 state->dts_speculates = 1; 11360 break; 11361 11362 case DTRACEACT_PRINTM: 11363 size = dp->dtdo_rtype.dtdt_size; 11364 break; 11365 11366 case DTRACEACT_PRINTT: 11367 size = dp->dtdo_rtype.dtdt_size; 11368 break; 11369 11370 case DTRACEACT_COMMIT: { 11371 dtrace_action_t *act = ecb->dte_action; 11372 11373 for (; act != NULL; act = act->dta_next) { 11374 if (act->dta_kind == DTRACEACT_COMMIT) 11375 return (EINVAL); 11376 } 11377 11378 if (dp == NULL) 11379 return (EINVAL); 11380 break; 11381 } 11382 11383 default: 11384 return (EINVAL); 11385 } 11386 11387 if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) { 11388 /* 11389 * If this is a data-storing action or a speculate, 11390 * we must be sure that there isn't a commit on the 11391 * action chain. 
			 */
			dtrace_action_t *act = ecb->dte_action;

			for (; act != NULL; act = act->dta_next) {
				if (act->dta_kind == DTRACEACT_COMMIT)
					return (EINVAL);
			}
		}

		action = kmem_zalloc(sizeof (dtrace_action_t), KM_SLEEP);
		action->dta_rec.dtrd_size = size;
	}

	action->dta_refcnt = 1;
	rec = &action->dta_rec;
	size = rec->dtrd_size;

	/*
	 * Determine the record alignment:  the largest power of two (up to
	 * sizeof (uint64_t)) that evenly divides the record size.
	 */
	for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) {
		if (!(size & mask)) {
			align = mask + 1;
			break;
		}
	}

	action->dta_kind = desc->dtad_kind;

	if ((action->dta_difo = dp) != NULL)
		dtrace_difo_hold(dp);

	rec->dtrd_action = action->dta_kind;
	rec->dtrd_arg = arg;
	rec->dtrd_uarg = desc->dtad_uarg;
	rec->dtrd_alignment = (uint16_t)align;
	rec->dtrd_format = format;

	/*
	 * Append the new action to the tail of the ECB's action list.
	 */
	if ((last = ecb->dte_action_last) != NULL) {
		ASSERT(ecb->dte_action != NULL);
		action->dta_prev = last;
		last->dta_next = action;
	} else {
		ASSERT(ecb->dte_action == NULL);
		ecb->dte_action = action;
	}

	ecb->dte_action_last = action;

	return (0);
}

/*
 * Release all actions on the specified ECB.  If the action list is shared
 * with other ECBs (dta_refcnt > 1, the result of the ECB-creation cache),
 * we merely drop the reference; otherwise each action's format, DIFO and
 * (for aggregating actions) aggregation state are released and the action
 * itself is freed.
 */
static void
dtrace_ecb_action_remove(dtrace_ecb_t *ecb)
{
	dtrace_action_t *act = ecb->dte_action, *next;
	dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate;
	dtrace_difo_t *dp;
	uint16_t format;

	if (act != NULL && act->dta_refcnt > 1) {
		ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1);
		act->dta_refcnt--;
	} else {
		for (; act != NULL; act = next) {
			next = act->dta_next;
			ASSERT(next != NULL || act == ecb->dte_action_last);
			ASSERT(act->dta_refcnt == 1);

			if ((format = act->dta_rec.dtrd_format) != 0)
				dtrace_format_remove(ecb->dte_state, format);

			if ((dp = act->dta_difo) != NULL)
				dtrace_difo_release(dp, vstate);

			if (DTRACEACT_ISAGG(act->dta_kind)) {
				dtrace_ecb_aggregation_destroy(ecb, act);
			} else {
				kmem_free(act, sizeof (dtrace_action_t));
			}
		}
	}

	ecb->dte_action = NULL;
	ecb->dte_action_last = NULL;
	ecb->dte_size = 0;
}

static void
dtrace_ecb_disable(dtrace_ecb_t *ecb)
{
	/*
	 * We disable the ECB by removing it from its probe.
	 */
	dtrace_ecb_t *pecb, *prev = NULL;
	dtrace_probe_t *probe = ecb->dte_probe;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (probe == NULL) {
		/*
		 * This is the NULL probe; there is nothing to disable.
		 */
		return;
	}

	/*
	 * Find the ECB on the probe's (singly-linked) ECB list, tracking
	 * its predecessor so it can be unlinked.
	 */
	for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) {
		if (pecb == ecb)
			break;
		prev = pecb;
	}

	ASSERT(pecb != NULL);

	if (prev == NULL) {
		probe->dtpr_ecb = ecb->dte_next;
	} else {
		prev->dte_next = ecb->dte_next;
	}

	if (ecb == probe->dtpr_ecb_last) {
		ASSERT(ecb->dte_next == NULL);
		probe->dtpr_ecb_last = prev;
	}

	/*
	 * The ECB has been disconnected from the probe; now sync to assure
	 * that all CPUs have seen the change before returning.
	 */
	dtrace_sync();

	if (probe->dtpr_ecb == NULL) {
		/*
		 * That was the last ECB on the probe; clear the predicate
		 * cache ID for the probe, disable it and sync one more time
		 * to assure that we'll never hit it again.
		 */
		dtrace_provider_t *prov = probe->dtpr_provider;

		ASSERT(ecb->dte_next == NULL);
		ASSERT(probe->dtpr_ecb_last == NULL);
		probe->dtpr_predcache = DTRACE_CACHEIDNONE;
		prov->dtpv_pops.dtps_disable(prov->dtpv_arg,
		    probe->dtpr_id, probe->dtpr_arg);
		dtrace_sync();
	} else {
		/*
		 * There is at least one ECB remaining on the probe.  If there
		 * is _exactly_ one, set the probe's predicate cache ID to be
		 * the predicate cache ID of the remaining ECB.
		 */
		ASSERT(probe->dtpr_ecb_last != NULL);
		ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE);

		if (probe->dtpr_ecb == probe->dtpr_ecb_last) {
			dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate;

			ASSERT(probe->dtpr_ecb->dte_next == NULL);

			if (p != NULL)
				probe->dtpr_predcache = p->dtp_cacheid;
		}

		ecb->dte_next = NULL;
	}
}

/*
 * Destroy an ECB that has already been disabled (i.e. unlinked from its
 * probe):  release its predicate and actions, clear its slot in the
 * consumer state's ECB array and free it.
 */
static void
dtrace_ecb_destroy(dtrace_ecb_t *ecb)
{
	dtrace_state_t *state = ecb->dte_state;
	dtrace_vstate_t *vstate = &state->dts_vstate;
	dtrace_predicate_t *pred;
	dtrace_epid_t epid = ecb->dte_epid;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(ecb->dte_next == NULL);
	ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb);

	if ((pred = ecb->dte_predicate) != NULL)
		dtrace_predicate_release(pred, vstate);

	dtrace_ecb_action_remove(ecb);

	ASSERT(state->dts_ecbs[epid - 1] == ecb);
	state->dts_ecbs[epid - 1] = NULL;

	kmem_free(ecb, sizeof (dtrace_ecb_t));
}

/*
 * Create an ECB for the given probe (which may be NULL when priming an
 * enabling) from the enabling's current ECB description.  Returns the new
 * ECB, or NULL with enab->dten_error set if an action could not be added.
 */
static dtrace_ecb_t *
dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe,
    dtrace_enabling_t *enab)
{
	dtrace_ecb_t *ecb;
	dtrace_predicate_t *pred;
	dtrace_actdesc_t *act;
	dtrace_provider_t *prov;
	dtrace_ecbdesc_t *desc = enab->dten_current;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(state != NULL);

	ecb = dtrace_ecb_add(state, probe);
	ecb->dte_uarg = desc->dted_uarg;

	if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) {
		dtrace_predicate_hold(pred);
		ecb->dte_predicate = pred;
	}

	if (probe != NULL) {
		/*
		 * If the provider shows more leg than the consumer is old
		 * enough to see, we need to enable the appropriate implicit
		 * predicate bits to prevent the ecb from activating at
		 * revealing times.
		 *
		 * Providers specifying DTRACE_PRIV_USER at register time
		 * are stating that they need the /proc-style privilege
		 * model to be enforced, and this is what DTRACE_COND_OWNER
		 * and DTRACE_COND_ZONEOWNER will then do at probe time.
		 */
		prov = probe->dtpr_provider;
		if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) &&
		    (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
			ecb->dte_cond |= DTRACE_COND_OWNER;

		if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) &&
		    (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
			ecb->dte_cond |= DTRACE_COND_ZONEOWNER;

		/*
		 * If the provider shows us kernel innards and the user
		 * is lacking sufficient privilege, enable the
		 * DTRACE_COND_USERMODE implicit predicate.
		 */
		if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) &&
		    (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL))
			ecb->dte_cond |= DTRACE_COND_USERMODE;
	}

	if (dtrace_ecb_create_cache != NULL) {
		/*
		 * If we have a cached ecb, we'll use its action list instead
		 * of creating our own (saving both time and space).
		 */
		dtrace_ecb_t *cached = dtrace_ecb_create_cache;
		dtrace_action_t *act = cached->dte_action;

		if (act != NULL) {
			ASSERT(act->dta_refcnt > 0);
			act->dta_refcnt++;
			ecb->dte_action = act;
			ecb->dte_action_last = cached->dte_action_last;
			ecb->dte_needed = cached->dte_needed;
			ecb->dte_size = cached->dte_size;
			ecb->dte_alignment = cached->dte_alignment;
		}

		return (ecb);
	}

	for (act = desc->dted_action; act != NULL; act = act->dtad_next) {
		if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) {
			dtrace_ecb_destroy(ecb);
			return (NULL);
		}
	}

	dtrace_ecb_resize(ecb);

	return (dtrace_ecb_create_cache = ecb);
}

/*
 * dtrace_probe_enable() callback:  create and enable an ECB on the matched
 * probe.  Returns DTRACE_MATCH_NEXT to continue matching, or
 * DTRACE_MATCH_DONE if ECB creation failed.
 */
static int
dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg)
{
	dtrace_ecb_t *ecb;
	dtrace_enabling_t *enab = arg;
	dtrace_state_t *state = enab->dten_vstate->dtvs_state;

	ASSERT(state != NULL);

	if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) {
		/*
		 * This probe was created in a generation for which this
		 * enabling has previously created ECBs; we don't want to
		 * enable it again, so just kick out.
		 */
		return (DTRACE_MATCH_NEXT);
	}

	if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
		return (DTRACE_MATCH_DONE);

	dtrace_ecb_enable(ecb);
	return (DTRACE_MATCH_NEXT);
}

/*
 * Look up an ECB by its enabled probe ID.  Returns NULL for the reserved
 * EPID 0 or an out-of-range ID.
 */
static dtrace_ecb_t *
dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id)
{
	dtrace_ecb_t *ecb;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (id == 0 || id > state->dts_necbs)
		return (NULL);

	ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL);
	/* NOTE(review): assignment inside ASSERT -- ecb is unused on non-DEBUG builds */
	ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id);

	return (state->dts_ecbs[id - 1]);
}

/*
 * Look up an aggregation by its aggregation ID.  Returns NULL for the
 * reserved ID 0 or an out-of-range ID.
 */
static dtrace_aggregation_t *
dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id)
{
	dtrace_aggregation_t *agg;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (id == 0 || id > state->dts_naggregations)
		return (NULL);

	ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL);
	ASSERT((agg = state->dts_aggregations[id - 1]) == NULL ||
	    agg->dtag_id == id);

	return (state->dts_aggregations[id - 1]);
}

/*
 * DTrace Buffer Functions
 *
 * The following functions manipulate DTrace buffers.  Most of these functions
 * are called in the context of establishing or processing consumer state;
 * exceptions are explicitly noted.
 */

/*
 * Note:  called from cross call context.  This function switches the two
 * buffers on a given CPU.  The atomicity of this operation is assured by
 * disabling interrupts while the actual switch takes place; the disabling of
 * interrupts serializes the execution with any execution of dtrace_probe() on
 * the same CPU.
 */
static void
dtrace_buffer_switch(dtrace_buffer_t *buf)
{
	caddr_t tomax = buf->dtb_tomax;
	caddr_t xamot = buf->dtb_xamot;
	dtrace_icookie_t cookie;
	hrtime_t now;

	ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
	ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));

	cookie = dtrace_interrupt_disable();
	now = dtrace_gethrtime();
	buf->dtb_tomax = xamot;
	buf->dtb_xamot = tomax;
	/* Snapshot the active buffer's statistics into the inactive side... */
	buf->dtb_xamot_drops = buf->dtb_drops;
	buf->dtb_xamot_offset = buf->dtb_offset;
	buf->dtb_xamot_errors = buf->dtb_errors;
	buf->dtb_xamot_flags = buf->dtb_flags;
	/* ...and reset the (new) active buffer's statistics. */
	buf->dtb_offset = 0;
	buf->dtb_drops = 0;
	buf->dtb_errors = 0;
	buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED);
	buf->dtb_interval = now - buf->dtb_switched;
	buf->dtb_switched = now;
	dtrace_interrupt_enable(cookie);
}

/*
 * Note:  called from cross call context.  This function activates a buffer
 * on a CPU.  As with dtrace_buffer_switch(), the atomicity of the operation
 * is guaranteed by the disabling of interrupts.
 */
static void
dtrace_buffer_activate(dtrace_state_t *state)
{
	dtrace_buffer_t *buf;
	dtrace_icookie_t cookie = dtrace_interrupt_disable();

	buf = &state->dts_buffer[curcpu];

	if (buf->dtb_tomax != NULL) {
		/*
		 * We might like to assert that the buffer is marked inactive,
		 * but this isn't necessarily true:  the buffer for the CPU
		 * that processes the BEGIN probe has its buffer activated
		 * manually.  In this case, we take the (harmless) action
		 * re-clearing the bit INACTIVE bit.
		 */
		buf->dtb_flags &= ~DTRACEBUF_INACTIVE;
	}

	dtrace_interrupt_enable(cookie);
}

/*
 * Allocate per-CPU buffers of the given size (a primary buffer plus, unless
 * DTRACEBUF_NOSWITCH is set, a switch buffer), either for a single CPU or
 * for all CPUs (cpu == DTRACE_CPUALL).  On failure, all buffers allocated
 * here are freed, *factor is set to indicate by how much the request should
 * be reduced for a retry, and ENOMEM (or EFBIG) is returned.
 */
static int
dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
    processorid_t cpu, int *factor)
{
#if defined(sun)
	cpu_t *cp;
#endif
	dtrace_buffer_t *buf;
	int allocated = 0, desired = 0;

#if defined(sun)
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&dtrace_lock));

	*factor = 1;

	if (size > dtrace_nonroot_maxsize &&
	    !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE))
		return (EFBIG);

	cp = cpu_list;

	do {
		if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
			continue;

		buf = &bufs[cp->cpu_id];

		/*
		 * If there is already a buffer allocated for this CPU, it
		 * is only possible that this is a DR event.  In this case,
		 * the buffer size must match our specified size.
		 */
		if (buf->dtb_tomax != NULL) {
			ASSERT(buf->dtb_size == size);
			continue;
		}

		ASSERT(buf->dtb_xamot == NULL);

		if ((buf->dtb_tomax = kmem_zalloc(size,
		    KM_NOSLEEP | KM_NORMALPRI)) == NULL)
			goto err;

		buf->dtb_size = size;
		buf->dtb_flags = flags;
		buf->dtb_offset = 0;
		buf->dtb_drops = 0;

		if (flags & DTRACEBUF_NOSWITCH)
			continue;

		if ((buf->dtb_xamot = kmem_zalloc(size,
		    KM_NOSLEEP | KM_NORMALPRI)) == NULL)
			goto err;
	} while ((cp = cp->cpu_next) != cpu_list);

	return (0);

err:
	/*
	 * Unwind:  free everything allocated above, accounting for how much
	 * was desired versus obtained so the caller can scale down.
	 */
	cp = cpu_list;

	do {
		if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
			continue;

		buf = &bufs[cp->cpu_id];
		desired += 2;

		if (buf->dtb_xamot != NULL) {
			ASSERT(buf->dtb_tomax != NULL);
			ASSERT(buf->dtb_size == size);
			kmem_free(buf->dtb_xamot, size);
			allocated++;
		}

		if (buf->dtb_tomax != NULL) {
			ASSERT(buf->dtb_size == size);
			kmem_free(buf->dtb_tomax, size);
			allocated++;
		}

		buf->dtb_tomax = NULL;
		buf->dtb_xamot = NULL;
		buf->dtb_size = 0;
	} while ((cp = cp->cpu_next) != cpu_list);
#else
	int i;

	*factor = 1;
#if defined(__amd64__) || defined(__mips__) || defined(__powerpc__)
	/*
	 * FreeBSD isn't good at limiting the amount of memory we
	 * ask to malloc, so let's place a limit here before trying
	 * to do something that might well end in tears at bedtime.
	 */
	if (size > physmem * PAGE_SIZE / (128 * (mp_maxid + 1)))
		return (ENOMEM);
#endif

	ASSERT(MUTEX_HELD(&dtrace_lock));
	CPU_FOREACH(i) {
		if (cpu != DTRACE_CPUALL && cpu != i)
			continue;

		buf = &bufs[i];

		/*
		 * If there is already a buffer allocated for this CPU, it
		 * is only possible that this is a DR event.  In this case,
		 * the buffer size must match our specified size.
		 */
		if (buf->dtb_tomax != NULL) {
			ASSERT(buf->dtb_size == size);
			continue;
		}

		ASSERT(buf->dtb_xamot == NULL);

		if ((buf->dtb_tomax = kmem_zalloc(size,
		    KM_NOSLEEP | KM_NORMALPRI)) == NULL)
			goto err;

		buf->dtb_size = size;
		buf->dtb_flags = flags;
		buf->dtb_offset = 0;
		buf->dtb_drops = 0;

		if (flags & DTRACEBUF_NOSWITCH)
			continue;

		if ((buf->dtb_xamot = kmem_zalloc(size,
		    KM_NOSLEEP | KM_NORMALPRI)) == NULL)
			goto err;
	}

	return (0);

err:
	/*
	 * Error allocating memory, so free the buffers that were
	 * allocated before the failed allocation.
	 */
	CPU_FOREACH(i) {
		if (cpu != DTRACE_CPUALL && cpu != i)
			continue;

		buf = &bufs[i];
		desired += 2;

		if (buf->dtb_xamot != NULL) {
			ASSERT(buf->dtb_tomax != NULL);
			ASSERT(buf->dtb_size == size);
			kmem_free(buf->dtb_xamot, size);
			allocated++;
		}

		if (buf->dtb_tomax != NULL) {
			ASSERT(buf->dtb_size == size);
			kmem_free(buf->dtb_tomax, size);
			allocated++;
		}

		buf->dtb_tomax = NULL;
		buf->dtb_xamot = NULL;
		buf->dtb_size = 0;

	}
#endif
	*factor = desired / (allocated > 0 ? allocated : 1);

	return (ENOMEM);
}

/*
 * Note:  called from probe context.  This function just increments the drop
 * count on a buffer.  It has been made a function to allow for the
 * possibility of understanding the source of mysterious drop counts.  (A
 * problem for which one may be particularly disappointed that DTrace cannot
 * be used to understand DTrace.)
 */
static void
dtrace_buffer_drop(dtrace_buffer_t *buf)
{
	buf->dtb_drops++;
}

/*
 * Note:  called from probe context.  This function is called to reserve space
 * in a buffer.  If mstate is non-NULL, sets the scratch base and size in the
 * mstate.  Returns the new offset in the buffer, or a negative value if an
 * error has occurred.
 */
static intptr_t
dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align,
    dtrace_state_t *state, dtrace_mstate_t *mstate)
{
	intptr_t offs = buf->dtb_offset, soffs;
	intptr_t woffs;
	caddr_t tomax;
	size_t total;

	if (buf->dtb_flags & DTRACEBUF_INACTIVE)
		return (-1);

	if ((tomax = buf->dtb_tomax) == NULL) {
		dtrace_buffer_drop(buf);
		return (-1);
	}

	if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) {
		/*
		 * The simple (switch-buffer) case:  pad to alignment, check
		 * for room, and optionally set up scratch in the tail.
		 */
		while (offs & (align - 1)) {
			/*
			 * Assert that our alignment is off by a number which
			 * is itself sizeof (uint32_t) aligned.
			 */
			ASSERT(!((align - (offs & (align - 1))) &
			    (sizeof (uint32_t) - 1)));
			DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
			offs += sizeof (uint32_t);
		}

		if ((soffs = offs + needed) > buf->dtb_size) {
			dtrace_buffer_drop(buf);
			return (-1);
		}

		if (mstate == NULL)
			return (offs);

		mstate->dtms_scratch_base = (uintptr_t)tomax + soffs;
		mstate->dtms_scratch_size = buf->dtb_size - soffs;
		mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;

		return (offs);
	}

	if (buf->dtb_flags & DTRACEBUF_FILL) {
		if (state->dts_activity != DTRACE_ACTIVITY_COOLDOWN &&
		    (buf->dtb_flags & DTRACEBUF_FULL))
			return (-1);
		goto out;
	}

	total = needed + (offs & (align - 1));

	/*
	 * For a ring buffer, life is quite a bit more complicated.  Before
	 * we can store any padding, we need to adjust our wrapping offset.
	 * (If we've never before wrapped or we're not about to, no adjustment
	 * is required.)
	 */
	if ((buf->dtb_flags & DTRACEBUF_WRAPPED) ||
	    offs + total > buf->dtb_size) {
		woffs = buf->dtb_xamot_offset;

		if (offs + total > buf->dtb_size) {
			/*
			 * We can't fit in the end of the buffer.  First, a
			 * sanity check that we can fit in the buffer at all.
			 */
			if (total > buf->dtb_size) {
				dtrace_buffer_drop(buf);
				return (-1);
			}

			/*
			 * We're going to be storing at the top of the buffer,
			 * so now we need to deal with the wrapped offset.  We
			 * only reset our wrapped offset to 0 if it is
			 * currently greater than the current offset.  If it
			 * is less than the current offset, it is because a
			 * previous allocation induced a wrap -- but the
			 * allocation didn't subsequently take the space due
			 * to an error or false predicate evaluation.  In this
			 * case, we'll just leave the wrapped offset alone: if
			 * the wrapped offset hasn't been advanced far enough
			 * for this allocation, it will be adjusted in the
			 * lower loop.
			 */
			if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
				if (woffs >= offs)
					woffs = 0;
			} else {
				woffs = 0;
			}

			/*
			 * Now we know that we're going to be storing to the
			 * top of the buffer and that there is room for us
			 * there.  We need to clear the buffer from the current
			 * offset to the end (there may be old gunk there).
			 */
			while (offs < buf->dtb_size)
				tomax[offs++] = 0;

			/*
			 * We need to set our offset to zero.  And because we
			 * are wrapping, we need to set the bit indicating as
			 * much.  We can also adjust our needed space back
			 * down to the space required by the ECB -- we know
			 * that the top of the buffer is aligned.
			 */
			offs = 0;
			total = needed;
			buf->dtb_flags |= DTRACEBUF_WRAPPED;
		} else {
			/*
			 * There is room for us in the buffer, so we simply
			 * need to check the wrapped offset.
			 */
			if (woffs < offs) {
				/*
				 * The wrapped offset is less than the offset.
				 * This can happen if we allocated buffer space
				 * that induced a wrap, but then we didn't
				 * subsequently take the space due to an error
				 * or false predicate evaluation.  This is
				 * okay; we know that _this_ allocation isn't
				 * going to induce a wrap.  We still can't
				 * reset the wrapped offset to be zero,
				 * however: the space may have been trashed in
				 * the previous failed probe attempt.  But at
				 * least the wrapped offset doesn't need to
				 * be adjusted at all...
				 */
				goto out;
			}
		}

		/*
		 * Advance the wrapped offset past whole records until there
		 * is room for the reservation, consuming the oldest data.
		 */
		while (offs + total > woffs) {
			dtrace_epid_t epid = *(uint32_t *)(tomax + woffs);
			size_t size;

			if (epid == DTRACE_EPIDNONE) {
				size = sizeof (uint32_t);
			} else {
				ASSERT3U(epid, <=, state->dts_necbs);
				ASSERT(state->dts_ecbs[epid - 1] != NULL);

				size = state->dts_ecbs[epid - 1]->dte_size;
			}

			ASSERT(woffs + size <= buf->dtb_size);
			ASSERT(size != 0);

			if (woffs + size == buf->dtb_size) {
				/*
				 * We've reached the end of the buffer; we want
				 * to set the wrapped offset to 0 and break
				 * out.  However, if the offs is 0, then we're
				 * in a strange edge-condition:  the amount of
				 * space that we want to reserve plus the size
				 * of the record that we're overwriting is
				 * greater than the size of the buffer.  This
				 * is problematic because if we reserve the
				 * space but subsequently don't consume it (due
				 * to a failed predicate or error) the wrapped
				 * offset will be 0 -- yet the EPID at offset 0
				 * will not be committed.  This situation is
				 * relatively easy to deal with:  if we're in
				 * this case, the buffer is indistinguishable
				 * from one that hasn't wrapped; we need only
				 * finish the job by clearing the wrapped bit,
				 * explicitly setting the offset to be 0, and
				 * zero'ing out the old data in the buffer.
				 */
				if (offs == 0) {
					buf->dtb_flags &= ~DTRACEBUF_WRAPPED;
					buf->dtb_offset = 0;
					woffs = total;

					while (woffs < buf->dtb_size)
						tomax[woffs++] = 0;
				}

				woffs = 0;
				break;
			}

			woffs += size;
		}

		/*
		 * We have a wrapped offset.  It may be that the wrapped offset
		 * has become zero -- that's okay.
		 */
		buf->dtb_xamot_offset = woffs;
	}

out:
	/*
	 * Now we can plow the buffer with any necessary padding.
	 */
	while (offs & (align - 1)) {
		/*
		 * Assert that our alignment is off by a number which
		 * is itself sizeof (uint32_t) aligned.
		 */
		ASSERT(!((align - (offs & (align - 1))) &
		    (sizeof (uint32_t) - 1)));
		DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
		offs += sizeof (uint32_t);
	}

	if (buf->dtb_flags & DTRACEBUF_FILL) {
		if (offs + needed > buf->dtb_size - state->dts_reserve) {
			buf->dtb_flags |= DTRACEBUF_FULL;
			return (-1);
		}
	}

	if (mstate == NULL)
		return (offs);

	/*
	 * For ring buffers and fill buffers, the scratch space is always
	 * the inactive buffer.
	 */
	mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot;
	mstate->dtms_scratch_size = buf->dtb_size;
	mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;

	return (offs);
}

/*
 * Zero any gap between valid data in a wrapped ring buffer so that the
 * consumer never sees stale record fragments.
 */
static void
dtrace_buffer_polish(dtrace_buffer_t *buf)
{
	ASSERT(buf->dtb_flags & DTRACEBUF_RING);
	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (!(buf->dtb_flags & DTRACEBUF_WRAPPED))
		return;

	/*
	 * We need to polish the ring buffer.  There are three cases:
	 *
	 * - The first (and presumably most common) is that there is no gap
	 *   between the buffer offset and the wrapped offset.  In this case,
	 *   there is nothing in the buffer that isn't valid data; we can
	 *   mark the buffer as polished and return.
	 *
	 * - The second (less common than the first but still more common
	 *   than the third) is that there is a gap between the buffer offset
	 *   and the wrapped offset, and the wrapped offset is larger than the
	 *   buffer offset.  This can happen because of an alignment issue, or
	 *   can happen because of a call to dtrace_buffer_reserve() that
	 *   didn't subsequently consume the buffer space.  In this case,
	 *   we need to zero the data from the buffer offset to the wrapped
	 *   offset.
	 *
	 * - The third (and least common) is that there is a gap between the
	 *   buffer offset and the wrapped offset, but the wrapped offset is
	 *   _less_ than the buffer offset.  This can only happen because a
	 *   call to dtrace_buffer_reserve() induced a wrap, but the space
	 *   was not subsequently consumed.  In this case, we need to zero the
	 *   space from the offset to the end of the buffer _and_ from the
	 *   top of the buffer to the wrapped offset.
	 */
	if (buf->dtb_offset < buf->dtb_xamot_offset) {
		bzero(buf->dtb_tomax + buf->dtb_offset,
		    buf->dtb_xamot_offset - buf->dtb_offset);
	}

	if (buf->dtb_offset > buf->dtb_xamot_offset) {
		bzero(buf->dtb_tomax + buf->dtb_offset,
		    buf->dtb_size - buf->dtb_offset);
		bzero(buf->dtb_tomax, buf->dtb_xamot_offset);
	}
}

/*
 * This routine determines if data generated at the specified time has likely
 * been entirely consumed at user-level.  This routine is called to determine
 * if an ECB on a defunct probe (but for an active enabling) can be safely
 * disabled and destroyed.
 */
static int
dtrace_buffer_consumed(dtrace_buffer_t *bufs, hrtime_t when)
{
	int i;

	for (i = 0; i < NCPU; i++) {
		dtrace_buffer_t *buf = &bufs[i];

		if (buf->dtb_size == 0)
			continue;

		if (buf->dtb_flags & DTRACEBUF_RING)
			return (0);

		if (!buf->dtb_switched && buf->dtb_offset != 0)
			return (0);

		if (buf->dtb_switched - buf->dtb_interval < when)
			return (0);
	}

	return (1);
}

/*
 * Free the per-CPU buffer pair (tomax and, if present, xamot) for every CPU
 * and reset the buffer descriptors.
 */
static void
dtrace_buffer_free(dtrace_buffer_t *bufs)
{
	int i;

	for (i = 0; i < NCPU; i++) {
		dtrace_buffer_t *buf = &bufs[i];

		if (buf->dtb_tomax == NULL) {
			ASSERT(buf->dtb_xamot == NULL);
			ASSERT(buf->dtb_size == 0);
			continue;
		}

		if (buf->dtb_xamot != NULL) {
			ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
			kmem_free(buf->dtb_xamot, buf->dtb_size);
		}

		kmem_free(buf->dtb_tomax, buf->dtb_size);
		buf->dtb_size = 0;
		buf->dtb_tomax = NULL;
		buf->dtb_xamot = NULL;
	}
}

/*
 * DTrace Enabling Functions
 */

/*
 * Allocate a new, empty enabling associated with the given variable state.
 */
static dtrace_enabling_t *
dtrace_enabling_create(dtrace_vstate_t *vstate)
{
	dtrace_enabling_t *enab;

	enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP);
	enab->dten_vstate = vstate;

	return (enab);
}

/*
 * Append an ECB description to an enabling, doubling the description array
 * as needed.
 */
static void
dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb)
{
	dtrace_ecbdesc_t **ndesc;
	size_t osize, nsize;

	/*
	 * We can't add to enablings after we've enabled them, or after we've
	 * retained them.
	 */
	ASSERT(enab->dten_probegen == 0);
	ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);

	if (enab->dten_ndesc < enab->dten_maxdesc) {
		enab->dten_desc[enab->dten_ndesc++] = ecb;
		return;
	}

	/*
	 * NOTE(review): dten_desc is an array of dtrace_ecbdesc_t *, but the
	 * size computations below use sizeof (dtrace_enabling_t *).  Benign
	 * (all object pointers have the same size), but worth confirming
	 * against upstream before "fixing".
	 */
	osize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);

	if (enab->dten_maxdesc == 0) {
		enab->dten_maxdesc = 1;
	} else {
		enab->dten_maxdesc <<= 1;
	}

	ASSERT(enab->dten_ndesc < enab->dten_maxdesc);

	nsize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);
	ndesc = kmem_zalloc(nsize, KM_SLEEP);
	bcopy(enab->dten_desc, ndesc, osize);
	if (enab->dten_desc != NULL)
		kmem_free(enab->dten_desc, osize);

	enab->dten_desc = ndesc;
	enab->dten_desc[enab->dten_ndesc++] = ecb;
}

static void
dtrace_enabling_addlike(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb,
    dtrace_probedesc_t *pd)
{
	dtrace_ecbdesc_t *new;
	dtrace_predicate_t *pred;
	dtrace_actdesc_t *act;

	/*
	 * We're going to create a new ECB description that matches the
	 * specified ECB in every way, but has the specified probe description.
	 */
	new = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);

	if ((pred = ecb->dted_pred.dtpdd_predicate) != NULL)
		dtrace_predicate_hold(pred);

	for (act = ecb->dted_action; act != NULL; act = act->dtad_next)
		dtrace_actdesc_hold(act);

	new->dted_action = ecb->dted_action;
	new->dted_pred = ecb->dted_pred;
	new->dted_probe = *pd;
	new->dted_uarg = ecb->dted_uarg;

	dtrace_enabling_add(enab, new);
}

/*
 * Log each probe description in the enabling via cmn_err() (debugging aid).
 */
static void
dtrace_enabling_dump(dtrace_enabling_t *enab)
{
	int i;

	for (i = 0; i < enab->dten_ndesc; i++) {
		dtrace_probedesc_t *desc = &enab->dten_desc[i]->dted_probe;

		cmn_err(CE_NOTE, "enabling probe %d (%s:%s:%s:%s)", i,
		    desc->dtpd_provider, desc->dtpd_mod,
		    desc->dtpd_func, desc->dtpd_name);
	}
}

/*
 * Destroy an enabling:  release each ECB description's predicate and
 * actions, and unlink the enabling from the retained list if it is on it.
 */
static void
dtrace_enabling_destroy(dtrace_enabling_t *enab)
{
	int i;
	dtrace_ecbdesc_t *ep;
	dtrace_vstate_t *vstate = enab->dten_vstate;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	for (i = 0; i < enab->dten_ndesc; i++) {
		dtrace_actdesc_t *act, *next;
		dtrace_predicate_t *pred;

		ep = enab->dten_desc[i];

		if ((pred = ep->dted_pred.dtpdd_predicate) != NULL)
			dtrace_predicate_release(pred, vstate);

		for (act = ep->dted_action; act != NULL; act = next) {
			next = act->dtad_next;
			dtrace_actdesc_release(act, vstate);
		}

		kmem_free(ep, sizeof (dtrace_ecbdesc_t));
	}

	if (enab->dten_desc != NULL)
		kmem_free(enab->dten_desc,
		    enab->dten_maxdesc * sizeof (dtrace_enabling_t *));

	/*
	 * If this was a retained enabling, decrement the dts_nretained count
	 * and take it off of the dtrace_retained list.
	 */
	if (enab->dten_prev != NULL || enab->dten_next != NULL ||
	    dtrace_retained == enab) {
		ASSERT(enab->dten_vstate->dtvs_state != NULL);
		ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0);
		enab->dten_vstate->dtvs_state->dts_nretained--;
		dtrace_retained_gen++;
	}

	if (enab->dten_prev == NULL) {
		if (dtrace_retained == enab) {
			dtrace_retained = enab->dten_next;

			if (dtrace_retained != NULL)
				dtrace_retained->dten_prev = NULL;
		}
	} else {
		ASSERT(enab != dtrace_retained);
		ASSERT(dtrace_retained != NULL);
		enab->dten_prev->dten_next = enab->dten_next;
	}

	if (enab->dten_next != NULL) {
		ASSERT(dtrace_retained != NULL);
		enab->dten_next->dten_prev = enab->dten_prev;
	}

	kmem_free(enab, sizeof (dtrace_enabling_t));
}

/*
 * Place an enabling at the head of the global retained list so that it is
 * re-evaluated as new probes appear.  Returns ENOSPC if the state has
 * already retained dtrace_retain_max enablings.
 */
static int
dtrace_enabling_retain(dtrace_enabling_t *enab)
{
	dtrace_state_t *state;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
	ASSERT(enab->dten_vstate != NULL);

	state = enab->dten_vstate->dtvs_state;
	ASSERT(state != NULL);

	/*
	 * We only allow each state to retain dtrace_retain_max enablings.
	 */
	if (state->dts_nretained >= dtrace_retain_max)
		return (ENOSPC);

	state->dts_nretained++;
	dtrace_retained_gen++;

	if (dtrace_retained == NULL) {
		dtrace_retained = enab;
		return (0);
	}

	enab->dten_next = dtrace_retained;
	dtrace_retained->dten_prev = enab;
	dtrace_retained = enab;

	return (0);
}

/*
 * For every retained ECB description of this state whose probe description
 * exactly matches `match', add a copy with probe description `create' to a
 * new retained enabling.  Returns ENOENT if nothing matched.
 */
static int
dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match,
    dtrace_probedesc_t *create)
{
	dtrace_enabling_t *new, *enab;
	int found = 0, err = ENOENT;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN);
	ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN);
	ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN);
	ASSERT(strlen(match->dtpd_name) < DTRACE_NAMELEN);

	new = dtrace_enabling_create(&state->dts_vstate);

	/*
	 * Iterate over all retained enablings, looking for enablings that
	 * match the specified state.
	 */
	for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
		int i;

		/*
		 * dtvs_state can only be NULL for helper enablings -- and
		 * helper enablings can't be retained.
		 */
		ASSERT(enab->dten_vstate->dtvs_state != NULL);

		if (enab->dten_vstate->dtvs_state != state)
			continue;

		/*
		 * Now iterate over each probe description; we're looking for
		 * an exact match to the specified probe description.
		 */
		for (i = 0; i < enab->dten_ndesc; i++) {
			dtrace_ecbdesc_t *ep = enab->dten_desc[i];
			dtrace_probedesc_t *pd = &ep->dted_probe;

			if (strcmp(pd->dtpd_provider, match->dtpd_provider))
				continue;

			if (strcmp(pd->dtpd_mod, match->dtpd_mod))
				continue;

			if (strcmp(pd->dtpd_func, match->dtpd_func))
				continue;

			if (strcmp(pd->dtpd_name, match->dtpd_name))
				continue;

			/*
			 * We have a winning probe!  Add it to our growing
			 * enabling.
			 */
			found = 1;
			dtrace_enabling_addlike(new, ep, create);
		}
	}

	if (!found || (err = dtrace_enabling_retain(new)) != 0) {
		dtrace_enabling_destroy(new);
		return (err);
	}

	return (0);
}

/*
 * Destroy all retained enablings belonging to the specified state (e.g. on
 * consumer state teardown).
 */
static void
dtrace_enabling_retract(dtrace_state_t *state)
{
	dtrace_enabling_t *enab, *next;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	/*
	 * Iterate over all retained enablings, destroy the enablings retained
	 * for the specified state.
	 */
	for (enab = dtrace_retained; enab != NULL; enab = next) {
		next = enab->dten_next;

		/*
		 * dtvs_state can only be NULL for helper enablings -- and
		 * helper enablings can't be retained.
		 */
		ASSERT(enab->dten_vstate->dtvs_state != NULL);

		if (enab->dten_vstate->dtvs_state == state) {
			ASSERT(state->dts_nretained > 0);
			dtrace_enabling_destroy(enab);
		}
	}

	ASSERT(state->dts_nretained == 0);
}

/*
 * Match and enable each ECB description in the enabling against the probe
 * namespace.  On success, *nmatched (if non-NULL) receives the total match
 * count; on error, the first non-zero dten_error is returned.
 */
static int
dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
{
	int i = 0;
	int matched = 0;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&dtrace_lock));

	for (i = 0; i < enab->dten_ndesc; i++) {
		dtrace_ecbdesc_t *ep = enab->dten_desc[i];

		enab->dten_current = ep;
		enab->dten_error = 0;

		matched += dtrace_probe_enable(&ep->dted_probe, enab);

		if (enab->dten_error != 0) {
			/*
			 * If we get an error half-way through enabling the
			 * probes, we kick out -- perhaps with some number of
			 * them enabled.  Leaving enabled probes enabled may
			 * be slightly confusing for user-level, but we expect
			 * that no one will attempt to actually drive on in
			 * the face of such errors.  If this is an anonymous
			 * enabling (indicated with a NULL nmatched pointer),
			 * we cmn_err() a message.  We aren't expecting to
			 * get such an error -- such as it can exist at all,
			 * it would be a result of corrupted DOF in the driver
			 * properties.
			 */
			if (nmatched == NULL) {
				cmn_err(CE_WARN, "dtrace_enabling_match() "
				    "error on %p: %d", (void *)ep,
				    enab->dten_error);
			}

			return (enab->dten_error);
		}
	}

	enab->dten_probegen = dtrace_probegen;
	if (nmatched != NULL)
		*nmatched = matched;

	return (0);
}

static void
dtrace_enabling_matchall(void)
{
	dtrace_enabling_t *enab;

	mutex_enter(&cpu_lock);
	mutex_enter(&dtrace_lock);

	/*
	 * Iterate over all retained enablings to see if any probes match
	 * against them.  We only perform this operation on enablings for which
	 * we have sufficient permissions by virtue of being in the global zone
	 * or in the same zone as the DTrace client.  Because we can be called
	 * after dtrace_detach() has been called, we cannot assert that there
	 * are retained enablings.  We can safely load from dtrace_retained,
	 * however:  the taskq_destroy() at the end of dtrace_detach() will
	 * block pending our completion.
	 */
	for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
#if defined(sun)
		cred_t *cr = enab->dten_vstate->dtvs_state->dts_cred.dcr_cred;

		if (INGLOBALZONE(curproc) ||
		    cr != NULL && getzoneid() == crgetzoneid(cr))
#endif
			(void) dtrace_enabling_match(enab, NULL);
	}

	mutex_exit(&dtrace_lock);
	mutex_exit(&cpu_lock);
}

/*
 * If an enabling is to be enabled without having matched probes (that is, if
 * dtrace_state_go() is to be called on the underlying dtrace_state_t), the
 * enabling must be _primed_ by creating an ECB for every ECB description.
 * This must be done to assure that we know the number of speculations, the
 * number of aggregations, the minimum buffer size needed, etc. before we
 * transition out of DTRACE_ACTIVITY_INACTIVE.  To do this without actually
 * enabling any probes, we create ECBs for every ECB decription, but with a
 * NULL probe -- which is exactly what this function does.
 */
static void
dtrace_enabling_prime(dtrace_state_t *state)
{
	dtrace_enabling_t *enab;
	int i;

	for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
		ASSERT(enab->dten_vstate->dtvs_state != NULL);

		if (enab->dten_vstate->dtvs_state != state)
			continue;

		/*
		 * We don't want to prime an enabling more than once, lest
		 * we allow a malicious user to induce resource exhaustion.
		 * (The ECBs that result from priming an enabling aren't
		 * leaked -- but they also aren't deallocated until the
		 * consumer state is destroyed.)
		 */
		if (enab->dten_primed)
			continue;

		for (i = 0; i < enab->dten_ndesc; i++) {
			enab->dten_current = enab->dten_desc[i];
			(void) dtrace_probe_enable(NULL, enab);
		}

		enab->dten_primed = 1;
	}
}

/*
 * Called to indicate that probes should be provided due to retained
 * enablings.  This is implemented in terms of dtrace_probe_provide(), but it
 * must take an initial lap through the enabling calling the dtps_provide()
 * entry point explicitly to allow for autocreated probes.
 */
static void
dtrace_enabling_provide(dtrace_provider_t *prv)
{
	int i, all = 0;
	dtrace_probedesc_t desc;
	dtrace_genid_t gen;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(MUTEX_HELD(&dtrace_provider_lock));

	if (prv == NULL) {
		all = 1;
		prv = dtrace_provider;
	}

	do {
		dtrace_enabling_t *enab;
		void *parg = prv->dtpv_arg;

retry:
		gen = dtrace_retained_gen;
		for (enab = dtrace_retained; enab != NULL;
		    enab = enab->dten_next) {
			for (i = 0; i < enab->dten_ndesc; i++) {
				desc = enab->dten_desc[i]->dted_probe;
				mutex_exit(&dtrace_lock);
				prv->dtpv_pops.dtps_provide(parg, &desc);
				mutex_enter(&dtrace_lock);
				/*
				 * Process the retained enablings again if
				 * they have changed while we weren't holding
				 * dtrace_lock.
				 */
				if (gen != dtrace_retained_gen)
					goto retry;
			}
		}
	} while (all && (prv = prv->dtpv_next) != NULL);

	mutex_exit(&dtrace_lock);
	dtrace_probe_provide(NULL, all ? NULL : prv);
	mutex_enter(&dtrace_lock);
}

/*
 * Called to reap ECBs that are attached to probes from defunct providers.
 */
static void
dtrace_enabling_reap(void)
{
	dtrace_provider_t *prov;
	dtrace_probe_t *probe;
	dtrace_ecb_t *ecb;
	hrtime_t when;
	int i;

	mutex_enter(&cpu_lock);
	mutex_enter(&dtrace_lock);

	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_ecb == NULL)
			continue;

		prov = probe->dtpr_provider;

		/* A zero dtpv_defunct means the provider is still live. */
		if ((when = prov->dtpv_defunct) == 0)
			continue;

		/*
		 * We have ECBs on a defunct provider: we want to reap these
		 * ECBs to allow the provider to unregister.  The destruction
		 * of these ECBs must be done carefully: if we destroy the ECB
		 * and the consumer later wishes to consume an EPID that
		 * corresponds to the destroyed ECB (and if the EPID metadata
		 * has not been previously consumed), the consumer will abort
		 * processing on the unknown EPID.  To reduce (but not, sadly,
		 * eliminate) the possibility of this, we will only destroy an
		 * ECB for a defunct provider if, for the state that
		 * corresponds to the ECB:
		 *
		 *  (a)	There is no speculative tracing (which can effectively
		 *	cache an EPID for an arbitrary amount of time).
		 *
		 *  (b)	The principal buffers have been switched twice since the
		 *	provider became defunct.
		 *
		 *  (c)	The aggregation buffers are of zero size or have been
		 *	switched twice since the provider became defunct.
		 *
		 * We use dts_speculates to determine (a) and call a function
		 * (dtrace_buffer_consumed()) to determine (b) and (c).  Note
		 * that as soon as we've been unable to destroy one of the ECBs
		 * associated with the probe, we quit trying -- reaping is only
		 * fruitful in as much as we can destroy all ECBs associated
		 * with the defunct provider's probes.
		 */
		while ((ecb = probe->dtpr_ecb) != NULL) {
			dtrace_state_t *state = ecb->dte_state;
			dtrace_buffer_t *buf = state->dts_buffer;
			dtrace_buffer_t *aggbuf = state->dts_aggbuffer;

			if (state->dts_speculates)
				break;

			if (!dtrace_buffer_consumed(buf, when))
				break;

			if (!dtrace_buffer_consumed(aggbuf, when))
				break;

			dtrace_ecb_disable(ecb);
			ASSERT(probe->dtpr_ecb != ecb);
			dtrace_ecb_destroy(ecb);
		}
	}

	mutex_exit(&dtrace_lock);
	mutex_exit(&cpu_lock);
}

/*
 * DTrace DOF Functions
 */
/*ARGSUSED*/
static void
dtrace_dof_error(dof_hdr_t *dof, const char *str)
{
	/*
	 * The dof argument is unused (hence ARGSUSED); errors are reported
	 * via cmn_err() when verbose error reporting is enabled, and
	 * optionally recorded by the DTRACE_ERRDEBUG machinery.
	 */
	if (dtrace_err_verbose)
		cmn_err(CE_WARN, "failed to process DOF: %s", str);

#ifdef DTRACE_ERRDEBUG
	dtrace_errdebug(str);
#endif
}

/*
 * Create DOF out of a currently enabled state.  Right now, we only create
 * DOF containing the run-time options -- but this could be expanded to create
 * complete DOF representing the enabled state.
 */
static dof_hdr_t *
dtrace_dof_create(dtrace_state_t *state)
{
	dof_hdr_t *dof;
	dof_sec_t *sec;
	dof_optdesc_t *opt;
	int i, len = sizeof (dof_hdr_t) +
	    roundup(sizeof (dof_sec_t), sizeof (uint64_t)) +
	    sizeof (dof_optdesc_t) * DTRACEOPT_MAX;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	dof = kmem_zalloc(len, KM_SLEEP);
	dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0;
	dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1;
	dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2;
	dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3;

	dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE;
	dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE;
	dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION;
	dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION;
	dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS;
	dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS;

	dof->dofh_flags = 0;
	dof->dofh_hdrsize = sizeof (dof_hdr_t);
	dof->dofh_secsize = sizeof (dof_sec_t);
	dof->dofh_secnum = 1;	/* only DOF_SECT_OPTDESC */
	dof->dofh_secoff = sizeof (dof_hdr_t);
	dof->dofh_loadsz = len;
	dof->dofh_filesz = len;
	dof->dofh_pad = 0;

	/*
	 * Fill in the option section header...
	 */
	sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t));
	sec->dofs_type = DOF_SECT_OPTDESC;
	sec->dofs_align = sizeof (uint64_t);
	sec->dofs_flags = DOF_SECF_LOAD;
	sec->dofs_entsize = sizeof (dof_optdesc_t);

	opt = (dof_optdesc_t *)((uintptr_t)sec +
	    roundup(sizeof (dof_sec_t), sizeof (uint64_t)));

	sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof;
	sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX;

	/* One option descriptor per run-time option, indexed by option id. */
	for (i = 0; i < DTRACEOPT_MAX; i++) {
		opt[i].dofo_option = i;
		opt[i].dofo_strtab = DOF_SECIDX_NONE;
		opt[i].dofo_value = state->dts_options[i];
	}

	return (dof);
}

/*
 * Copy DOF in from userland address uarg.  Only the load size is validated
 * here (bounded above by dtrace_dof_maxsize and below by the header size);
 * full validation is left to dtrace_dof_slurp().  On failure, NULL is
 * returned and *errp is set to the appropriate errno.  The returned buffer
 * must be released with dtrace_dof_destroy().
 */
static dof_hdr_t *
dtrace_dof_copyin(uintptr_t uarg, int *errp)
{
	dof_hdr_t hdr, *dof;

	ASSERT(!MUTEX_HELD(&dtrace_lock));

	/*
	 * First, we're going to copyin() the sizeof (dof_hdr_t).
	 */
	if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0) {
		dtrace_dof_error(NULL, "failed to copyin DOF header");
		*errp = EFAULT;
		return (NULL);
	}

	/*
	 * Now we'll allocate the entire DOF and copy it in -- provided
	 * that the length isn't outrageous.
	 */
	if (hdr.dofh_loadsz >= dtrace_dof_maxsize) {
		dtrace_dof_error(&hdr, "load size exceeds maximum");
		*errp = E2BIG;
		return (NULL);
	}

	if (hdr.dofh_loadsz < sizeof (hdr)) {
		dtrace_dof_error(&hdr, "invalid load size");
		*errp = EINVAL;
		return (NULL);
	}

	dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP);

	/*
	 * Re-check dofh_loadsz after the second copyin: userland could have
	 * changed the header between the two copies (TOCTOU).
	 */
	if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 ||
	    dof->dofh_loadsz != hdr.dofh_loadsz) {
		kmem_free(dof, hdr.dofh_loadsz);
		*errp = EFAULT;
		return (NULL);
	}

	return (dof);
}

#if !defined(sun)
/*
 * Decode a single hex digit; returns 0 for any non-hex character (the
 * callers only pass characters produced by a hex-encoded environment
 * variable, so the fallback should be unreachable).
 */
static __inline uchar_t
dtrace_dof_char(char c) {
	switch (c) {
	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	case '8':
	case '9':
		return (c - '0');
	case 'A':
	case 'B':
	case 'C':
	case 'D':
	case 'E':
	case 'F':
		return (c - 'A' + 10);
	case 'a':
	case 'b':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
		return (c - 'a' + 10);
	}
	/* Should not reach here. */
	return (0);
}
#endif

/*
 * Look up DOF stored in a driver property (Solaris) or a hex-encoded kernel
 * environment variable (FreeBSD).  Returns a validated-for-size DOF buffer,
 * or NULL if the property is absent or malformed.
 */
static dof_hdr_t *
dtrace_dof_property(const char *name)
{
	uchar_t *buf;
	uint64_t loadsz;
	unsigned int len, i;
	dof_hdr_t *dof;

#if defined(sun)
	/*
	 * Unfortunately, array of values in .conf files are always (and
	 * only) interpreted to be integer arrays.  We must read our DOF
	 * as an integer array, and then squeeze it into a byte array.
	 */
	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0,
	    (char *)name, (int **)&buf, &len) != DDI_PROP_SUCCESS)
		return (NULL);

	for (i = 0; i < len; i++)
		buf[i] = (uchar_t)(((int *)buf)[i]);

	if (len < sizeof (dof_hdr_t)) {
		ddi_prop_free(buf);
		dtrace_dof_error(NULL, "truncated header");
		return (NULL);
	}

	if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) {
		ddi_prop_free(buf);
		dtrace_dof_error(NULL, "truncated DOF");
		return (NULL);
	}

	if (loadsz >= dtrace_dof_maxsize) {
		ddi_prop_free(buf);
		dtrace_dof_error(NULL, "oversized DOF");
		return (NULL);
	}

	dof = kmem_alloc(loadsz, KM_SLEEP);
	bcopy(buf, dof, loadsz);
	ddi_prop_free(buf);
#else
	char *p;
	char *p_env;

	if ((p_env = getenv(name)) == NULL)
		return (NULL);

	/*
	 * The environment value is a hex string: two characters per byte.
	 * NOTE(review): an odd-length string silently drops its final
	 * nibble; presumably the producer always writes an even length.
	 */
	len = strlen(p_env) / 2;

	buf = kmem_alloc(len, KM_SLEEP);

	dof = (dof_hdr_t *) buf;

	p = p_env;

	for (i = 0; i < len; i++) {
		buf[i] = (dtrace_dof_char(p[0]) << 4) |
		     dtrace_dof_char(p[1]);
		p += 2;
	}

	freeenv(p_env);

	/*
	 * NOTE(review): the error paths free with size 0 rather than len;
	 * FreeBSD's kmem_free shim appears to tolerate this — confirm.
	 */
	if (len < sizeof (dof_hdr_t)) {
		kmem_free(buf, 0);
		dtrace_dof_error(NULL, "truncated header");
		return (NULL);
	}

	if (len < (loadsz = dof->dofh_loadsz)) {
		kmem_free(buf, 0);
		dtrace_dof_error(NULL, "truncated DOF");
		return (NULL);
	}

	if (loadsz >= dtrace_dof_maxsize) {
		kmem_free(buf, 0);
		dtrace_dof_error(NULL, "oversized DOF");
		return (NULL);
	}
#endif

	return (dof);
}

/* Release a DOF buffer obtained from copyin or property lookup. */
static void
dtrace_dof_destroy(dof_hdr_t *dof)
{
	kmem_free(dof, dof->dofh_loadsz);
}

/*
 * Return the dof_sec_t pointer corresponding to a given section index.
 * If the index is not valid, dtrace_dof_error() is called and NULL is
 * returned.  If a type other than DOF_SECT_NONE is specified, the header is
 * checked against this type and NULL is returned if the types do not match.
 */
static dof_sec_t *
dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i)
{
	/*
	 * The pointer is computed up front but not dereferenced until the
	 * index has been validated against dofh_secnum below.
	 */
	dof_sec_t *sec = (dof_sec_t *)(uintptr_t)
	    ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize);

	if (i >= dof->dofh_secnum) {
		dtrace_dof_error(dof, "referenced section index is invalid");
		return (NULL);
	}

	if (!(sec->dofs_flags & DOF_SECF_LOAD)) {
		dtrace_dof_error(dof, "referenced section is not loadable");
		return (NULL);
	}

	if (type != DOF_SECT_NONE && type != sec->dofs_type) {
		dtrace_dof_error(dof, "referenced section is the wrong type");
		return (NULL);
	}

	return (sec);
}

/*
 * Decode a DOF_SECT_PROBEDESC section into the caller-supplied probe
 * description, pulling the four name components out of the referenced
 * string table.  Returns desc on success, NULL (after dtrace_dof_error())
 * on any validation failure.
 */
static dtrace_probedesc_t *
dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc)
{
	dof_probedesc_t *probe;
	dof_sec_t *strtab;
	uintptr_t daddr = (uintptr_t)dof;
	uintptr_t str;
	size_t size;

	if (sec->dofs_type != DOF_SECT_PROBEDESC) {
		dtrace_dof_error(dof, "invalid probe section");
		return (NULL);
	}

	if (sec->dofs_align != sizeof (dof_secidx_t)) {
		dtrace_dof_error(dof, "bad alignment in probe description");
		return (NULL);
	}

	if (sec->dofs_offset + sizeof (dof_probedesc_t) > dof->dofh_loadsz) {
		dtrace_dof_error(dof, "truncated probe description");
		return (NULL);
	}

	probe = (dof_probedesc_t *)(uintptr_t)(daddr + sec->dofs_offset);
	strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, probe->dofp_strtab);

	if (strtab == NULL)
		return (NULL);

	str = daddr + strtab->dofs_offset;
	size = strtab->dofs_size;

	/*
	 * Each string offset is bounds-checked against the string table
	 * before being copied; the copies below are bounded both by the
	 * destination field size and by the remaining table size.
	 * (dtrace_dof_slurp() has already verified that a loaded string
	 * table is NUL-terminated.)
	 */
	if (probe->dofp_provider >= strtab->dofs_size) {
		dtrace_dof_error(dof, "corrupt probe provider");
		return (NULL);
	}

	(void) strncpy(desc->dtpd_provider,
	    (char *)(str + probe->dofp_provider),
	    MIN(DTRACE_PROVNAMELEN - 1, size - probe->dofp_provider));

	if (probe->dofp_mod >= strtab->dofs_size) {
		dtrace_dof_error(dof, "corrupt probe module");
		return (NULL);
	}

	(void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod),
	    MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod));

	if (probe->dofp_func >= strtab->dofs_size) {
		dtrace_dof_error(dof, "corrupt probe function");
		return (NULL);
	}

	(void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func),
	    MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func));

	if (probe->dofp_name >= strtab->dofs_size) {
		dtrace_dof_error(dof, "corrupt probe name");
		return (NULL);
	}

	(void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name),
	    MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name));

	return (desc);
}

/*
 * Build a dtrace_difo_t from a DOF_SECT_DIFOHDR section and the sub-sections
 * it links to (DIF text, integer table, string table, variable table).  The
 * resulting DIFO is validated and initialized before return; NULL is
 * returned on any error, with all partially-populated tables freed.
 */
static dtrace_difo_t *
dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
    cred_t *cr)
{
	dtrace_difo_t *dp;
	size_t ttl = 0;
	dof_difohdr_t *dofd;
	uintptr_t daddr = (uintptr_t)dof;
	size_t max = dtrace_difo_maxsize;
	int i, l, n;

	/*
	 * Table mapping each recognized sub-section type to the dtrace_difo_t
	 * buffer/length fields it populates, its expected entry size and
	 * alignment, and the error message for a duplicate occurrence.
	 */
	static const struct {
		int section;
		int bufoffs;
		int lenoffs;
		int entsize;
		int align;
		const char *msg;
	} difo[] = {
		{ DOF_SECT_DIF, offsetof(dtrace_difo_t, dtdo_buf),
		offsetof(dtrace_difo_t, dtdo_len), sizeof (dif_instr_t),
		sizeof (dif_instr_t), "multiple DIF sections" },

		{ DOF_SECT_INTTAB, offsetof(dtrace_difo_t, dtdo_inttab),
		offsetof(dtrace_difo_t, dtdo_intlen), sizeof (uint64_t),
		sizeof (uint64_t), "multiple integer tables" },

		{ DOF_SECT_STRTAB, offsetof(dtrace_difo_t, dtdo_strtab),
		offsetof(dtrace_difo_t, dtdo_strlen), 0,
		sizeof (char), "multiple string tables" },

		{ DOF_SECT_VARTAB, offsetof(dtrace_difo_t, dtdo_vartab),
		offsetof(dtrace_difo_t, dtdo_varlen), sizeof (dtrace_difv_t),
		sizeof (uint_t), "multiple variable tables" },

		{ DOF_SECT_NONE, 0, 0, 0, 0, NULL }
	};

	if (sec->dofs_type != DOF_SECT_DIFOHDR) {
		dtrace_dof_error(dof, "invalid DIFO header section");
		return (NULL);
	}

	if (sec->dofs_align != sizeof (dof_secidx_t)) {
		dtrace_dof_error(dof, "bad alignment in DIFO header");
		return (NULL);
	}

	if (sec->dofs_size < sizeof (dof_difohdr_t) ||
	    sec->dofs_size % sizeof (dof_secidx_t)) {
		dtrace_dof_error(dof, "bad size in DIFO header");
		return (NULL);
	}

	dofd = (dof_difohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
	/* Number of section links (dofd_links is a flexible trailing array). */
	n = (sec->dofs_size - sizeof (*dofd)) / sizeof (dof_secidx_t) + 1;

	dp = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);
	dp->dtdo_rtype = dofd->dofd_rtype;

	for (l = 0; l < n; l++) {
		dof_sec_t *subsec;
		void **bufp;
		uint32_t *lenp;

		if ((subsec = dtrace_dof_sect(dof, DOF_SECT_NONE,
		    dofd->dofd_links[l])) == NULL)
			goto err; /* invalid section link */

		/* Cap the cumulative size of all sub-sections. */
		if (ttl + subsec->dofs_size > max) {
			dtrace_dof_error(dof, "exceeds maximum size");
			goto err;
		}

		ttl += subsec->dofs_size;

		for (i = 0; difo[i].section != DOF_SECT_NONE; i++) {
			if (subsec->dofs_type != difo[i].section)
				continue;

			if (!(subsec->dofs_flags & DOF_SECF_LOAD)) {
				dtrace_dof_error(dof, "section not loaded");
				goto err;
			}

			if (subsec->dofs_align != difo[i].align) {
				dtrace_dof_error(dof, "bad alignment");
				goto err;
			}

			bufp = (void **)((uintptr_t)dp + difo[i].bufoffs);
			lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs);

			if (*bufp != NULL) {
				dtrace_dof_error(dof, difo[i].msg);
				goto err;
			}

			if (difo[i].entsize != subsec->dofs_entsize) {
				dtrace_dof_error(dof, "entry size mismatch");
				goto err;
			}

			if (subsec->dofs_entsize != 0 &&
			    (subsec->dofs_size % subsec->dofs_entsize) != 0) {
				dtrace_dof_error(dof, "corrupt entry size");
				goto err;
			}

			*lenp = subsec->dofs_size;
			*bufp = kmem_alloc(subsec->dofs_size, KM_SLEEP);
			bcopy((char *)(uintptr_t)(daddr + subsec->dofs_offset),
			    *bufp, subsec->dofs_size);

			/* Convert byte count to entry count where applicable. */
			if (subsec->dofs_entsize != 0)
				*lenp /= subsec->dofs_entsize;

			break;
		}

		/*
		 * If we encounter a loadable DIFO sub-section that is not
		 * known to us, assume this is a broken program and fail.
		 */
		if (difo[i].section == DOF_SECT_NONE &&
		    (subsec->dofs_flags & DOF_SECF_LOAD)) {
			dtrace_dof_error(dof, "unrecognized DIFO subsection");
			goto err;
		}
	}

	if (dp->dtdo_buf == NULL) {
		/*
		 * We can't have a DIF object without DIF text.
		 */
		dtrace_dof_error(dof, "missing DIF text");
		goto err;
	}

	/*
	 * Before we validate the DIF object, run through the variable table
	 * looking for the strings -- if any of their size are under, we'll set
	 * their size to be the system-wide default string size.  Note that
	 * this should _not_ happen if the "strsize" option has been set --
	 * in this case, the compiler should have set the size to reflect the
	 * setting of the option.
	 */
	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];
		dtrace_diftype_t *t = &v->dtdv_type;

		if (v->dtdv_id < DIF_VAR_OTHER_UBASE)
			continue;

		if (t->dtdt_kind == DIF_TYPE_STRING && t->dtdt_size == 0)
			t->dtdt_size = dtrace_strsize_default;
	}

	if (dtrace_difo_validate(dp, vstate, DIF_DIR_NREGS, cr) != 0)
		goto err;

	dtrace_difo_init(dp, vstate);
	return (dp);

err:
	/* kmem_free(NULL, ...) is safe for any table that was never filled. */
	kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
	kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
	kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
	kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));

	kmem_free(dp, sizeof (dtrace_difo_t));
	return (NULL);
}

/*
 * Decode a DIFO header section into a predicate: a thin wrapper that builds
 * the DIFO and hands it to dtrace_predicate_create().
 */
static dtrace_predicate_t *
dtrace_dof_predicate(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
    cred_t *cr)
{
	dtrace_difo_t *dp;

	if ((dp = dtrace_dof_difo(dof, sec, vstate, cr)) == NULL)
		return (NULL);

	return (dtrace_predicate_create(dp));
}

/*
 * Decode a DOF_SECT_ACTDESC section into a linked list of action
 * descriptions.  Returns the head of the list, or NULL on error (in which
 * case any partially-constructed list has been released).
 */
static dtrace_actdesc_t *
dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
    cred_t *cr)
{
	dtrace_actdesc_t *act, *first = NULL, *last = NULL, *next;
	dof_actdesc_t *desc;
	dof_sec_t *difosec;
	size_t offs;
	uintptr_t daddr = (uintptr_t)dof;
	uint64_t arg;
	dtrace_actkind_t kind;

	if (sec->dofs_type != DOF_SECT_ACTDESC) {
		dtrace_dof_error(dof, "invalid action section");
		return (NULL);
	}

	if (sec->dofs_offset + sizeof (dof_actdesc_t) > dof->dofh_loadsz) {
		dtrace_dof_error(dof, "truncated action description");
		return (NULL);
	}

	if (sec->dofs_align != sizeof (uint64_t)) {
		dtrace_dof_error(dof, "bad alignment in action description");
		return (NULL);
	}

	if (sec->dofs_size < sec->dofs_entsize) {
		dtrace_dof_error(dof, "section entry size exceeds total size");
		return (NULL);
	}

	if (sec->dofs_entsize != sizeof (dof_actdesc_t)) {
		dtrace_dof_error(dof, "bad entry size in action description");
		return (NULL);
	}

	if (sec->dofs_size / sec->dofs_entsize > dtrace_actions_max) {
		dtrace_dof_error(dof, "actions exceed dtrace_actions_max");
		return (NULL);
	}

	for (offs = 0; offs < sec->dofs_size; offs += sec->dofs_entsize) {
		desc = (dof_actdesc_t *)(daddr +
		    (uintptr_t)sec->dofs_offset + offs);
		kind = (dtrace_actkind_t)desc->dofa_kind;

		if ((DTRACEACT_ISPRINTFLIKE(kind) &&
		    (kind != DTRACEACT_PRINTA ||
		    desc->dofa_strtab != DOF_SECIDX_NONE)) ||
		    (kind == DTRACEACT_DIFEXPR &&
		    desc->dofa_strtab != DOF_SECIDX_NONE)) {
			dof_sec_t *strtab;
			char *str, *fmt;
			uint64_t i;

			/*
			 * The argument to these actions is an index into the
			 * DOF string table.  For printf()-like actions, this
			 * is the format string.  For print(), this is the
			 * CTF type of the expression result.
			 */
			if ((strtab = dtrace_dof_sect(dof,
			    DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL)
				goto err;

			str = (char *)((uintptr_t)dof +
			    (uintptr_t)strtab->dofs_offset);

			/* Walk to the string's terminating NUL, bounded
			 * by the string table size. */
			for (i = desc->dofa_arg; i < strtab->dofs_size; i++) {
				if (str[i] == '\0')
					break;
			}

			if (i >= strtab->dofs_size) {
				dtrace_dof_error(dof, "bogus format string");
				goto err;
			}

			if (i == desc->dofa_arg) {
				dtrace_dof_error(dof, "empty format string");
				goto err;
			}

			/* Copy the string (including its NUL) into a
			 * kernel buffer owned by the action. */
			i -= desc->dofa_arg;
			fmt = kmem_alloc(i + 1, KM_SLEEP);
			bcopy(&str[desc->dofa_arg], fmt, i + 1);
			arg = (uint64_t)(uintptr_t)fmt;
		} else {
			if (kind == DTRACEACT_PRINTA) {
				ASSERT(desc->dofa_strtab == DOF_SECIDX_NONE);
				arg = 0;
			} else {
				arg = desc->dofa_arg;
			}
		}

		act = dtrace_actdesc_create(kind, desc->dofa_ntuple,
		    desc->dofa_uarg, arg);

		if (last != NULL) {
			last->dtad_next = act;
		} else {
			first = act;
		}

		last = act;

		if (desc->dofa_difo == DOF_SECIDX_NONE)
			continue;

		if ((difosec = dtrace_dof_sect(dof,
		    DOF_SECT_DIFOHDR, desc->dofa_difo)) == NULL)
			goto err;

		act->dtad_difo = dtrace_dof_difo(dof, difosec, vstate, cr);

		if (act->dtad_difo == NULL)
			goto err;
	}

	ASSERT(first != NULL);
	return (first);

err:
	for (act = first; act != NULL; act = next) {
		next = act->dtad_next;
		dtrace_actdesc_release(act, vstate);
	}

	return (NULL);
}

/*
 * Decode a DOF_SECT_ECBDESC section into an ECB description: probe
 * description plus optional predicate and action list.  Returns NULL on
 * error with all partially-constructed state released.
 */
static dtrace_ecbdesc_t *
dtrace_dof_ecbdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
    cred_t *cr)
{
	dtrace_ecbdesc_t *ep;
	dof_ecbdesc_t *ecb;
	dtrace_probedesc_t *desc;
	dtrace_predicate_t *pred = NULL;

	if (sec->dofs_size < sizeof (dof_ecbdesc_t)) {
		dtrace_dof_error(dof, "truncated ECB description");
		return (NULL);
	}

	if (sec->dofs_align != sizeof (uint64_t)) {
		dtrace_dof_error(dof, "bad alignment in ECB description");
		return (NULL);
	}

	ecb = (dof_ecbdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset);
	sec = dtrace_dof_sect(dof, DOF_SECT_PROBEDESC, ecb->dofe_probes);

	if (sec == NULL)
		return (NULL);

	ep = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);
	ep->dted_uarg = ecb->dofe_uarg;
	desc = &ep->dted_probe;

	if (dtrace_dof_probedesc(dof, sec, desc) == NULL)
		goto err;

	if (ecb->dofe_pred != DOF_SECIDX_NONE) {
		if ((sec = dtrace_dof_sect(dof,
		    DOF_SECT_DIFOHDR, ecb->dofe_pred)) == NULL)
			goto err;

		if ((pred = dtrace_dof_predicate(dof, sec, vstate, cr)) == NULL)
			goto err;

		ep->dted_pred.dtpdd_predicate = pred;
	}

	if (ecb->dofe_actions != DOF_SECIDX_NONE) {
		if ((sec = dtrace_dof_sect(dof,
		    DOF_SECT_ACTDESC, ecb->dofe_actions)) == NULL)
			goto err;

		ep->dted_action = dtrace_dof_actdesc(dof, sec, vstate, cr);

		if (ep->dted_action == NULL)
			goto err;
	}

	return (ep);

err:
	if (pred != NULL)
		dtrace_predicate_release(pred, vstate);
	kmem_free(ep, sizeof (dtrace_ecbdesc_t));
	return (NULL);
}

/*
 * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the
 * specified DOF.  At present, this amounts to simply adding 'ubase' to the
 * site of any user SETX relocations to account for load object base address.
 * In the future, if we need other relocations, this function can be extended.
 */
static int
dtrace_dof_relocate(dof_hdr_t *dof, dof_sec_t *sec, uint64_t ubase)
{
	uintptr_t daddr = (uintptr_t)dof;
	dof_relohdr_t *dofr =
	    (dof_relohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
	dof_sec_t *ss, *rs, *ts;
	dof_relodesc_t *r;
	uint_t i, n;

	if (sec->dofs_size < sizeof (dof_relohdr_t) ||
	    sec->dofs_align != sizeof (dof_secidx_t)) {
		dtrace_dof_error(dof, "invalid relocation header");
		return (-1);
	}

	/* String table, relocation table, and target section, respectively. */
	ss = dtrace_dof_sect(dof, DOF_SECT_STRTAB, dofr->dofr_strtab);
	rs = dtrace_dof_sect(dof, DOF_SECT_RELTAB, dofr->dofr_relsec);
	ts = dtrace_dof_sect(dof, DOF_SECT_NONE, dofr->dofr_tgtsec);

	if (ss == NULL || rs == NULL || ts == NULL)
		return (-1); /* dtrace_dof_error() has been called already */

	if (rs->dofs_entsize < sizeof (dof_relodesc_t) ||
	    rs->dofs_align != sizeof (uint64_t)) {
		dtrace_dof_error(dof, "invalid relocation section");
		return (-1);
	}

	r = (dof_relodesc_t *)(uintptr_t)(daddr + rs->dofs_offset);
	n = rs->dofs_size / rs->dofs_entsize;

	for (i = 0; i < n; i++) {
		uintptr_t taddr = daddr + ts->dofs_offset + r->dofr_offset;

		switch (r->dofr_type) {
		case DOF_RELO_NONE:
			break;
		case DOF_RELO_SETX:
			/*
			 * Both comparisons are needed: the second alone could
			 * be defeated by an offset that overflows when
			 * sizeof (uint64_t) is added.
			 */
			if (r->dofr_offset >= ts->dofs_size || r->dofr_offset +
			    sizeof (uint64_t) > ts->dofs_size) {
				dtrace_dof_error(dof, "bad relocation offset");
				return (-1);
			}

			if (!IS_P2ALIGNED(taddr, sizeof (uint64_t))) {
				dtrace_dof_error(dof, "misaligned setx relo");
				return (-1);
			}

			*(uint64_t *)taddr += ubase;
			break;
		default:
			dtrace_dof_error(dof, "invalid relocation type");
			return (-1);
		}

		r = (dof_relodesc_t *)((uintptr_t)r + rs->dofs_entsize);
	}

	return (0);
}

/*
 * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated
 * header:  it should be at the front of a memory region that is at least
 * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in
 * size.  It need not be validated in any other way.
 */
static int
dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr,
    dtrace_enabling_t **enabp, uint64_t ubase, int noprobes)
{
	uint64_t len = dof->dofh_loadsz, seclen;
	uintptr_t daddr = (uintptr_t)dof;
	dtrace_ecbdesc_t *ep;
	dtrace_enabling_t *enab;
	uint_t i;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t));

	/*
	 * Check the DOF header identification bytes.  In addition to checking
	 * valid settings, we also verify that unused bits/bytes are zeroed so
	 * we can use them later without fear of regressing existing binaries.
	 */
	if (bcmp(&dof->dofh_ident[DOF_ID_MAG0],
	    DOF_MAG_STRING, DOF_MAG_STRLEN) != 0) {
		dtrace_dof_error(dof, "DOF magic string mismatch");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_ILP32 &&
	    dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_LP64) {
		dtrace_dof_error(dof, "DOF has invalid data model");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_ENCODING] != DOF_ENCODE_NATIVE) {
		dtrace_dof_error(dof, "DOF encoding mismatch");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
	    dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_2) {
		dtrace_dof_error(dof, "DOF version mismatch");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) {
		dtrace_dof_error(dof, "DOF uses unsupported instruction set");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_DIFIREG] > DIF_DIR_NREGS) {
		dtrace_dof_error(dof, "DOF uses too many integer registers");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_DIFTREG] > DIF_DTR_NREGS) {
		dtrace_dof_error(dof, "DOF uses too many tuple registers");
		return (-1);
	}

	for (i = DOF_ID_PAD; i < DOF_ID_SIZE; i++) {
		if (dof->dofh_ident[i] != 0) {
			dtrace_dof_error(dof, "DOF has invalid ident byte set");
			return (-1);
		}
	}

	if (dof->dofh_flags & ~DOF_FL_VALID) {
		dtrace_dof_error(dof, "DOF has invalid flag bits set");
		return (-1);
	}

	if (dof->dofh_secsize == 0) {
		dtrace_dof_error(dof, "zero section header size");
		return (-1);
	}

	/*
	 * Check that the section headers don't exceed the amount of DOF
	 * data.  Note that we cast the section size and number of sections
	 * to uint64_t's to prevent possible overflow in the multiplication.
	 */
	seclen = (uint64_t)dof->dofh_secnum * (uint64_t)dof->dofh_secsize;

	if (dof->dofh_secoff > len || seclen > len ||
	    dof->dofh_secoff + seclen > len) {
		dtrace_dof_error(dof, "truncated section headers");
		return (-1);
	}

	if (!IS_P2ALIGNED(dof->dofh_secoff, sizeof (uint64_t))) {
		dtrace_dof_error(dof, "misaligned section headers");
		return (-1);
	}

	if (!IS_P2ALIGNED(dof->dofh_secsize, sizeof (uint64_t))) {
		dtrace_dof_error(dof, "misaligned section size");
		return (-1);
	}

	/*
	 * Take an initial pass through the section headers to be sure that
	 * the headers don't have stray offsets.  If the 'noprobes' flag is
	 * set, do not permit sections relating to providers, probes, or args.
	 */
	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(daddr +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (noprobes) {
			switch (sec->dofs_type) {
			case DOF_SECT_PROVIDER:
			case DOF_SECT_PROBES:
			case DOF_SECT_PRARGS:
			case DOF_SECT_PROFFS:
				dtrace_dof_error(dof, "illegal sections "
				    "for enabling");
				return (-1);
			}
		}

		if (DOF_SEC_ISLOADABLE(sec->dofs_type) &&
		    !(sec->dofs_flags & DOF_SECF_LOAD)) {
			dtrace_dof_error(dof, "loadable section with load "
			    "flag unset");
			return (-1);
		}

		if (!(sec->dofs_flags & DOF_SECF_LOAD))
			continue; /* just ignore non-loadable sections */

		if (!ISP2(sec->dofs_align)) {
			dtrace_dof_error(dof, "bad section alignment");
			return (-1);
		}

		if (sec->dofs_offset & (sec->dofs_align - 1)) {
			dtrace_dof_error(dof, "misaligned section");
			return (-1);
		}

		if (sec->dofs_offset > len || sec->dofs_size > len ||
		    sec->dofs_offset + sec->dofs_size > len) {
			dtrace_dof_error(dof, "corrupt section header");
			return (-1);
		}

		/*
		 * String tables must end in NUL so later consumers (e.g.
		 * dtrace_dof_probedesc()) can rely on termination.
		 */
		if (sec->dofs_type == DOF_SECT_STRTAB && *((char *)daddr +
		    sec->dofs_offset + sec->dofs_size - 1) != '\0') {
			dtrace_dof_error(dof, "non-terminating string table");
			return (-1);
		}
	}

	/*
	 * Take a second pass through the sections and locate and perform any
	 * relocations that are present.  We do this after the first pass to
	 * be sure that all sections have had their headers validated.
	 */
	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(daddr +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (!(sec->dofs_flags & DOF_SECF_LOAD))
			continue; /* skip sections that are not loadable */

		switch (sec->dofs_type) {
		case DOF_SECT_URELHDR:
			if (dtrace_dof_relocate(dof, sec, ubase) != 0)
				return (-1);
			break;
		}
	}

	if ((enab = *enabp) == NULL)
		enab = *enabp = dtrace_enabling_create(vstate);

	/* Final pass: decode each ECB description into the enabling. */
	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(daddr +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_ECBDESC)
			continue;

		if ((ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr)) == NULL) {
			dtrace_enabling_destroy(enab);
			*enabp = NULL;
			return (-1);
		}

		dtrace_enabling_add(enab, ep);
	}

	return (0);
}

/*
 * Process DOF for any options.  This routine assumes that the DOF has been
 * at least processed by dtrace_dof_slurp().
13857 */ 13858static int 13859dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state) 13860{ 13861 int i, rval; 13862 uint32_t entsize; 13863 size_t offs; 13864 dof_optdesc_t *desc; 13865 13866 for (i = 0; i < dof->dofh_secnum; i++) { 13867 dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof + 13868 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); 13869 13870 if (sec->dofs_type != DOF_SECT_OPTDESC) 13871 continue; 13872 13873 if (sec->dofs_align != sizeof (uint64_t)) { 13874 dtrace_dof_error(dof, "bad alignment in " 13875 "option description"); 13876 return (EINVAL); 13877 } 13878 13879 if ((entsize = sec->dofs_entsize) == 0) { 13880 dtrace_dof_error(dof, "zeroed option entry size"); 13881 return (EINVAL); 13882 } 13883 13884 if (entsize < sizeof (dof_optdesc_t)) { 13885 dtrace_dof_error(dof, "bad option entry size"); 13886 return (EINVAL); 13887 } 13888 13889 for (offs = 0; offs < sec->dofs_size; offs += entsize) { 13890 desc = (dof_optdesc_t *)((uintptr_t)dof + 13891 (uintptr_t)sec->dofs_offset + offs); 13892 13893 if (desc->dofo_strtab != DOF_SECIDX_NONE) { 13894 dtrace_dof_error(dof, "non-zero option string"); 13895 return (EINVAL); 13896 } 13897 13898 if (desc->dofo_value == DTRACEOPT_UNSET) { 13899 dtrace_dof_error(dof, "unset option"); 13900 return (EINVAL); 13901 } 13902 13903 if ((rval = dtrace_state_option(state, 13904 desc->dofo_option, desc->dofo_value)) != 0) { 13905 dtrace_dof_error(dof, "rejected option"); 13906 return (rval); 13907 } 13908 } 13909 } 13910 13911 return (0); 13912} 13913 13914/* 13915 * DTrace Consumer State Functions 13916 */ 13917static int 13918dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) 13919{ 13920 size_t hashsize, maxper, min, chunksize = dstate->dtds_chunksize; 13921 void *base; 13922 uintptr_t limit; 13923 dtrace_dynvar_t *dvar, *next, *start; 13924 int i; 13925 13926 ASSERT(MUTEX_HELD(&dtrace_lock)); 13927 ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL); 13928 13929 bzero(dstate, sizeof 
(dtrace_dstate_t)); 13930 13931 if ((dstate->dtds_chunksize = chunksize) == 0) 13932 dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE; 13933 13934 if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t))) 13935 size = min; 13936 13937 if ((base = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) 13938 return (ENOMEM); 13939 13940 dstate->dtds_size = size; 13941 dstate->dtds_base = base; 13942 dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP); 13943 bzero(dstate->dtds_percpu, NCPU * sizeof (dtrace_dstate_percpu_t)); 13944 13945 hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)); 13946 13947 if (hashsize != 1 && (hashsize & 1)) 13948 hashsize--; 13949 13950 dstate->dtds_hashsize = hashsize; 13951 dstate->dtds_hash = dstate->dtds_base; 13952 13953 /* 13954 * Set all of our hash buckets to point to the single sink, and (if 13955 * it hasn't already been set), set the sink's hash value to be the 13956 * sink sentinel value. The sink is needed for dynamic variable 13957 * lookups to know that they have iterated over an entire, valid hash 13958 * chain. 13959 */ 13960 for (i = 0; i < hashsize; i++) 13961 dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink; 13962 13963 if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK) 13964 dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK; 13965 13966 /* 13967 * Determine number of active CPUs. Divide free list evenly among 13968 * active CPUs. 
13969 */ 13970 start = (dtrace_dynvar_t *) 13971 ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t)); 13972 limit = (uintptr_t)base + size; 13973 13974 maxper = (limit - (uintptr_t)start) / NCPU; 13975 maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize; 13976 13977#if !defined(sun) 13978 CPU_FOREACH(i) { 13979#else 13980 for (i = 0; i < NCPU; i++) { 13981#endif 13982 dstate->dtds_percpu[i].dtdsc_free = dvar = start; 13983 13984 /* 13985 * If we don't even have enough chunks to make it once through 13986 * NCPUs, we're just going to allocate everything to the first 13987 * CPU. And if we're on the last CPU, we're going to allocate 13988 * whatever is left over. In either case, we set the limit to 13989 * be the limit of the dynamic variable space. 13990 */ 13991 if (maxper == 0 || i == NCPU - 1) { 13992 limit = (uintptr_t)base + size; 13993 start = NULL; 13994 } else { 13995 limit = (uintptr_t)start + maxper; 13996 start = (dtrace_dynvar_t *)limit; 13997 } 13998 13999 ASSERT(limit <= (uintptr_t)base + size); 14000 14001 for (;;) { 14002 next = (dtrace_dynvar_t *)((uintptr_t)dvar + 14003 dstate->dtds_chunksize); 14004 14005 if ((uintptr_t)next + dstate->dtds_chunksize >= limit) 14006 break; 14007 14008 dvar->dtdv_next = next; 14009 dvar = next; 14010 } 14011 14012 if (maxper == 0) 14013 break; 14014 } 14015 14016 return (0); 14017} 14018 14019static void 14020dtrace_dstate_fini(dtrace_dstate_t *dstate) 14021{ 14022 ASSERT(MUTEX_HELD(&cpu_lock)); 14023 14024 if (dstate->dtds_base == NULL) 14025 return; 14026 14027 kmem_free(dstate->dtds_base, dstate->dtds_size); 14028 kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu); 14029} 14030 14031static void 14032dtrace_vstate_fini(dtrace_vstate_t *vstate) 14033{ 14034 /* 14035 * Logical XOR, where are you? 
	 */
	ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL));

	if (vstate->dtvs_nglobals > 0) {
		kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals *
		    sizeof (dtrace_statvar_t *));
	}

	if (vstate->dtvs_ntlocals > 0) {
		kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals *
		    sizeof (dtrace_difv_t));
	}

	ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL));

	if (vstate->dtvs_nlocals > 0) {
		kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals *
		    sizeof (dtrace_statvar_t *));
	}
}

#if defined(sun)
/*
 * Periodic cleaner (cyclic handler on Solaris): reclaims dirty dynamic
 * variables and cleans speculation state for an active consumer.
 */
static void
dtrace_state_clean(dtrace_state_t *state)
{
	if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE)
		return;

	dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars);
	dtrace_speculation_clean(state);
}

/*
 * Periodic deadman (cyclic handler on Solaris): refreshes dts_alive so that
 * userland can detect a wedged consumer.
 */
static void
dtrace_state_deadman(dtrace_state_t *state)
{
	hrtime_t now;

	dtrace_sync();

	now = dtrace_gethrtime();

	if (state != dtrace_anon.dta_state &&
	    now - state->dts_laststatus >= dtrace_deadman_user)
		return;

	/*
	 * We must be sure that dts_alive never appears to be less than the
	 * value upon entry to dtrace_state_deadman(), and because we lack a
	 * dtrace_cas64(), we cannot store to it atomically.  We thus instead
	 * store INT64_MAX to it, followed by a memory barrier, followed by
	 * the new value.  This assures that dts_alive never appears to be
	 * less than its true value, regardless of the order in which the
	 * stores to the underlying storage are issued.
	 */
	state->dts_alive = INT64_MAX;
	dtrace_membar_producer();
	state->dts_alive = now;
}
#else
/*
 * FreeBSD variant of the periodic cleaner: same work as the Solaris cyclic
 * version, but driven by callout(9) and therefore responsible for re-arming
 * itself at CLEANRATE.
 */
static void
dtrace_state_clean(void *arg)
{
	dtrace_state_t *state = arg;
	dtrace_optval_t *opt = state->dts_options;

	if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE)
		return;

	dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars);
	dtrace_speculation_clean(state);

	callout_reset(&state->dts_cleaner, hz * opt[DTRACEOPT_CLEANRATE] / NANOSEC,
	    dtrace_state_clean, state);
}

/*
 * FreeBSD variant of the deadman: callout(9)-driven and self-rearming at
 * dtrace_deadman_interval.
 */
static void
dtrace_state_deadman(void *arg)
{
	dtrace_state_t *state = arg;
	hrtime_t now;

	dtrace_sync();

	dtrace_debug_output();

	now = dtrace_gethrtime();

	if (state != dtrace_anon.dta_state &&
	    now - state->dts_laststatus >= dtrace_deadman_user)
		return;

	/*
	 * We must be sure that dts_alive never appears to be less than the
	 * value upon entry to dtrace_state_deadman(), and because we lack a
	 * dtrace_cas64(), we cannot store to it atomically.  We thus instead
	 * store INT64_MAX to it, followed by a memory barrier, followed by
	 * the new value.  This assures that dts_alive never appears to be
	 * less than its true value, regardless of the order in which the
	 * stores to the underlying storage are issued.
	 */
	state->dts_alive = INT64_MAX;
	dtrace_membar_producer();
	state->dts_alive = now;

	callout_reset(&state->dts_deadman, hz * dtrace_deadman_interval / NANOSEC,
	    dtrace_state_deadman, state);
}
#endif

/*
 * Allocate and initialize a new consumer state: per-CPU principal and
 * aggregation buffers, the aggregation-ID arena, cleaner/deadman timers,
 * default option values, and the privilege-derived visibility/action flags
 * computed from the opening credential.
 */
static dtrace_state_t *
#if defined(sun)
dtrace_state_create(dev_t *devp, cred_t *cr)
#else
dtrace_state_create(struct cdev *dev)
#endif
{
#if defined(sun)
	minor_t minor;
	major_t major;
#else
	cred_t *cr = NULL;
	int m = 0;
#endif
	char c[30];
	dtrace_state_t *state;
	dtrace_optval_t *opt;
	int bufsize = NCPU * sizeof (dtrace_buffer_t), i;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(MUTEX_HELD(&cpu_lock));

#if defined(sun)
	minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1,
	    VM_BESTFIT | VM_SLEEP);

	if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) {
		vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
		return (NULL);
	}

	state = ddi_get_soft_state(dtrace_softstate, minor);
#else
	if (dev != NULL) {
		cr = dev->si_cred;
		m = dev2unit(dev);
	}

	/* Allocate memory for the state.
	 */
	state = kmem_zalloc(sizeof(dtrace_state_t), KM_SLEEP);
#endif

	state->dts_epid = DTRACE_EPIDNONE + 1;

	(void) snprintf(c, sizeof (c), "dtrace_aggid_%d", m);
#if defined(sun)
	/*
	 * NOTE(review): the Solaris aggid arena spans [1, UINT32_MAX) while
	 * the FreeBSD unrhdr below spans [1, INT_MAX] -- confirm the narrower
	 * FreeBSD range is intentional.
	 */
	state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1,
	    NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);

	if (devp != NULL) {
		major = getemajor(*devp);
	} else {
		major = ddi_driver_major(dtrace_devi);
	}

	state->dts_dev = makedevice(major, minor);

	if (devp != NULL)
		*devp = state->dts_dev;
#else
	state->dts_aggid_arena = new_unrhdr(1, INT_MAX, &dtrace_unr_mtx);
	state->dts_dev = dev;
#endif

	/*
	 * We allocate NCPU buffers.  On the one hand, this can be quite
	 * a bit of memory per instance (nearly 36K on a Starcat).  On the
	 * other hand, it saves an additional memory reference in the probe
	 * path.
	 */
	state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP);
	state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP);

#if defined(sun)
	state->dts_cleaner = CYCLIC_NONE;
	state->dts_deadman = CYCLIC_NONE;
#else
	callout_init(&state->dts_cleaner, CALLOUT_MPSAFE);
	callout_init(&state->dts_deadman, CALLOUT_MPSAFE);
#endif
	state->dts_vstate.dtvs_state = state;

	for (i = 0; i < DTRACEOPT_MAX; i++)
		state->dts_options[i] = DTRACEOPT_UNSET;

	/*
	 * Set the default options.
	 */
	opt = state->dts_options;
	opt[DTRACEOPT_BUFPOLICY] = DTRACEOPT_BUFPOLICY_SWITCH;
	opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_AUTO;
	opt[DTRACEOPT_NSPEC] = dtrace_nspec_default;
	opt[DTRACEOPT_SPECSIZE] = dtrace_specsize_default;
	opt[DTRACEOPT_CPU] = (dtrace_optval_t)DTRACE_CPUALL;
	opt[DTRACEOPT_STRSIZE] = dtrace_strsize_default;
	opt[DTRACEOPT_STACKFRAMES] = dtrace_stackframes_default;
	opt[DTRACEOPT_USTACKFRAMES] = dtrace_ustackframes_default;
	opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_default;
	opt[DTRACEOPT_AGGRATE] = dtrace_aggrate_default;
	opt[DTRACEOPT_SWITCHRATE] = dtrace_switchrate_default;
	opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default;
	opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default;
	opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default;

	state->dts_activity = DTRACE_ACTIVITY_INACTIVE;

	/*
	 * Depending on the user credentials, we set flag bits which alter probe
	 * visibility or the amount of destructiveness allowed.  In the case of
	 * actual anonymous tracing, or the possession of all privileges, all of
	 * the normal checks are bypassed.
	 */
	if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
		state->dts_cred.dcr_visible = DTRACE_CRV_ALL;
		state->dts_cred.dcr_action = DTRACE_CRA_ALL;
	} else {
		/*
		 * Set up the credentials for this instantiation.  We take a
		 * hold on the credential to prevent it from disappearing on
		 * us; this in turn prevents the zone_t referenced by this
		 * credential from disappearing.  This means that we can
		 * examine the credential and the zone from probe context.
		 */
		crhold(cr);
		state->dts_cred.dcr_cred = cr;

		/*
		 * CRA_PROC means "we have *some* privilege for dtrace" and
		 * unlocks the use of variables like pid, zonename, etc.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) ||
		    PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
			state->dts_cred.dcr_action |= DTRACE_CRA_PROC;
		}

		/*
		 * dtrace_user allows use of syscall and profile providers.
		 * If the user also has proc_owner and/or proc_zone, we
		 * extend the scope to include additional visibility and
		 * destructive power.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) {
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) {
				state->dts_cred.dcr_visible |=
				    DTRACE_CRV_ALLPROC;

				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
			}

			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) {
				state->dts_cred.dcr_visible |=
				    DTRACE_CRV_ALLZONE;

				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
			}

			/*
			 * If we have all privs in whatever zone this is,
			 * we can do destructive things to processes which
			 * have altered credentials.
			 */
#if defined(sun)
			if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
			    cr->cr_zone->zone_privset)) {
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
			}
#endif
		}

		/*
		 * Holding the dtrace_kernel privilege also implies that
		 * the user has the dtrace_user privilege from a visibility
		 * perspective.  But without further privileges, some
		 * destructive actions are not available.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) {
			/*
			 * Make all probes in all zones visible.  However,
			 * this doesn't mean that all actions become available
			 * to all zones.
			 */
			state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL |
			    DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE;

			state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL |
			    DTRACE_CRA_PROC;
			/*
			 * Holding proc_owner means that destructive actions
			 * for *this* zone are allowed.
			 */
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;

			/*
			 * Holding proc_zone means that destructive actions
			 * for this user/group ID in all zones is allowed.
			 */
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;

#if defined(sun)
			/*
			 * If we have all privs in whatever zone this is,
			 * we can do destructive things to processes which
			 * have altered credentials.
			 */
			if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
			    cr->cr_zone->zone_privset)) {
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
			}
#endif
		}

		/*
		 * Holding the dtrace_proc privilege gives control over fasttrap
		 * and pid providers.  We need to grant wider destructive
		 * privileges in the event that the user has proc_owner and/or
		 * proc_zone.
14369 */ 14370 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) { 14371 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) 14372 state->dts_cred.dcr_action |= 14373 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; 14374 14375 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) 14376 state->dts_cred.dcr_action |= 14377 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE; 14378 } 14379 } 14380 14381 return (state); 14382} 14383 14384static int 14385dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) 14386{ 14387 dtrace_optval_t *opt = state->dts_options, size; 14388 processorid_t cpu = 0;; 14389 int flags = 0, rval, factor, divisor = 1; 14390 14391 ASSERT(MUTEX_HELD(&dtrace_lock)); 14392 ASSERT(MUTEX_HELD(&cpu_lock)); 14393 ASSERT(which < DTRACEOPT_MAX); 14394 ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE || 14395 (state == dtrace_anon.dta_state && 14396 state->dts_activity == DTRACE_ACTIVITY_ACTIVE)); 14397 14398 if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0) 14399 return (0); 14400 14401 if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET) 14402 cpu = opt[DTRACEOPT_CPU]; 14403 14404 if (which == DTRACEOPT_SPECSIZE) 14405 flags |= DTRACEBUF_NOSWITCH; 14406 14407 if (which == DTRACEOPT_BUFSIZE) { 14408 if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING) 14409 flags |= DTRACEBUF_RING; 14410 14411 if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL) 14412 flags |= DTRACEBUF_FILL; 14413 14414 if (state != dtrace_anon.dta_state || 14415 state->dts_activity != DTRACE_ACTIVITY_ACTIVE) 14416 flags |= DTRACEBUF_INACTIVE; 14417 } 14418 14419 for (size = opt[which]; size >= sizeof (uint64_t); size /= divisor) { 14420 /* 14421 * The size must be 8-byte aligned. If the size is not 8-byte 14422 * aligned, drop it down by the difference. 
14423 */ 14424 if (size & (sizeof (uint64_t) - 1)) 14425 size -= size & (sizeof (uint64_t) - 1); 14426 14427 if (size < state->dts_reserve) { 14428 /* 14429 * Buffers always must be large enough to accommodate 14430 * their prereserved space. We return E2BIG instead 14431 * of ENOMEM in this case to allow for user-level 14432 * software to differentiate the cases. 14433 */ 14434 return (E2BIG); 14435 } 14436 14437 rval = dtrace_buffer_alloc(buf, size, flags, cpu, &factor); 14438 14439 if (rval != ENOMEM) { 14440 opt[which] = size; 14441 return (rval); 14442 } 14443 14444 if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL) 14445 return (rval); 14446 14447 for (divisor = 2; divisor < factor; divisor <<= 1) 14448 continue; 14449 } 14450 14451 return (ENOMEM); 14452} 14453 14454static int 14455dtrace_state_buffers(dtrace_state_t *state) 14456{ 14457 dtrace_speculation_t *spec = state->dts_speculations; 14458 int rval, i; 14459 14460 if ((rval = dtrace_state_buffer(state, state->dts_buffer, 14461 DTRACEOPT_BUFSIZE)) != 0) 14462 return (rval); 14463 14464 if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer, 14465 DTRACEOPT_AGGSIZE)) != 0) 14466 return (rval); 14467 14468 for (i = 0; i < state->dts_nspeculations; i++) { 14469 if ((rval = dtrace_state_buffer(state, 14470 spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0) 14471 return (rval); 14472 } 14473 14474 return (0); 14475} 14476 14477static void 14478dtrace_state_prereserve(dtrace_state_t *state) 14479{ 14480 dtrace_ecb_t *ecb; 14481 dtrace_probe_t *probe; 14482 14483 state->dts_reserve = 0; 14484 14485 if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL) 14486 return; 14487 14488 /* 14489 * If our buffer policy is a "fill" buffer policy, we need to set the 14490 * prereserved space to be the space required by the END probes. 
14491 */ 14492 probe = dtrace_probes[dtrace_probeid_end - 1]; 14493 ASSERT(probe != NULL); 14494 14495 for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) { 14496 if (ecb->dte_state != state) 14497 continue; 14498 14499 state->dts_reserve += ecb->dte_needed + ecb->dte_alignment; 14500 } 14501} 14502 14503static int 14504dtrace_state_go(dtrace_state_t *state, processorid_t *cpu) 14505{ 14506 dtrace_optval_t *opt = state->dts_options, sz, nspec; 14507 dtrace_speculation_t *spec; 14508 dtrace_buffer_t *buf; 14509#if defined(sun) 14510 cyc_handler_t hdlr; 14511 cyc_time_t when; 14512#endif 14513 int rval = 0, i, bufsize = NCPU * sizeof (dtrace_buffer_t); 14514 dtrace_icookie_t cookie; 14515 14516 mutex_enter(&cpu_lock); 14517 mutex_enter(&dtrace_lock); 14518 14519 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) { 14520 rval = EBUSY; 14521 goto out; 14522 } 14523 14524 /* 14525 * Before we can perform any checks, we must prime all of the 14526 * retained enablings that correspond to this state. 14527 */ 14528 dtrace_enabling_prime(state); 14529 14530 if (state->dts_destructive && !state->dts_cred.dcr_destructive) { 14531 rval = EACCES; 14532 goto out; 14533 } 14534 14535 dtrace_state_prereserve(state); 14536 14537 /* 14538 * Now we want to do is try to allocate our speculations. 14539 * We do not automatically resize the number of speculations; if 14540 * this fails, we will fail the operation. 
14541 */ 14542 nspec = opt[DTRACEOPT_NSPEC]; 14543 ASSERT(nspec != DTRACEOPT_UNSET); 14544 14545 if (nspec > INT_MAX) { 14546 rval = ENOMEM; 14547 goto out; 14548 } 14549 14550 spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), 14551 KM_NOSLEEP | KM_NORMALPRI); 14552 14553 if (spec == NULL) { 14554 rval = ENOMEM; 14555 goto out; 14556 } 14557 14558 state->dts_speculations = spec; 14559 state->dts_nspeculations = (int)nspec; 14560 14561 for (i = 0; i < nspec; i++) { 14562 if ((buf = kmem_zalloc(bufsize, 14563 KM_NOSLEEP | KM_NORMALPRI)) == NULL) { 14564 rval = ENOMEM; 14565 goto err; 14566 } 14567 14568 spec[i].dtsp_buffer = buf; 14569 } 14570 14571 if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) { 14572 if (dtrace_anon.dta_state == NULL) { 14573 rval = ENOENT; 14574 goto out; 14575 } 14576 14577 if (state->dts_necbs != 0) { 14578 rval = EALREADY; 14579 goto out; 14580 } 14581 14582 state->dts_anon = dtrace_anon_grab(); 14583 ASSERT(state->dts_anon != NULL); 14584 state = state->dts_anon; 14585 14586 /* 14587 * We want "grabanon" to be set in the grabbed state, so we'll 14588 * copy that option value from the grabbing state into the 14589 * grabbed state. 14590 */ 14591 state->dts_options[DTRACEOPT_GRABANON] = 14592 opt[DTRACEOPT_GRABANON]; 14593 14594 *cpu = dtrace_anon.dta_beganon; 14595 14596 /* 14597 * If the anonymous state is active (as it almost certainly 14598 * is if the anonymous enabling ultimately matched anything), 14599 * we don't allow any further option processing -- but we 14600 * don't return failure. 14601 */ 14602 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) 14603 goto out; 14604 } 14605 14606 if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET && 14607 opt[DTRACEOPT_AGGSIZE] != 0) { 14608 if (state->dts_aggregations == NULL) { 14609 /* 14610 * We're not going to create an aggregation buffer 14611 * because we don't have any ECBs that contain 14612 * aggregations -- set this option to 0. 
14613 */ 14614 opt[DTRACEOPT_AGGSIZE] = 0; 14615 } else { 14616 /* 14617 * If we have an aggregation buffer, we must also have 14618 * a buffer to use as scratch. 14619 */ 14620 if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET || 14621 opt[DTRACEOPT_BUFSIZE] < state->dts_needed) { 14622 opt[DTRACEOPT_BUFSIZE] = state->dts_needed; 14623 } 14624 } 14625 } 14626 14627 if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET && 14628 opt[DTRACEOPT_SPECSIZE] != 0) { 14629 if (!state->dts_speculates) { 14630 /* 14631 * We're not going to create speculation buffers 14632 * because we don't have any ECBs that actually 14633 * speculate -- set the speculation size to 0. 14634 */ 14635 opt[DTRACEOPT_SPECSIZE] = 0; 14636 } 14637 } 14638 14639 /* 14640 * The bare minimum size for any buffer that we're actually going to 14641 * do anything to is sizeof (uint64_t). 14642 */ 14643 sz = sizeof (uint64_t); 14644 14645 if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) || 14646 (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) || 14647 (state->dts_aggregations != NULL && opt[DTRACEOPT_AGGSIZE] < sz)) { 14648 /* 14649 * A buffer size has been explicitly set to 0 (or to a size 14650 * that will be adjusted to 0) and we need the space -- we 14651 * need to return failure. We return ENOSPC to differentiate 14652 * it from failing to allocate a buffer due to failure to meet 14653 * the reserve (for which we return E2BIG). 
14654 */ 14655 rval = ENOSPC; 14656 goto out; 14657 } 14658 14659 if ((rval = dtrace_state_buffers(state)) != 0) 14660 goto err; 14661 14662 if ((sz = opt[DTRACEOPT_DYNVARSIZE]) == DTRACEOPT_UNSET) 14663 sz = dtrace_dstate_defsize; 14664 14665 do { 14666 rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz); 14667 14668 if (rval == 0) 14669 break; 14670 14671 if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL) 14672 goto err; 14673 } while (sz >>= 1); 14674 14675 opt[DTRACEOPT_DYNVARSIZE] = sz; 14676 14677 if (rval != 0) 14678 goto err; 14679 14680 if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max) 14681 opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max; 14682 14683 if (opt[DTRACEOPT_CLEANRATE] == 0) 14684 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max; 14685 14686 if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min) 14687 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min; 14688 14689 if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max) 14690 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max; 14691 14692 state->dts_alive = state->dts_laststatus = dtrace_gethrtime(); 14693#if defined(sun) 14694 hdlr.cyh_func = (cyc_func_t)dtrace_state_clean; 14695 hdlr.cyh_arg = state; 14696 hdlr.cyh_level = CY_LOW_LEVEL; 14697 14698 when.cyt_when = 0; 14699 when.cyt_interval = opt[DTRACEOPT_CLEANRATE]; 14700 14701 state->dts_cleaner = cyclic_add(&hdlr, &when); 14702 14703 hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman; 14704 hdlr.cyh_arg = state; 14705 hdlr.cyh_level = CY_LOW_LEVEL; 14706 14707 when.cyt_when = 0; 14708 when.cyt_interval = dtrace_deadman_interval; 14709 14710 state->dts_deadman = cyclic_add(&hdlr, &when); 14711#else 14712 callout_reset(&state->dts_cleaner, hz * opt[DTRACEOPT_CLEANRATE] / NANOSEC, 14713 dtrace_state_clean, state); 14714 callout_reset(&state->dts_deadman, hz * dtrace_deadman_interval / NANOSEC, 14715 dtrace_state_deadman, state); 14716#endif 14717 14718 state->dts_activity = DTRACE_ACTIVITY_WARMUP; 14719 14720#if defined(sun) 14721 
if (state->dts_getf != 0 && 14722 !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) { 14723 /* 14724 * We don't have kernel privs but we have at least one call 14725 * to getf(); we need to bump our zone's count, and (if 14726 * this is the first enabling to have an unprivileged call 14727 * to getf()) we need to hook into closef(). 14728 */ 14729 state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf++; 14730 14731 if (dtrace_getf++ == 0) { 14732 ASSERT(dtrace_closef == NULL); 14733 dtrace_closef = dtrace_getf_barrier; 14734 } 14735 } 14736#endif 14737 14738 /* 14739 * Now it's time to actually fire the BEGIN probe. We need to disable 14740 * interrupts here both to record the CPU on which we fired the BEGIN 14741 * probe (the data from this CPU will be processed first at user 14742 * level) and to manually activate the buffer for this CPU. 14743 */ 14744 cookie = dtrace_interrupt_disable(); 14745 *cpu = curcpu; 14746 ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE); 14747 state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE; 14748 14749 dtrace_probe(dtrace_probeid_begin, 14750 (uint64_t)(uintptr_t)state, 0, 0, 0, 0); 14751 dtrace_interrupt_enable(cookie); 14752 /* 14753 * We may have had an exit action from a BEGIN probe; only change our 14754 * state to ACTIVE if we're still in WARMUP. 14755 */ 14756 ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP || 14757 state->dts_activity == DTRACE_ACTIVITY_DRAINING); 14758 14759 if (state->dts_activity == DTRACE_ACTIVITY_WARMUP) 14760 state->dts_activity = DTRACE_ACTIVITY_ACTIVE; 14761 14762 /* 14763 * Regardless of whether or not now we're in ACTIVE or DRAINING, we 14764 * want each CPU to transition its principal buffer out of the 14765 * INACTIVE state. Doing this assures that no CPU will suddenly begin 14766 * processing an ECB halfway down a probe's ECB chain; all CPUs will 14767 * atomically transition from processing none of a state's ECBs to 14768 * processing all of them. 
/*
 * Stop tracing for the given consumer state, firing the END probe along the
 * way.  On success, *cpu is set to the CPU on which the END probe fired so
 * that user level can process that CPU's principal buffer last.  Returns 0
 * on success or EINVAL if the state is not currently ACTIVE or DRAINING.
 * Caller must hold dtrace_lock.
 */
static int
dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu)
{
	dtrace_icookie_t cookie;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE &&
	    state->dts_activity != DTRACE_ACTIVITY_DRAINING)
		return (EINVAL);

	/*
	 * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync
	 * to be sure that every CPU has seen it.  See below for the details
	 * on why this is done.
	 */
	state->dts_activity = DTRACE_ACTIVITY_DRAINING;
	dtrace_sync();

	/*
	 * By this point, it is impossible for any CPU to be still processing
	 * with DTRACE_ACTIVITY_ACTIVE.  We can thus set our activity to
	 * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any
	 * other CPU in dtrace_buffer_reserve().  This allows dtrace_probe()
	 * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN
	 * iff we're in the END probe.
	 */
	state->dts_activity = DTRACE_ACTIVITY_COOLDOWN;
	dtrace_sync();
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN);

	/*
	 * Finally, we can release the reserve and call the END probe.  We
	 * disable interrupts across calling the END probe to allow us to
	 * return the CPU on which we actually called the END probe.  This
	 * allows user-land to be sure that this CPU's principal buffer is
	 * processed last.
	 */
	state->dts_reserve = 0;

	cookie = dtrace_interrupt_disable();
	*cpu = curcpu;
	dtrace_probe(dtrace_probeid_end,
	    (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
	dtrace_interrupt_enable(cookie);

	/* One more sync so that no CPU is still in probe context for us. */
	state->dts_activity = DTRACE_ACTIVITY_STOPPED;
	dtrace_sync();

#if defined(sun)
	if (state->dts_getf != 0 &&
	    !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
		/*
		 * We don't have kernel privs but we have at least one call
		 * to getf(); we need to lower our zone's count, and (if
		 * this is the last enabling to have an unprivileged call
		 * to getf()) we need to clear the closef() hook.
		 */
		ASSERT(state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf > 0);
		ASSERT(dtrace_closef == dtrace_getf_barrier);
		ASSERT(dtrace_getf > 0);

		state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf--;

		if (--dtrace_getf == 0)
			dtrace_closef = NULL;
	}
#endif

	return (0);
}
/*
 * Set a single option on an inactive consumer state.  Returns 0 on success,
 * EBUSY if the state is already active, EINVAL for an out-of-range option or
 * negative value (DTRACEOPT_CPU excepted), or EACCES if destructive actions
 * are administratively disallowed.  Caller must hold dtrace_lock.
 */
static int
dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option,
    dtrace_optval_t val)
{
	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
		return (EBUSY);

	if (option >= DTRACEOPT_MAX)
		return (EINVAL);

	if (option != DTRACEOPT_CPU && val < 0)
		return (EINVAL);

	switch (option) {
	case DTRACEOPT_DESTRUCTIVE:
		if (dtrace_destructive_disallow)
			return (EACCES);

		state->dts_cred.dcr_destructive = 1;
		break;

	case DTRACEOPT_BUFSIZE:
	case DTRACEOPT_DYNVARSIZE:
	case DTRACEOPT_AGGSIZE:
	case DTRACEOPT_SPECSIZE:
	case DTRACEOPT_STRSIZE:
		/* Redundant with the check above for these options. */
		if (val < 0)
			return (EINVAL);

		if (val >= LONG_MAX) {
			/*
			 * If this is an otherwise negative value, set it to
			 * the highest multiple of 128m less than LONG_MAX.
			 * Technically, we're adjusting the size without
			 * regard to the buffer resizing policy, but in fact,
			 * this has no effect -- if we set the buffer size to
			 * ~LONG_MAX and the buffer policy is ultimately set to
			 * be "manual", the buffer allocation is guaranteed to
			 * fail, if only because the allocation requires two
			 * buffers.  (We set the size to the highest
			 * multiple of 128m because it ensures that the size
			 * will remain a multiple of a megabyte when
			 * repeatedly halved -- all the way down to 15m.)
			 */
			val = LONG_MAX - (1 << 27) + 1;
		}
	}

	state->dts_options[option] = val;

	return (0);
}
/*
 * Tear down a consumer state entirely: retract retained enablings, kill any
 * hot enabling, disable and destroy all ECBs, and free all buffers,
 * speculations, variable state and per-state resources.  The ordering here
 * is significant: consumers are synced out of probe context (via
 * dtrace_sync()) before ECBs and buffers are freed.  Caller must hold both
 * dtrace_lock and cpu_lock.
 */
static void
dtrace_state_destroy(dtrace_state_t *state)
{
	dtrace_ecb_t *ecb;
	dtrace_vstate_t *vstate = &state->dts_vstate;
#if defined(sun)
	minor_t minor = getminor(state->dts_dev);
#endif
	int i, bufsize = NCPU * sizeof (dtrace_buffer_t);
	dtrace_speculation_t *spec = state->dts_speculations;
	int nspec = state->dts_nspeculations;
	uint32_t match;

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * First, retract any retained enablings for this state.
	 */
	dtrace_enabling_retract(state);
	ASSERT(state->dts_nretained == 0);

	if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE ||
	    state->dts_activity == DTRACE_ACTIVITY_DRAINING) {
		/*
		 * We have managed to come into dtrace_state_destroy() on a
		 * hot enabling -- almost certainly because of a disorderly
		 * shutdown of a consumer.  (That is, a consumer that is
		 * exiting without having called dtrace_stop().) In this case,
		 * we're going to set our activity to be KILLED, and then
		 * issue a sync to be sure that everyone is out of probe
		 * context before we start blowing away ECBs.
		 */
		state->dts_activity = DTRACE_ACTIVITY_KILLED;
		dtrace_sync();
	}

	/*
	 * Release the credential hold we took in dtrace_state_create().
	 */
	if (state->dts_cred.dcr_cred != NULL)
		crfree(state->dts_cred.dcr_cred);

	/*
	 * Now we can safely disable and destroy any enabled probes.  Because
	 * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress
	 * (especially if they're all enabled), we take two passes through the
	 * ECBs: in the first, we disable just DTRACE_PRIV_KERNEL probes, and
	 * in the second we disable whatever is left over.
	 */
	for (match = DTRACE_PRIV_KERNEL; ; match = 0) {
		for (i = 0; i < state->dts_necbs; i++) {
			if ((ecb = state->dts_ecbs[i]) == NULL)
				continue;

			if (match && ecb->dte_probe != NULL) {
				dtrace_probe_t *probe = ecb->dte_probe;
				dtrace_provider_t *prov = probe->dtpr_provider;

				/* First pass: skip non-kernel-priv probes. */
				if (!(prov->dtpv_priv.dtpp_flags & match))
					continue;
			}

			dtrace_ecb_disable(ecb);
			dtrace_ecb_destroy(ecb);
		}

		if (!match)
			break;
	}

	/*
	 * Before we free the buffers, perform one more sync to assure that
	 * every CPU is out of probe context.
	 */
	dtrace_sync();

	dtrace_buffer_free(state->dts_buffer);
	dtrace_buffer_free(state->dts_aggbuffer);

	for (i = 0; i < nspec; i++)
		dtrace_buffer_free(spec[i].dtsp_buffer);

#if defined(sun)
	if (state->dts_cleaner != CYCLIC_NONE)
		cyclic_remove(state->dts_cleaner);

	if (state->dts_deadman != CYCLIC_NONE)
		cyclic_remove(state->dts_deadman);
#else
	/* On FreeBSD, stop and drain the cleaner/deadman callouts instead. */
	callout_stop(&state->dts_cleaner);
	callout_drain(&state->dts_cleaner);
	callout_stop(&state->dts_deadman);
	callout_drain(&state->dts_deadman);
#endif

	dtrace_dstate_fini(&vstate->dtvs_dynvars);
	dtrace_vstate_fini(vstate);
	if (state->dts_ecbs != NULL)
		kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *));

	if (state->dts_aggregations != NULL) {
#ifdef DEBUG
		/* All aggregations must have been destroyed with their ECBs. */
		for (i = 0; i < state->dts_naggregations; i++)
			ASSERT(state->dts_aggregations[i] == NULL);
#endif
		ASSERT(state->dts_naggregations > 0);
		kmem_free(state->dts_aggregations,
		    state->dts_naggregations * sizeof (dtrace_aggregation_t *));
	}

	kmem_free(state->dts_buffer, bufsize);
	kmem_free(state->dts_aggbuffer, bufsize);

	for (i = 0; i < nspec; i++)
		kmem_free(spec[i].dtsp_buffer, bufsize);

	if (spec != NULL)
		kmem_free(spec, nspec * sizeof (dtrace_speculation_t));

	dtrace_format_destroy(state);

	if (state->dts_aggid_arena != NULL) {
#if defined(sun)
		vmem_destroy(state->dts_aggid_arena);
#else
		delete_unrhdr(state->dts_aggid_arena);
#endif
		state->dts_aggid_arena = NULL;
	}
#if defined(sun)
	ddi_soft_state_free(dtrace_softstate, minor);
	vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
#endif
}
/*
 * DTrace Anonymous Enabling Functions
 */

/*
 * Transfer ownership of the anonymous enabling's state to the caller,
 * destroying the anonymous enabling itself.  Returns the state (or NULL if
 * there is no anonymous state).  Caller must hold dtrace_lock.
 */
static dtrace_state_t *
dtrace_anon_grab(void)
{
	dtrace_state_t *state;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	if ((state = dtrace_anon.dta_state) == NULL) {
		ASSERT(dtrace_anon.dta_enabling == NULL);
		return (NULL);
	}

	ASSERT(dtrace_anon.dta_enabling != NULL);
	ASSERT(dtrace_retained != NULL);

	dtrace_enabling_destroy(dtrace_anon.dta_enabling);
	dtrace_anon.dta_enabling = NULL;
	dtrace_anon.dta_state = NULL;

	return (state);
}

/*
 * Process anonymous (boot-time) enablings: iterate over "dof-data-N"
 * properties, slurp each DOF into a single shared anonymous state, and
 * retain the resulting enabling.  Any malformed DOF causes the anonymous
 * state to be chucked.  Caller must hold dtrace_lock and cpu_lock.
 */
static void
dtrace_anon_property(void)
{
	int i, rv;
	dtrace_state_t *state;
	dof_hdr_t *dof;
	char c[32];		/* enough for "dof-data-" + digits */

	ASSERT(MUTEX_HELD(&dtrace_lock));
	ASSERT(MUTEX_HELD(&cpu_lock));

	for (i = 0; ; i++) {
		(void) snprintf(c, sizeof (c), "dof-data-%d", i);

		/* Be verbose so boot-time DOF errors make it to the console. */
		dtrace_err_verbose = 1;

		if ((dof = dtrace_dof_property(c)) == NULL) {
			dtrace_err_verbose = 0;
			break;
		}

#if defined(sun)
		/*
		 * We want to create anonymous state, so we need to transition
		 * the kernel debugger to indicate that DTrace is active.  If
		 * this fails (e.g. because the debugger has modified text in
		 * some way), we won't continue with the processing.
		 */
		if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
			cmn_err(CE_NOTE, "kernel debugger active; anonymous "
			    "enabling ignored.");
			dtrace_dof_destroy(dof);
			break;
		}
#endif

		/*
		 * If we haven't allocated an anonymous state, we'll do so now.
		 */
		if ((state = dtrace_anon.dta_state) == NULL) {
#if defined(sun)
			state = dtrace_state_create(NULL, NULL);
#else
			state = dtrace_state_create(NULL);
#endif
			dtrace_anon.dta_state = state;

			if (state == NULL) {
				/*
				 * This basically shouldn't happen:  the only
				 * failure mode from dtrace_state_create() is a
				 * failure of ddi_soft_state_zalloc() that
				 * itself should never happen.  Still, the
				 * interface allows for a failure mode, and
				 * we want to fail as gracefully as possible:
				 * we'll emit an error message and cease
				 * processing anonymous state in this case.
				 */
				cmn_err(CE_WARN, "failed to create "
				    "anonymous state");
				dtrace_dof_destroy(dof);
				break;
			}
		}

		rv = dtrace_dof_slurp(dof, &state->dts_vstate, CRED(),
		    &dtrace_anon.dta_enabling, 0, B_TRUE);

		if (rv == 0)
			rv = dtrace_dof_options(dof, state);

		dtrace_err_verbose = 0;
		dtrace_dof_destroy(dof);

		if (rv != 0) {
			/*
			 * This is malformed DOF; chuck any anonymous state
			 * that we created.
			 */
			ASSERT(dtrace_anon.dta_enabling == NULL);
			dtrace_state_destroy(state);
			dtrace_anon.dta_state = NULL;
			break;
		}

		ASSERT(dtrace_anon.dta_enabling != NULL);
	}

	if (dtrace_anon.dta_enabling != NULL) {
		int rval;

		/*
		 * dtrace_enabling_retain() can only fail because we are
		 * trying to retain more enablings than are allowed -- but
		 * we only have one anonymous enabling, and we are guaranteed
		 * to be allowed at least one retained enabling; we assert
		 * that dtrace_enabling_retain() returns success.
		 */
		rval = dtrace_enabling_retain(dtrace_anon.dta_enabling);
		ASSERT(rval == 0);

		dtrace_enabling_dump(dtrace_anon.dta_enabling);
	}
}
/*
 * DTrace Helper Functions
 */

/*
 * Record one entry in the helper trace buffer describing the execution of
 * the given helper action at point 'where' (0 for the predicate, action
 * index + 1 for actions, or one of the DTRACE_HELPTRACE_* markers), along
 * with current fault state and the values of all helper-local variables on
 * this CPU.  Runs in probe context; slot allocation is lock-free via CAS.
 */
static void
dtrace_helper_trace(dtrace_helper_action_t *helper,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where)
{
	uint32_t size, next, nnext, i;
	dtrace_helptrace_t *ent;
	uint16_t flags = cpu_core[curcpu].cpuc_dtrace_flags;

	if (!dtrace_helptrace_enabled)
		return;

	ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals);

	/*
	 * What would a tracing framework be without its own tracing
	 * framework?  (Well, a hell of a lot simpler, for starters...)
	 */
	size = sizeof (dtrace_helptrace_t) + dtrace_helptrace_nlocals *
	    sizeof (uint64_t) - sizeof (uint64_t);

	/*
	 * Iterate until we can allocate a slot in the trace buffer.
	 */
	do {
		next = dtrace_helptrace_next;

		if (next + size < dtrace_helptrace_bufsize) {
			nnext = next + size;
		} else {
			/* Wrap to the start of the buffer. */
			nnext = size;
		}
	} while (dtrace_cas32(&dtrace_helptrace_next, next, nnext) != next);

	/*
	 * We have our slot; fill it in.
	 */
	if (nnext == size)
		next = 0;

	ent = (dtrace_helptrace_t *)&dtrace_helptrace_buffer[next];
	ent->dtht_helper = helper;
	ent->dtht_where = where;
	ent->dtht_nlocals = vstate->dtvs_nlocals;

	ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) ?
	    mstate->dtms_fltoffs : -1;
	ent->dtht_fault = DTRACE_FLAGS2FLT(flags);
	ent->dtht_illval = cpu_core[curcpu].cpuc_dtrace_illval;

	for (i = 0; i < vstate->dtvs_nlocals; i++) {
		dtrace_statvar_t *svar;

		if ((svar = vstate->dtvs_locals[i]) == NULL)
			continue;

		ASSERT(svar->dtsv_size >= NCPU * sizeof (uint64_t));
		ent->dtht_locals[i] =
		    ((uint64_t *)(uintptr_t)svar->dtsv_data)[curcpu];
	}
}
/*
 * Execute the current process's helper actions of the given kind ('which',
 * one of the DTRACE_HELPER_ACTION_* constants), evaluating each helper's
 * predicate and then its actions via dtrace_dif_emulate().  Returns the
 * result of the last action of the last matching helper, or 0 if no helper
 * matched or a fault occurred.  Runs in probe context.
 */
static uint64_t
dtrace_helper(int which, dtrace_mstate_t *mstate,
    dtrace_state_t *state, uint64_t arg0, uint64_t arg1)
{
	uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags;
	uint64_t sarg0 = mstate->dtms_arg[0];
	uint64_t sarg1 = mstate->dtms_arg[1];
	uint64_t rval = 0;
	dtrace_helpers_t *helpers = curproc->p_dtrace_helpers;
	dtrace_helper_action_t *helper;
	dtrace_vstate_t *vstate;
	dtrace_difo_t *pred;
	int i, trace = dtrace_helptrace_enabled;

	ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS);

	if (helpers == NULL)
		return (0);

	if ((helper = helpers->dthps_actions[which]) == NULL)
		return (0);

	vstate = &helpers->dthps_vstate;
	mstate->dtms_arg[0] = arg0;
	mstate->dtms_arg[1] = arg1;

	/*
	 * Now iterate over each helper.  If its predicate evaluates to 'true',
	 * we'll call the corresponding actions.  Note that the below calls
	 * to dtrace_dif_emulate() may set faults in machine state.  This is
	 * okay:  our caller (the outer dtrace_dif_emulate()) will simply plow
	 * the stored DIF offset with its own (which is the desired behavior).
	 * Also, note the calls to dtrace_dif_emulate() may allocate scratch
	 * from machine state; this is okay, too.
	 */
	for (; helper != NULL; helper = helper->dtha_next) {
		if ((pred = helper->dtha_predicate) != NULL) {
			if (trace)
				dtrace_helper_trace(helper, mstate, vstate, 0);

			if (!dtrace_dif_emulate(pred, mstate, vstate, state))
				goto next;

			if (*flags & CPU_DTRACE_FAULT)
				goto err;
		}

		for (i = 0; i < helper->dtha_nactions; i++) {
			if (trace)
				dtrace_helper_trace(helper,
				    mstate, vstate, i + 1);

			rval = dtrace_dif_emulate(helper->dtha_actions[i],
			    mstate, vstate, state);

			if (*flags & CPU_DTRACE_FAULT)
				goto err;
		}

next:
		if (trace)
			dtrace_helper_trace(helper, mstate, vstate,
			    DTRACE_HELPTRACE_NEXT);
	}

	if (trace)
		dtrace_helper_trace(helper, mstate, vstate,
		    DTRACE_HELPTRACE_DONE);

	/*
	 * Restore the arg0 that we saved upon entry.
	 */
	mstate->dtms_arg[0] = sarg0;
	mstate->dtms_arg[1] = sarg1;

	return (rval);

err:
	if (trace)
		dtrace_helper_trace(helper, mstate, vstate,
		    DTRACE_HELPTRACE_ERR);

	/*
	 * Restore the arg0 that we saved upon entry.
	 */
	mstate->dtms_arg[0] = sarg0;
	mstate->dtms_arg[1] = sarg1;

	return (0);
}
/*
 * Release a helper action: drop the references it holds on its predicate
 * DIFO and each action DIFO, then free the action itself.
 */
static void
dtrace_helper_action_destroy(dtrace_helper_action_t *helper,
    dtrace_vstate_t *vstate)
{
	int i;

	if (helper->dtha_predicate != NULL)
		dtrace_difo_release(helper->dtha_predicate, vstate);

	for (i = 0; i < helper->dtha_nactions; i++) {
		ASSERT(helper->dtha_actions[i] != NULL);
		dtrace_difo_release(helper->dtha_actions[i], vstate);
	}

	kmem_free(helper->dtha_actions,
	    helper->dtha_nactions * sizeof (dtrace_difo_t *));
	kmem_free(helper, sizeof (dtrace_helper_action_t));
}

/*
 * Destroy all of the current process's helper actions and helper providers
 * belonging to the given generation.  Returns 0 on success or EINVAL if the
 * generation is out of range.  Caller must hold dtrace_lock; the lock is
 * dropped and reacquired while removing helper providers from the meta
 * provider.
 */
static int
dtrace_helper_destroygen(int gen)
{
	proc_t *p = curproc;
	dtrace_helpers_t *help = p->p_dtrace_helpers;
	dtrace_vstate_t *vstate;
	int i;

	ASSERT(MUTEX_HELD(&dtrace_lock));

	if (help == NULL || gen > help->dthps_generation)
		return (EINVAL);

	vstate = &help->dthps_vstate;

	/* Unlink and destroy every helper action with this generation. */
	for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
		dtrace_helper_action_t *last = NULL, *h, *next;

		for (h = help->dthps_actions[i]; h != NULL; h = next) {
			next = h->dtha_next;

			if (h->dtha_generation == gen) {
				if (last != NULL) {
					last->dtha_next = next;
				} else {
					help->dthps_actions[i] = next;
				}

				dtrace_helper_action_destroy(h, vstate);
			} else {
				last = h;
			}
		}
	}

	/*
	 * Iterate until we've cleared out all helper providers with the
	 * given generation number.
	 */
	for (;;) {
		dtrace_helper_provider_t *prov;

		/*
		 * Look for a helper provider with the right generation.  We
		 * have to start back at the beginning of the list each time
		 * because we drop dtrace_lock.  It's unlikely that we'll make
		 * more than two passes.
		 */
		for (i = 0; i < help->dthps_nprovs; i++) {
			prov = help->dthps_provs[i];

			if (prov->dthp_generation == gen)
				break;
		}

		/*
		 * If there were no matches, we're done.
		 */
		if (i == help->dthps_nprovs)
			break;

		/*
		 * Move the last helper provider into this slot.
		 */
		help->dthps_nprovs--;
		help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs];
		help->dthps_provs[help->dthps_nprovs] = NULL;

		mutex_exit(&dtrace_lock);

		/*
		 * If we have a meta provider, remove this helper provider.
		 */
		mutex_enter(&dtrace_meta_lock);
		if (dtrace_meta_pid != NULL) {
			ASSERT(dtrace_deferred_pid == NULL);
			dtrace_helper_provider_remove(&prov->dthp_prov,
			    p->p_pid);
		}
		mutex_exit(&dtrace_meta_lock);

		dtrace_helper_provider_destroy(prov);

		mutex_enter(&dtrace_lock);
	}

	return (0);
}
/*
 * Validate every DIFO in a helper action (predicate plus all actions) for
 * use in helper context.  Returns non-zero iff all DIFOs are valid.
 */
static int
dtrace_helper_validate(dtrace_helper_action_t *helper)
{
	int err = 0, i;
	dtrace_difo_t *dp;

	if ((dp = helper->dtha_predicate) != NULL)
		err += dtrace_difo_validate_helper(dp);

	for (i = 0; i < helper->dtha_nactions; i++)
		err += dtrace_difo_validate_helper(helper->dtha_actions[i]);

	return (err == 0);
}

/*
 * Build a helper action of kind 'which' from the given ECB description and
 * append it to the current process's helper list for that kind.  Every
 * action must be a DIF expression.  Returns 0 on success, EINVAL on a
 * malformed or invalid description, or ENOSPC if the per-kind helper limit
 * has been reached.
 */
static int
dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep)
{
	dtrace_helpers_t *help;
	dtrace_helper_action_t *helper, *last;
	dtrace_actdesc_t *act;
	dtrace_vstate_t *vstate;
	dtrace_predicate_t *pred;
	int count = 0, nactions = 0, i;

	if (which < 0 || which >= DTRACE_NHELPER_ACTIONS)
		return (EINVAL);

	help = curproc->p_dtrace_helpers;
	last = help->dthps_actions[which];
	vstate = &help->dthps_vstate;

	/* Walk to the tail of the list, counting existing helpers. */
	for (count = 0; last != NULL; last = last->dtha_next) {
		count++;
		if (last->dtha_next == NULL)
			break;
	}

	/*
	 * If we already have dtrace_helper_actions_max helper actions for this
	 * helper action type, we'll refuse to add a new one.
	 */
	if (count >= dtrace_helper_actions_max)
		return (ENOSPC);

	helper = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP);
	helper->dtha_generation = help->dthps_generation;

	if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) {
		ASSERT(pred->dtp_difo != NULL);
		dtrace_difo_hold(pred->dtp_difo);
		helper->dtha_predicate = pred->dtp_difo;
	}

	for (act = ep->dted_action; act != NULL; act = act->dtad_next) {
		if (act->dtad_kind != DTRACEACT_DIFEXPR)
			goto err;

		if (act->dtad_difo == NULL)
			goto err;

		nactions++;
	}

	helper->dtha_actions = kmem_zalloc(sizeof (dtrace_difo_t *) *
	    (helper->dtha_nactions = nactions), KM_SLEEP);

	for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) {
		dtrace_difo_hold(act->dtad_difo);
		helper->dtha_actions[i++] = act->dtad_difo;
	}

	if (!dtrace_helper_validate(helper))
		goto err;

	if (last == NULL) {
		help->dthps_actions[which] = helper;
	} else {
		last->dtha_next = helper;
	}

	/* Grow helper tracing's notion of the local-variable count. */
	if (vstate->dtvs_nlocals > dtrace_helptrace_nlocals) {
		dtrace_helptrace_nlocals = vstate->dtvs_nlocals;
		dtrace_helptrace_next = 0;
	}

	return (0);
err:
	/* Undo the partially-constructed helper. */
	dtrace_helper_action_destroy(helper, vstate);
	return (EINVAL);
}
/*
 * Hand a process's helper provider description(s) to the meta provider, or
 * defer them if DTrace is not attached or no meta provider is registered
 * yet.  If dofhp is non-NULL, only that description is provided; otherwise
 * all of the process's helper providers are.  Must be called without
 * dtrace_lock held; dtrace_meta_lock is taken before dtrace_lock.
 */
static void
dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help,
    dof_helper_t *dofhp)
{
	ASSERT(MUTEX_NOT_HELD(&dtrace_lock));

	mutex_enter(&dtrace_meta_lock);
	mutex_enter(&dtrace_lock);

	if (!dtrace_attached() || dtrace_meta_pid == NULL) {
		/*
		 * If the dtrace module is loaded but not attached, or if
		 * there isn't a meta provider registered to deal with
		 * these provider descriptions, we need to postpone creating
		 * the actual providers until later.
		 */

		if (help->dthps_next == NULL && help->dthps_prev == NULL &&
		    dtrace_deferred_pid != help) {
			/* Push onto the head of the deferred list. */
			help->dthps_deferred = 1;
			help->dthps_pid = p->p_pid;
			help->dthps_next = dtrace_deferred_pid;
			help->dthps_prev = NULL;
			if (dtrace_deferred_pid != NULL)
				dtrace_deferred_pid->dthps_prev = help;
			dtrace_deferred_pid = help;
		}

		mutex_exit(&dtrace_lock);

	} else if (dofhp != NULL) {
		/*
		 * If the dtrace module is loaded and we have a particular
		 * helper provider description, pass that off to the
		 * meta provider.
		 */

		mutex_exit(&dtrace_lock);

		dtrace_helper_provide(dofhp, p->p_pid);

	} else {
		/*
		 * Otherwise, just pass all the helper provider descriptions
		 * off to the meta provider.
		 */

		int i;
		mutex_exit(&dtrace_lock);

		for (i = 0; i < help->dthps_nprovs; i++) {
			dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
			    p->p_pid);
		}
	}

	mutex_exit(&dtrace_meta_lock);
}
or if 15565 * there aren't isn't a meta provider registered to deal with 15566 * these provider descriptions, we need to postpone creating 15567 * the actual providers until later. 15568 */ 15569 15570 if (help->dthps_next == NULL && help->dthps_prev == NULL && 15571 dtrace_deferred_pid != help) { 15572 help->dthps_deferred = 1; 15573 help->dthps_pid = p->p_pid; 15574 help->dthps_next = dtrace_deferred_pid; 15575 help->dthps_prev = NULL; 15576 if (dtrace_deferred_pid != NULL) 15577 dtrace_deferred_pid->dthps_prev = help; 15578 dtrace_deferred_pid = help; 15579 } 15580 15581 mutex_exit(&dtrace_lock); 15582 15583 } else if (dofhp != NULL) { 15584 /* 15585 * If the dtrace module is loaded and we have a particular 15586 * helper provider description, pass that off to the 15587 * meta provider. 15588 */ 15589 15590 mutex_exit(&dtrace_lock); 15591 15592 dtrace_helper_provide(dofhp, p->p_pid); 15593 15594 } else { 15595 /* 15596 * Otherwise, just pass all the helper provider descriptions 15597 * off to the meta provider. 15598 */ 15599 15600 int i; 15601 mutex_exit(&dtrace_lock); 15602 15603 for (i = 0; i < help->dthps_nprovs; i++) { 15604 dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, 15605 p->p_pid); 15606 } 15607 } 15608 15609 mutex_exit(&dtrace_meta_lock); 15610} 15611 15612static int 15613dtrace_helper_provider_add(dof_helper_t *dofhp, int gen) 15614{ 15615 dtrace_helpers_t *help; 15616 dtrace_helper_provider_t *hprov, **tmp_provs; 15617 uint_t tmp_maxprovs, i; 15618 15619 ASSERT(MUTEX_HELD(&dtrace_lock)); 15620 15621 help = curproc->p_dtrace_helpers; 15622 ASSERT(help != NULL); 15623 15624 /* 15625 * If we already have dtrace_helper_providers_max helper providers, 15626 * we're refuse to add a new one. 15627 */ 15628 if (help->dthps_nprovs >= dtrace_helper_providers_max) 15629 return (ENOSPC); 15630 15631 /* 15632 * Check to make sure this isn't a duplicate. 
/*
 * Drop a reference on a helper provider; when the last reference is
 * released, destroy its DOF and free the provider.  dtrace_lock is taken
 * only to manipulate the reference count.
 */
static void
dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov)
{
	mutex_enter(&dtrace_lock);

	if (--hprov->dthp_ref == 0) {
		dof_hdr_t *dof;
		mutex_exit(&dtrace_lock);
		dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof;
		dtrace_dof_destroy(dof);
		kmem_free(hprov, sizeof (dtrace_helper_provider_t));
	} else {
		mutex_exit(&dtrace_lock);
	}
}

/*
 * Validate a DOF_SECT_PROVIDER section of untrusted user-supplied DOF:
 * check section sizes and alignment, resolve the string/probe/args/offsets
 * (and, for post-v1 DOF, is-enabled offsets) sections, and verify every
 * probe's names, offset ranges and argument mappings, guarding against
 * integer overflow in the index arithmetic throughout.  Returns 0 if the
 * provider description is well-formed, -1 (after dtrace_dof_error()) if not.
 */
static int
dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec)
{
	uintptr_t daddr = (uintptr_t)dof;
	dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
	dof_provider_t *provider;
	dof_probe_t *probe;
	uint8_t *arg;
	char *strtab, *typestr;
	dof_stridx_t typeidx;
	size_t typesz;
	uint_t nprobes, j, k;

	ASSERT(sec->dofs_type == DOF_SECT_PROVIDER);

	if (sec->dofs_offset & (sizeof (uint_t) - 1)) {
		dtrace_dof_error(dof, "misaligned section offset");
		return (-1);
	}

	/*
	 * The section needs to be large enough to contain the DOF provider
	 * structure appropriate for the given version.
	 */
	if (sec->dofs_size <
	    ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ?
	    offsetof(dof_provider_t, dofpv_prenoffs) :
	    sizeof (dof_provider_t))) {
		dtrace_dof_error(dof, "provider section too small");
		return (-1);
	}

	provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
	str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab);
	prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes);
	arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs);
	off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs);

	if (str_sec == NULL || prb_sec == NULL ||
	    arg_sec == NULL || off_sec == NULL)
		return (-1);

	enoff_sec = NULL;

	/* Is-enabled offsets only exist in post-version-1 DOF. */
	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
	    provider->dofpv_prenoffs != DOF_SECT_NONE &&
	    (enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS,
	    provider->dofpv_prenoffs)) == NULL)
		return (-1);

	strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);

	if (provider->dofpv_name >= str_sec->dofs_size ||
	    strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) {
		dtrace_dof_error(dof, "invalid provider name");
		return (-1);
	}

	if (prb_sec->dofs_entsize == 0 ||
	    prb_sec->dofs_entsize > prb_sec->dofs_size) {
		dtrace_dof_error(dof, "invalid entry size");
		return (-1);
	}

	if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) {
		dtrace_dof_error(dof, "misaligned entry size");
		return (-1);
	}

	if (off_sec->dofs_entsize != sizeof (uint32_t)) {
		dtrace_dof_error(dof, "invalid entry size");
		return (-1);
	}

	if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) {
		dtrace_dof_error(dof, "misaligned section offset");
		return (-1);
	}

	if (arg_sec->dofs_entsize != sizeof (uint8_t)) {
		dtrace_dof_error(dof, "invalid entry size");
		return (-1);
	}

	arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);

	nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;

	/*
	 * Take a pass through the probes to check for errors.
	 */
	for (j = 0; j < nprobes; j++) {
		probe = (dof_probe_t *)(uintptr_t)(daddr +
		    prb_sec->dofs_offset + j * prb_sec->dofs_entsize);

		if (probe->dofpr_func >= str_sec->dofs_size) {
			dtrace_dof_error(dof, "invalid function name");
			return (-1);
		}

		if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) {
			dtrace_dof_error(dof, "function name too long");
			return (-1);
		}

		if (probe->dofpr_name >= str_sec->dofs_size ||
		    strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) {
			dtrace_dof_error(dof, "invalid probe name");
			return (-1);
		}

		/*
		 * The offset count must not wrap the index, and the offsets
		 * must also not overflow the section's data.
		 */
		if (probe->dofpr_offidx + probe->dofpr_noffs <
		    probe->dofpr_offidx ||
		    (probe->dofpr_offidx + probe->dofpr_noffs) *
		    off_sec->dofs_entsize > off_sec->dofs_size) {
			dtrace_dof_error(dof, "invalid probe offset");
			return (-1);
		}

		if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) {
			/*
			 * If there's no is-enabled offset section, make sure
			 * there aren't any is-enabled offsets. Otherwise
			 * perform the same checks as for probe offsets
			 * (immediately above).
			 */
			if (enoff_sec == NULL) {
				if (probe->dofpr_enoffidx != 0 ||
				    probe->dofpr_nenoffs != 0) {
					dtrace_dof_error(dof, "is-enabled "
					    "offsets with null section");
					return (-1);
				}
			} else if (probe->dofpr_enoffidx +
			    probe->dofpr_nenoffs < probe->dofpr_enoffidx ||
			    (probe->dofpr_enoffidx + probe->dofpr_nenoffs) *
			    enoff_sec->dofs_entsize > enoff_sec->dofs_size) {
				dtrace_dof_error(dof, "invalid is-enabled "
				    "offset");
				return (-1);
			}

			if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) {
				dtrace_dof_error(dof, "zero probe and "
				    "is-enabled offsets");
				return (-1);
			}
		} else if (probe->dofpr_noffs == 0) {
			dtrace_dof_error(dof, "zero probe offsets");
			return (-1);
		}

		if (probe->dofpr_argidx + probe->dofpr_xargc <
		    probe->dofpr_argidx ||
		    (probe->dofpr_argidx + probe->dofpr_xargc) *
		    arg_sec->dofs_entsize > arg_sec->dofs_size) {
			dtrace_dof_error(dof, "invalid args");
			return (-1);
		}

		/* Validate the native argument type strings. */
		typeidx = probe->dofpr_nargv;
		typestr = strtab + probe->dofpr_nargv;
		for (k = 0; k < probe->dofpr_nargc; k++) {
			if (typeidx >= str_sec->dofs_size) {
				dtrace_dof_error(dof, "bad "
				    "native argument type");
				return (-1);
			}

			typesz = strlen(typestr) + 1;
			if (typesz > DTRACE_ARGTYPELEN) {
				dtrace_dof_error(dof, "native "
				    "argument type too long");
				return (-1);
			}
			typeidx += typesz;
			typestr += typesz;
		}

		/* Validate the translated argument mappings and types. */
		typeidx = probe->dofpr_xargv;
		typestr = strtab + probe->dofpr_xargv;
		for (k = 0; k < probe->dofpr_xargc; k++) {
			if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) {
				dtrace_dof_error(dof, "bad "
				    "native argument index");
				return (-1);
			}

			if (typeidx >= str_sec->dofs_size) {
				dtrace_dof_error(dof, "bad "
				    "translated argument type");
				return (-1);
			}

			typesz = strlen(typestr) + 1;
			if (typesz > DTRACE_ARGTYPELEN) {
				dtrace_dof_error(dof, "translated argument "
				    "type too long");
				return (-1);
			}

			typeidx += typesz;
			typestr += typesz;
		}
	}

	return (0);
}
dtrace_dof_error(dof, "native " 15870 "argument type too long"); 15871 return (-1); 15872 } 15873 typeidx += typesz; 15874 typestr += typesz; 15875 } 15876 15877 typeidx = probe->dofpr_xargv; 15878 typestr = strtab + probe->dofpr_xargv; 15879 for (k = 0; k < probe->dofpr_xargc; k++) { 15880 if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) { 15881 dtrace_dof_error(dof, "bad " 15882 "native argument index"); 15883 return (-1); 15884 } 15885 15886 if (typeidx >= str_sec->dofs_size) { 15887 dtrace_dof_error(dof, "bad " 15888 "translated argument type"); 15889 return (-1); 15890 } 15891 15892 typesz = strlen(typestr) + 1; 15893 if (typesz > DTRACE_ARGTYPELEN) { 15894 dtrace_dof_error(dof, "translated argument " 15895 "type too long"); 15896 return (-1); 15897 } 15898 15899 typeidx += typesz; 15900 typestr += typesz; 15901 } 15902 } 15903 15904 return (0); 15905} 15906 15907static int 15908dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp) 15909{ 15910 dtrace_helpers_t *help; 15911 dtrace_vstate_t *vstate; 15912 dtrace_enabling_t *enab = NULL; 15913 int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1; 15914 uintptr_t daddr = (uintptr_t)dof; 15915 15916 ASSERT(MUTEX_HELD(&dtrace_lock)); 15917 15918 if ((help = curproc->p_dtrace_helpers) == NULL) 15919 help = dtrace_helpers_create(curproc); 15920 15921 vstate = &help->dthps_vstate; 15922 15923 if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab, 15924 dhp != NULL ? dhp->dofhp_addr : 0, B_FALSE)) != 0) { 15925 dtrace_dof_destroy(dof); 15926 return (rv); 15927 } 15928 15929 /* 15930 * Look for helper providers and validate their descriptions. 
15931 */ 15932 if (dhp != NULL) { 15933 for (i = 0; i < dof->dofh_secnum; i++) { 15934 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + 15935 dof->dofh_secoff + i * dof->dofh_secsize); 15936 15937 if (sec->dofs_type != DOF_SECT_PROVIDER) 15938 continue; 15939 15940 if (dtrace_helper_provider_validate(dof, sec) != 0) { 15941 dtrace_enabling_destroy(enab); 15942 dtrace_dof_destroy(dof); 15943 return (-1); 15944 } 15945 15946 nprovs++; 15947 } 15948 } 15949 15950 /* 15951 * Now we need to walk through the ECB descriptions in the enabling. 15952 */ 15953 for (i = 0; i < enab->dten_ndesc; i++) { 15954 dtrace_ecbdesc_t *ep = enab->dten_desc[i]; 15955 dtrace_probedesc_t *desc = &ep->dted_probe; 15956 15957 if (strcmp(desc->dtpd_provider, "dtrace") != 0) 15958 continue; 15959 15960 if (strcmp(desc->dtpd_mod, "helper") != 0) 15961 continue; 15962 15963 if (strcmp(desc->dtpd_func, "ustack") != 0) 15964 continue; 15965 15966 if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK, 15967 ep)) != 0) { 15968 /* 15969 * Adding this helper action failed -- we are now going 15970 * to rip out the entire generation and return failure. 
15971 */ 15972 (void) dtrace_helper_destroygen(help->dthps_generation); 15973 dtrace_enabling_destroy(enab); 15974 dtrace_dof_destroy(dof); 15975 return (-1); 15976 } 15977 15978 nhelpers++; 15979 } 15980 15981 if (nhelpers < enab->dten_ndesc) 15982 dtrace_dof_error(dof, "unmatched helpers"); 15983 15984 gen = help->dthps_generation++; 15985 dtrace_enabling_destroy(enab); 15986 15987 if (dhp != NULL && nprovs > 0) { 15988 dhp->dofhp_dof = (uint64_t)(uintptr_t)dof; 15989 if (dtrace_helper_provider_add(dhp, gen) == 0) { 15990 mutex_exit(&dtrace_lock); 15991 dtrace_helper_provider_register(curproc, help, dhp); 15992 mutex_enter(&dtrace_lock); 15993 15994 destroy = 0; 15995 } 15996 } 15997 15998 if (destroy) 15999 dtrace_dof_destroy(dof); 16000 16001 return (gen); 16002} 16003 16004static dtrace_helpers_t * 16005dtrace_helpers_create(proc_t *p) 16006{ 16007 dtrace_helpers_t *help; 16008 16009 ASSERT(MUTEX_HELD(&dtrace_lock)); 16010 ASSERT(p->p_dtrace_helpers == NULL); 16011 16012 help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP); 16013 help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) * 16014 DTRACE_NHELPER_ACTIONS, KM_SLEEP); 16015 16016 p->p_dtrace_helpers = help; 16017 dtrace_helpers++; 16018 16019 return (help); 16020} 16021 16022#if defined(sun) 16023static 16024#endif 16025void 16026dtrace_helpers_destroy(proc_t *p) 16027{ 16028 dtrace_helpers_t *help; 16029 dtrace_vstate_t *vstate; 16030#if defined(sun) 16031 proc_t *p = curproc; 16032#endif 16033 int i; 16034 16035 mutex_enter(&dtrace_lock); 16036 16037 ASSERT(p->p_dtrace_helpers != NULL); 16038 ASSERT(dtrace_helpers > 0); 16039 16040 help = p->p_dtrace_helpers; 16041 vstate = &help->dthps_vstate; 16042 16043 /* 16044 * We're now going to lose the help from this process. 16045 */ 16046 p->p_dtrace_helpers = NULL; 16047 dtrace_sync(); 16048 16049 /* 16050 * Destory the helper actions. 
16051 */ 16052 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { 16053 dtrace_helper_action_t *h, *next; 16054 16055 for (h = help->dthps_actions[i]; h != NULL; h = next) { 16056 next = h->dtha_next; 16057 dtrace_helper_action_destroy(h, vstate); 16058 h = next; 16059 } 16060 } 16061 16062 mutex_exit(&dtrace_lock); 16063 16064 /* 16065 * Destroy the helper providers. 16066 */ 16067 if (help->dthps_maxprovs > 0) { 16068 mutex_enter(&dtrace_meta_lock); 16069 if (dtrace_meta_pid != NULL) { 16070 ASSERT(dtrace_deferred_pid == NULL); 16071 16072 for (i = 0; i < help->dthps_nprovs; i++) { 16073 dtrace_helper_provider_remove( 16074 &help->dthps_provs[i]->dthp_prov, p->p_pid); 16075 } 16076 } else { 16077 mutex_enter(&dtrace_lock); 16078 ASSERT(help->dthps_deferred == 0 || 16079 help->dthps_next != NULL || 16080 help->dthps_prev != NULL || 16081 help == dtrace_deferred_pid); 16082 16083 /* 16084 * Remove the helper from the deferred list. 16085 */ 16086 if (help->dthps_next != NULL) 16087 help->dthps_next->dthps_prev = help->dthps_prev; 16088 if (help->dthps_prev != NULL) 16089 help->dthps_prev->dthps_next = help->dthps_next; 16090 if (dtrace_deferred_pid == help) { 16091 dtrace_deferred_pid = help->dthps_next; 16092 ASSERT(help->dthps_prev == NULL); 16093 } 16094 16095 mutex_exit(&dtrace_lock); 16096 } 16097 16098 mutex_exit(&dtrace_meta_lock); 16099 16100 for (i = 0; i < help->dthps_nprovs; i++) { 16101 dtrace_helper_provider_destroy(help->dthps_provs[i]); 16102 } 16103 16104 kmem_free(help->dthps_provs, help->dthps_maxprovs * 16105 sizeof (dtrace_helper_provider_t *)); 16106 } 16107 16108 mutex_enter(&dtrace_lock); 16109 16110 dtrace_vstate_fini(&help->dthps_vstate); 16111 kmem_free(help->dthps_actions, 16112 sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS); 16113 kmem_free(help, sizeof (dtrace_helpers_t)); 16114 16115 --dtrace_helpers; 16116 mutex_exit(&dtrace_lock); 16117} 16118 16119#if defined(sun) 16120static 16121#endif 16122void 
dtrace_helpers_duplicate(proc_t *from, proc_t *to)
{
	dtrace_helpers_t *help, *newhelp;
	dtrace_helper_action_t *helper, *new, *last;
	dtrace_difo_t *dp;
	dtrace_vstate_t *vstate;
	int i, j, sz, hasprovs = 0;

	/*
	 * Fork hook: deep-copy the helper actions of 'from' into a fresh
	 * helpers structure for 'to', and share (reference-count) its helper
	 * providers.
	 */
	mutex_enter(&dtrace_lock);
	ASSERT(from->p_dtrace_helpers != NULL);
	ASSERT(dtrace_helpers > 0);

	help = from->p_dtrace_helpers;
	newhelp = dtrace_helpers_create(to);
	ASSERT(to->p_dtrace_helpers != NULL);

	newhelp->dthps_generation = help->dthps_generation;
	vstate = &newhelp->dthps_vstate;

	/*
	 * Duplicate the helper actions.
	 */
	for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
		if ((helper = help->dthps_actions[i]) == NULL)
			continue;

		for (last = NULL; helper != NULL; helper = helper->dtha_next) {
			new = kmem_zalloc(sizeof (dtrace_helper_action_t),
			    KM_SLEEP);
			new->dtha_generation = helper->dtha_generation;

			if ((dp = helper->dtha_predicate) != NULL) {
				dp = dtrace_difo_duplicate(dp, vstate);
				new->dtha_predicate = dp;
			}

			new->dtha_nactions = helper->dtha_nactions;
			sz = sizeof (dtrace_difo_t *) * new->dtha_nactions;
			new->dtha_actions = kmem_alloc(sz, KM_SLEEP);

			for (j = 0; j < new->dtha_nactions; j++) {
				dtrace_difo_t *dp = helper->dtha_actions[j];

				ASSERT(dp != NULL);
				dp = dtrace_difo_duplicate(dp, vstate);
				new->dtha_actions[j] = dp;
			}

			/* Append to preserve the original action order. */
			if (last != NULL) {
				last->dtha_next = new;
			} else {
				newhelp->dthps_actions[i] = new;
			}

			last = new;
		}
	}

	/*
	 * Duplicate the helper providers and register them with the
	 * DTrace framework.
	 */
	if (help->dthps_nprovs > 0) {
		newhelp->dthps_nprovs = help->dthps_nprovs;
		newhelp->dthps_maxprovs = help->dthps_nprovs;
		newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs *
		    sizeof (dtrace_helper_provider_t *), KM_SLEEP);
		for (i = 0; i < newhelp->dthps_nprovs; i++) {
			/* Providers are shared, not copied:  bump refcount. */
			newhelp->dthps_provs[i] = help->dthps_provs[i];
			newhelp->dthps_provs[i]->dthp_ref++;
		}

		hasprovs = 1;
	}

	mutex_exit(&dtrace_lock);

	if (hasprovs)
		dtrace_helper_provider_register(to, newhelp, NULL);
}

/*
 * DTrace Hook Functions
 */

/*
 * Called when a kernel module has been loaded:  give every provider a chance
 * to create probes for it, then dispatch any retained enablings for matching.
 */
static void
dtrace_module_loaded(modctl_t *ctl)
{
	dtrace_provider_t *prv;

	mutex_enter(&dtrace_provider_lock);
#if defined(sun)
	mutex_enter(&mod_lock);
#endif

#if defined(sun)
	ASSERT(ctl->mod_busy);
#endif

	/*
	 * We're going to call each provider's per-module provide operation
	 * specifying only this module.
	 */
	for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
		prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);

#if defined(sun)
	mutex_exit(&mod_lock);
#endif
	mutex_exit(&dtrace_provider_lock);

	/*
	 * If we have any retained enablings, we need to match against them.
	 * Enabling probes requires that cpu_lock be held, and we cannot hold
	 * cpu_lock here -- it is legal for cpu_lock to be held when loading a
	 * module.  (In particular, this happens when loading scheduling
	 * classes.)  So if we have any retained enablings, we need to dispatch
	 * our task queue to do the match for us.
	 */
	mutex_enter(&dtrace_lock);

	if (dtrace_retained == NULL) {
		mutex_exit(&dtrace_lock);
		return;
	}

	(void) taskq_dispatch(dtrace_taskq,
	    (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP);

	mutex_exit(&dtrace_lock);

	/*
	 * And now, for a little heuristic sleaze:  in general, we want to
	 * match modules as soon as they load.  However, we cannot guarantee
	 * this, because it would lead us to the lock ordering violation
	 * outlined above.  The common case, of course, is that cpu_lock is
	 * _not_ held -- so we delay here for a clock tick, hoping that that's
	 * long enough for the task queue to do its work.  If it's not, it's
	 * not a serious problem -- it just means that the module that we
	 * just loaded may not be immediately instrumentable.
	 */
	delay(1);
}

/*
 * Called when a kernel module is being unloaded:  remove and destroy all of
 * the module's probes.  On FreeBSD, refuse the unload (via *error) if any of
 * the module's probes are still enabled.
 */
static void
#if defined(sun)
dtrace_module_unloaded(modctl_t *ctl)
#else
dtrace_module_unloaded(modctl_t *ctl, int *error)
#endif
{
	dtrace_probe_t template, *probe, *first, *next;
	dtrace_provider_t *prov;
#if !defined(sun)
	char modname[DTRACE_MODNAMELEN];
	size_t len;
#endif

#if defined(sun)
	template.dtpr_mod = ctl->mod_modname;
#else
	/* Handle the fact that ctl->filename may end in ".ko". */
	strlcpy(modname, ctl->filename, sizeof(modname));
	len = strlen(ctl->filename);
	if (len > 3 && strcmp(modname + len - 3, ".ko") == 0)
		modname[len - 3] = '\0';
	template.dtpr_mod = modname;
#endif

	mutex_enter(&dtrace_provider_lock);
#if defined(sun)
	mutex_enter(&mod_lock);
#endif
	mutex_enter(&dtrace_lock);

#if !defined(sun)
	if (ctl->nenabled > 0) {
		/* Don't allow unloads if a probe is enabled.
		 */
		mutex_exit(&dtrace_provider_lock);
		mutex_exit(&dtrace_lock);
		*error = -1;
		printf(
	"kldunload: attempt to unload module that has DTrace probes enabled\n");
		return;
	}
#endif

	if (dtrace_bymod == NULL) {
		/*
		 * The DTrace module is loaded (obviously) but not attached;
		 * we don't have any work to do.
		 */
		mutex_exit(&dtrace_provider_lock);
#if defined(sun)
		mutex_exit(&mod_lock);
#endif
		mutex_exit(&dtrace_lock);
		return;
	}

	/*
	 * First pass:  bail out (without disabling anything) if any of the
	 * module's probes still has an ECB attached.
	 */
	for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template);
	    probe != NULL; probe = probe->dtpr_nextmod) {
		if (probe->dtpr_ecb != NULL) {
			mutex_exit(&dtrace_provider_lock);
#if defined(sun)
			mutex_exit(&mod_lock);
#endif
			mutex_exit(&dtrace_lock);

			/*
			 * This shouldn't _actually_ be possible -- we're
			 * unloading a module that has an enabled probe in it.
			 * (It's normally up to the provider to make sure that
			 * this can't happen.)  However, because dtps_enable()
			 * doesn't have a failure mode, there can be an
			 * enable/unload race.  Upshot:  we don't want to
			 * assert, but we're not going to disable the
			 * probe, either.
			 */
			if (dtrace_err_verbose) {
#if defined(sun)
				cmn_err(CE_WARN, "unloaded module '%s' had "
				    "enabled probes", ctl->mod_modname);
#else
				cmn_err(CE_WARN, "unloaded module '%s' had "
				    "enabled probes", modname);
#endif
			}

			return;
		}
	}

	probe = first;

	/*
	 * Second pass:  unhash every probe, collecting them on a private
	 * list (reusing dtpr_nextmod) for destruction below.
	 */
	for (first = NULL; probe != NULL; probe = next) {
		ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe);

		dtrace_probes[probe->dtpr_id - 1] = NULL;

		next = probe->dtpr_nextmod;
		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		if (first == NULL) {
			first = probe;
			probe->dtpr_nextmod = NULL;
		} else {
			probe->dtpr_nextmod = first;
			first = probe;
		}
	}

	/*
	 * We've removed all of the module's probes from the hash chains and
	 * from the probe array.  Now issue a dtrace_sync() to be sure that
	 * everyone has cleared out from any probe array processing.
	 */
	dtrace_sync();

	for (probe = first; probe != NULL; probe = first) {
		first = probe->dtpr_nextmod;
		prov = probe->dtpr_provider;
		prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id,
		    probe->dtpr_arg);
		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
#if defined(sun)
		vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1);
#else
		free_unr(dtrace_arena, probe->dtpr_id);
#endif
		kmem_free(probe, sizeof (dtrace_probe_t));
	}

	mutex_exit(&dtrace_lock);
#if defined(sun)
	mutex_exit(&mod_lock);
#endif
	mutex_exit(&dtrace_provider_lock);
}

#if !defined(sun)
/* FreeBSD kld event handler:  forward module-load events to DTrace. */
static void
dtrace_kld_load(void *arg __unused, linker_file_t lf)
{

	dtrace_module_loaded(lf);
}

/* FreeBSD kld event handler:  veto or process a module unload. */
static void
dtrace_kld_unload_try(void *arg __unused, linker_file_t lf, int *error)
{

	if (*error != 0)
		/* We already have an error, so don't do anything. */
		return;
	dtrace_module_unloaded(lf, error);
}
#endif

#if defined(sun)
static void
dtrace_suspend(void)
{
	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
}

static void
dtrace_resume(void)
{
	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume));
}
#endif

/*
 * CPU configuration hook:  on CPU_CONFIG, grow anonymous-state buffers to
 * cover the newly configured CPU.  Returns 0.  Caller holds cpu_lock.
 */
static int
dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	mutex_enter(&dtrace_lock);

	switch (what) {
	case CPU_CONFIG: {
		dtrace_state_t *state;
		dtrace_optval_t *opt, rs, c;

		/*
		 * For now, we only allocate a new buffer for anonymous state.
		 */
		if ((state = dtrace_anon.dta_state) == NULL)
			break;

		if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
			break;

		opt = state->dts_options;
		c = opt[DTRACEOPT_CPU];

		if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu)
			break;

		/*
		 * Regardless of what the actual policy is, we're going to
		 * temporarily set our resize policy to be manual.  We're
		 * also going to temporarily set our CPU option to denote
		 * the newly configured CPU.
		 */
		rs = opt[DTRACEOPT_BUFRESIZE];
		opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL;
		opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu;

		(void) dtrace_state_buffers(state);

		/* Restore the caller-visible options. */
		opt[DTRACEOPT_BUFRESIZE] = rs;
		opt[DTRACEOPT_CPU] = c;

		break;
	}

	case CPU_UNCONFIG:
		/*
		 * We don't free the buffer in the CPU_UNCONFIG case.  (The
		 * buffer will be freed when the consumer exits.)
		 */
		break;

	default:
		break;
	}

	mutex_exit(&dtrace_lock);
	return (0);
}

#if defined(sun)
static void
dtrace_cpu_setup_initial(processorid_t cpu)
{
	(void) dtrace_cpu_setup(CPU_CONFIG, cpu);
}
#endif

/*
 * Record [base, limit) as a toxic address range that probe context must
 * never dereference.  The range table is grown by doubling as needed.
 */
static void
dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
{
	if (dtrace_toxranges >= dtrace_toxranges_max) {
		int osize, nsize;
		dtrace_toxrange_t *range;

		osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);

		if (osize == 0) {
			ASSERT(dtrace_toxrange == NULL);
			ASSERT(dtrace_toxranges_max == 0);
			dtrace_toxranges_max = 1;
		} else {
			dtrace_toxranges_max <<= 1;
		}

		nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
		range = kmem_zalloc(nsize, KM_SLEEP);

		if (dtrace_toxrange != NULL) {
			ASSERT(osize != 0);
			bcopy(dtrace_toxrange, range, osize);
			kmem_free(dtrace_toxrange, osize);
		}

		dtrace_toxrange = range;
	}

	ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == 0);
	ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == 0);

	dtrace_toxrange[dtrace_toxranges].dtt_base = base;
	dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
	dtrace_toxranges++;
}

/*
 * Synchronization barrier for getf()-using enablings; a no-op on FreeBSD
 * (the body is Solaris-only).
 */
static void
dtrace_getf_barrier()
{
#if defined(sun)
	/*
	 * When we have unprivileged (that is, non-DTRACE_CRV_KERNEL) enablings
	 * that contain calls to getf(), this routine will be called on every
	 * closef() before either the underlying vnode is released or the
	 * file_t itself is freed.  By the time we are here, it is essential
	 * that the file_t can no longer be accessed from a call to getf()
	 * in probe context -- that assures that a dtrace_sync() can be used
	 * to clear out any enablings referring to the old structures.
	 */
	if (curthread->t_procp->p_zone->zone_dtrace_getf != 0 ||
	    kcred->cr_zone->zone_dtrace_getf != 0)
		dtrace_sync();
#endif
}

/*
 * DTrace Driver Cookbook Functions
 */
#if defined(sun)
/*
 * Solaris DDI attach(9E) entry point:  create the /dev/dtrace minor nodes,
 * install the framework hooks, create the ID arenas and probe hashes,
 * register the "dtrace" pseudo provider (BEGIN/END/ERROR), and kick off any
 * anonymous (boot-time) enabling.
 */
/*ARGSUSED*/
static int
dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	dtrace_provider_id_t id;
	dtrace_state_t *state = NULL;
	dtrace_enabling_t *enab;

	mutex_enter(&cpu_lock);
	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&dtrace_lock);

	if (ddi_soft_state_init(&dtrace_softstate,
	    sizeof (dtrace_state_t), 0) != 0) {
		cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state");
		mutex_exit(&cpu_lock);
		mutex_exit(&dtrace_provider_lock);
		mutex_exit(&dtrace_lock);
		return (DDI_FAILURE);
	}

	if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR,
	    DTRACEMNRN_DTRACE, DDI_PSEUDO, NULL) == DDI_FAILURE ||
	    ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR,
	    DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) {
		cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes");
		ddi_remove_minor_node(devi, NULL);
		ddi_soft_state_fini(&dtrace_softstate);
		mutex_exit(&cpu_lock);
		mutex_exit(&dtrace_provider_lock);
		mutex_exit(&dtrace_lock);
		return (DDI_FAILURE);
	}

	ddi_report_dev(devi);
	dtrace_devi = devi;

	/* Install the framework's hook functions (defined above). */
	dtrace_modload = dtrace_module_loaded;
	dtrace_modunload = dtrace_module_unloaded;
	dtrace_cpu_init = dtrace_cpu_setup_initial;
	dtrace_helpers_cleanup = dtrace_helpers_destroy;
	dtrace_helpers_fork = dtrace_helpers_duplicate;
	dtrace_cpustart_init = dtrace_suspend;
	dtrace_cpustart_fini = dtrace_resume;
	dtrace_debugger_init = dtrace_suspend;
	dtrace_debugger_fini = dtrace_resume;

	register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);

	ASSERT(MUTEX_HELD(&cpu_lock));

	/* Identifier arenas for probe IDs and clone-device minors. */
	dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1,
	    NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
	dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE,
	    UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0,
	    VM_SLEEP | VMC_IDENTIFIER);
	dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri,
	    1, INT_MAX, 0);

	dtrace_state_cache = kmem_cache_create("dtrace_state_cache",
	    sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN,
	    NULL, NULL, NULL, NULL, NULL, 0);

	ASSERT(MUTEX_HELD(&cpu_lock));
	dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod),
	    offsetof(dtrace_probe_t, dtpr_nextmod),
	    offsetof(dtrace_probe_t, dtpr_prevmod));

	dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func),
	    offsetof(dtrace_probe_t, dtpr_nextfunc),
	    offsetof(dtrace_probe_t, dtpr_prevfunc));

	dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name),
	    offsetof(dtrace_probe_t, dtpr_nextname),
	    offsetof(dtrace_probe_t, dtpr_prevname));

	if (dtrace_retain_max < 1) {
		cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; "
		    "setting to 1", dtrace_retain_max);
		dtrace_retain_max = 1;
	}

	/*
	 * Now discover our toxic ranges.
	 */
	dtrace_toxic_ranges(dtrace_toxrange_add);

	/*
	 * Before we register ourselves as a provider to our own framework,
	 * we would like to assert that dtrace_provider is NULL -- but that's
	 * not true if we were loaded as a dependency of a DTrace provider.
	 * Once we've registered, we can assert that dtrace_provider is our
	 * pseudo provider.
	 */
	(void) dtrace_register("dtrace", &dtrace_provider_attr,
	    DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id);

	ASSERT(dtrace_provider != NULL);
	ASSERT((dtrace_provider_id_t)dtrace_provider == id);

	dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "BEGIN", 0, NULL);
	dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "END", 0, NULL);
	dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "ERROR", 1, NULL);

	dtrace_anon_property();
	mutex_exit(&cpu_lock);

	/*
	 * If DTrace helper tracing is enabled, we need to allocate the
	 * trace buffer and initialize the values.
	 */
	if (dtrace_helptrace_enabled) {
		ASSERT(dtrace_helptrace_buffer == NULL);
		dtrace_helptrace_buffer =
		    kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
		dtrace_helptrace_next = 0;
	}

	/*
	 * If there are already providers, we must ask them to provide their
	 * probes, and then match any anonymous enabling against them.  Note
	 * that there should be no other retained enablings at this time:
	 * the only retained enablings at this time should be the anonymous
	 * enabling.
	 */
	if (dtrace_anon.dta_enabling != NULL) {
		ASSERT(dtrace_retained == dtrace_anon.dta_enabling);

		dtrace_enabling_provide(NULL);
		state = dtrace_anon.dta_state;

		/*
		 * We couldn't hold cpu_lock across the above call to
		 * dtrace_enabling_provide(), but we must hold it to actually
		 * enable the probes.  We have to drop all of our locks, pick
		 * up cpu_lock, and regain our locks before matching the
		 * retained anonymous enabling.
		 */
		mutex_exit(&dtrace_lock);
		mutex_exit(&dtrace_provider_lock);

		mutex_enter(&cpu_lock);
		mutex_enter(&dtrace_provider_lock);
		mutex_enter(&dtrace_lock);

		if ((enab = dtrace_anon.dta_enabling) != NULL)
			(void) dtrace_enabling_match(enab, NULL);

		mutex_exit(&cpu_lock);
	}

	mutex_exit(&dtrace_lock);
	mutex_exit(&dtrace_provider_lock);

	if (state != NULL) {
		/*
		 * If we created any anonymous state, set it going now.
		 */
		(void) dtrace_state_go(state, &dtrace_anon.dta_beganon);
	}

	return (DDI_SUCCESS);
}
#endif

#if !defined(sun)
static void dtrace_dtr(void *);
#endif

/*
 * Device open entry point:  check the caller's DTrace privileges, ask all
 * providers to provide their probes, and create a new consumer state.
 */
/*ARGSUSED*/
static int
#if defined(sun)
dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
#else
dtrace_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
#endif
{
	dtrace_state_t *state;
	uint32_t priv;
	uid_t uid;
	zoneid_t zoneid;

#if defined(sun)
	if (getminor(*devp) == DTRACEMNRN_HELPER)
		return (0);

	/*
	 * If this wasn't an open with the "helper" minor, then it must be
	 * the "dtrace" minor.
	 */
	if (getminor(*devp) == DTRACEMNRN_DTRACE)
		return (ENXIO);
#else
	cred_t *cred_p = NULL;
	cred_p = dev->si_cred;

	/*
	 * If no DTRACE_PRIV_* bits are set in the credential, then the
	 * caller lacks sufficient permission to do anything with DTrace.
	 */
	dtrace_cred2priv(cred_p, &priv, &uid, &zoneid);
	if (priv == DTRACE_PRIV_NONE) {
#endif

		/*
		 * NOTE(review): the opening brace of this block lives in the
		 * !sun arm above, so brace balance differs between the two
		 * preprocessor arms -- verify the sun configuration against
		 * the illumos original before building with 'sun' defined.
		 */
		return (EACCES);
	}

	/*
	 * Ask all providers to provide all their probes.
	 */
	mutex_enter(&dtrace_provider_lock);
	dtrace_probe_provide(NULL, NULL);
	mutex_exit(&dtrace_provider_lock);

	mutex_enter(&cpu_lock);
	mutex_enter(&dtrace_lock);
	dtrace_opens++;
	dtrace_membar_producer();

#if defined(sun)
	/*
	 * If the kernel debugger is active (that is, if the kernel debugger
	 * modified text in some way), we won't allow the open.
	 */
	if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
		dtrace_opens--;
		mutex_exit(&cpu_lock);
		mutex_exit(&dtrace_lock);
		return (EBUSY);
	}

	state = dtrace_state_create(devp, cred_p);
#else
	/* Per-open state is tied to the descriptor via cdevpriv. */
	state = dtrace_state_create(dev);
	devfs_set_cdevpriv(state, dtrace_dtr);
#endif

	mutex_exit(&cpu_lock);

	if (state == NULL) {
#if defined(sun)
		if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
			(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
#else
		--dtrace_opens;
#endif
		mutex_exit(&dtrace_lock);
		return (EAGAIN);
	}

	mutex_exit(&dtrace_lock);

	return (0);
}

/*
 * Device teardown:  Solaris close(9E) entry point, or the FreeBSD cdevpriv
 * destructor installed by dtrace_open().  Destroys the consumer state (and
 * any anonymous state it grabbed) and drops the open count.
 */
/*ARGSUSED*/
#if defined(sun)
static int
dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
#else
static void
dtrace_dtr(void *data)
#endif
{
#if defined(sun)
	minor_t minor = getminor(dev);
	dtrace_state_t *state;

	if (minor == DTRACEMNRN_HELPER)
		return (0);

	state = ddi_get_soft_state(dtrace_softstate, minor);
#else
	dtrace_state_t *state = data;
#endif

	mutex_enter(&cpu_lock);
	mutex_enter(&dtrace_lock);

#ifdef illumos
	if (state->dts_anon)
#else
	if (state != NULL && state->dts_anon)
#endif
	{
		/*
		 * There is anonymous state.  Destroy that first.
		 */
		ASSERT(dtrace_anon.dta_state == NULL);
		dtrace_state_destroy(state->dts_anon);
	}

#ifdef illumos
	dtrace_state_destroy(state);
#else
	if (state != NULL) {
		dtrace_state_destroy(state);
		/*
		 * NOTE(review): size 0 relies on the FreeBSD kmem_free()
		 * compatibility shim ignoring its size argument -- confirm
		 * against the opensolaris compat layer.
		 */
		kmem_free(state, 0);
	}
#endif
	ASSERT(dtrace_opens > 0);

#if defined(sun)
	/*
	 * Only relinquish control of the kernel debugger interface when there
	 * are no consumers and no anonymous enablings.
	 */
	if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
		(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
#else
	--dtrace_opens;
#endif

	mutex_exit(&dtrace_lock);
	mutex_exit(&cpu_lock);

#if defined(sun)
	return (0);
#endif
}

#if defined(sun)
/*
 * Handle ioctls on the "helper" minor:  add helper DOF (DTRACEHIOC_ADDDOF /
 * DTRACEHIOC_ADD) or remove a helper generation (DTRACEHIOC_REMOVE).
 * Returns 0 on success with the generation in *rv, or an errno.
 */
/*ARGSUSED*/
static int
dtrace_ioctl_helper(int cmd, intptr_t arg, int *rv)
{
	int rval;
	dof_helper_t help, *dhp = NULL;

	switch (cmd) {
	case DTRACEHIOC_ADDDOF:
		if (copyin((void *)arg, &help, sizeof (help)) != 0) {
			dtrace_dof_error(NULL, "failed to copyin DOF helper");
			return (EFAULT);
		}

		dhp = &help;
		arg = (intptr_t)help.dofhp_dof;
		/*FALLTHROUGH*/

	case DTRACEHIOC_ADD: {
		dof_hdr_t *dof = dtrace_dof_copyin(arg, &rval);

		if (dof == NULL)
			return (rval);

		mutex_enter(&dtrace_lock);

		/*
		 * dtrace_helper_slurp() takes responsibility for the dof --
		 * it may free it now or it may save it and free it later.
		 */
		if ((rval = dtrace_helper_slurp(dof, dhp)) != -1) {
			*rv = rval;
			rval = 0;
		} else {
			rval = EINVAL;
		}

		mutex_exit(&dtrace_lock);
		return (rval);
	}

	case DTRACEHIOC_REMOVE: {
		mutex_enter(&dtrace_lock);
		rval = dtrace_helper_destroygen(arg);
		mutex_exit(&dtrace_lock);

		return (rval);
	}

	default:
		break;
	}

	return (ENOTTY);
}

/*
 * Main ioctl entry point for the "dtrace" minors:  dispatch consumer
 * requests against the per-open state (or its anonymous state, if grabbed).
 */
/*ARGSUSED*/
static int
dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
	minor_t minor = getminor(dev);
	dtrace_state_t *state;
	int rval;

	if (minor == DTRACEMNRN_HELPER)
		return (dtrace_ioctl_helper(cmd, arg, rv));

	state = ddi_get_soft_state(dtrace_softstate, minor);

	if (state->dts_anon) {
		ASSERT(dtrace_anon.dta_state == NULL);
		state = state->dts_anon;
	}

	switch (cmd) {
	case DTRACEIOC_PROVIDER: {
		dtrace_providerdesc_t pvd;
		dtrace_provider_t *pvp;

		if (copyin((void *)arg, &pvd, sizeof (pvd)) != 0)
			return (EFAULT);

		pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0';
		mutex_enter(&dtrace_provider_lock);

		for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) {
			if (strcmp(pvp->dtpv_name, pvd.dtvd_name) == 0)
				break;
		}

		mutex_exit(&dtrace_provider_lock);

		if (pvp == NULL)
			return (ESRCH);

		bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t));
		bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t));

		if (copyout(&pvd, (void *)arg, sizeof (pvd)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_EPROBE: {
		dtrace_eprobedesc_t epdesc;
		dtrace_ecb_t *ecb;
		dtrace_action_t *act;
		void *buf;
		size_t size;
		uintptr_t dest;
		int nrecs;

		if
(copyin((void *)arg, &epdesc, sizeof (epdesc)) != 0) 17005 return (EFAULT); 17006 17007 mutex_enter(&dtrace_lock); 17008 17009 if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) { 17010 mutex_exit(&dtrace_lock); 17011 return (EINVAL); 17012 } 17013 17014 if (ecb->dte_probe == NULL) { 17015 mutex_exit(&dtrace_lock); 17016 return (EINVAL); 17017 } 17018 17019 epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id; 17020 epdesc.dtepd_uarg = ecb->dte_uarg; 17021 epdesc.dtepd_size = ecb->dte_size; 17022 17023 nrecs = epdesc.dtepd_nrecs; 17024 epdesc.dtepd_nrecs = 0; 17025 for (act = ecb->dte_action; act != NULL; act = act->dta_next) { 17026 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) 17027 continue; 17028 17029 epdesc.dtepd_nrecs++; 17030 } 17031 17032 /* 17033 * Now that we have the size, we need to allocate a temporary 17034 * buffer in which to store the complete description. We need 17035 * the temporary buffer to be able to drop dtrace_lock() 17036 * across the copyout(), below. 
17037 */ 17038 size = sizeof (dtrace_eprobedesc_t) + 17039 (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t)); 17040 17041 buf = kmem_alloc(size, KM_SLEEP); 17042 dest = (uintptr_t)buf; 17043 17044 bcopy(&epdesc, (void *)dest, sizeof (epdesc)); 17045 dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]); 17046 17047 for (act = ecb->dte_action; act != NULL; act = act->dta_next) { 17048 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) 17049 continue; 17050 17051 if (nrecs-- == 0) 17052 break; 17053 17054 bcopy(&act->dta_rec, (void *)dest, 17055 sizeof (dtrace_recdesc_t)); 17056 dest += sizeof (dtrace_recdesc_t); 17057 } 17058 17059 mutex_exit(&dtrace_lock); 17060 17061 if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { 17062 kmem_free(buf, size); 17063 return (EFAULT); 17064 } 17065 17066 kmem_free(buf, size); 17067 return (0); 17068 } 17069 17070 case DTRACEIOC_AGGDESC: { 17071 dtrace_aggdesc_t aggdesc; 17072 dtrace_action_t *act; 17073 dtrace_aggregation_t *agg; 17074 int nrecs; 17075 uint32_t offs; 17076 dtrace_recdesc_t *lrec; 17077 void *buf; 17078 size_t size; 17079 uintptr_t dest; 17080 17081 if (copyin((void *)arg, &aggdesc, sizeof (aggdesc)) != 0) 17082 return (EFAULT); 17083 17084 mutex_enter(&dtrace_lock); 17085 17086 if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) { 17087 mutex_exit(&dtrace_lock); 17088 return (EINVAL); 17089 } 17090 17091 aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid; 17092 17093 nrecs = aggdesc.dtagd_nrecs; 17094 aggdesc.dtagd_nrecs = 0; 17095 17096 offs = agg->dtag_base; 17097 lrec = &agg->dtag_action.dta_rec; 17098 aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs; 17099 17100 for (act = agg->dtag_first; ; act = act->dta_next) { 17101 ASSERT(act->dta_intuple || 17102 DTRACEACT_ISAGG(act->dta_kind)); 17103 17104 /* 17105 * If this action has a record size of zero, it 17106 * denotes an argument to the aggregating action. 
17107 * Because the presence of this record doesn't (or 17108 * shouldn't) affect the way the data is interpreted, 17109 * we don't copy it out to save user-level the 17110 * confusion of dealing with a zero-length record. 17111 */ 17112 if (act->dta_rec.dtrd_size == 0) { 17113 ASSERT(agg->dtag_hasarg); 17114 continue; 17115 } 17116 17117 aggdesc.dtagd_nrecs++; 17118 17119 if (act == &agg->dtag_action) 17120 break; 17121 } 17122 17123 /* 17124 * Now that we have the size, we need to allocate a temporary 17125 * buffer in which to store the complete description. We need 17126 * the temporary buffer to be able to drop dtrace_lock() 17127 * across the copyout(), below. 17128 */ 17129 size = sizeof (dtrace_aggdesc_t) + 17130 (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t)); 17131 17132 buf = kmem_alloc(size, KM_SLEEP); 17133 dest = (uintptr_t)buf; 17134 17135 bcopy(&aggdesc, (void *)dest, sizeof (aggdesc)); 17136 dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]); 17137 17138 for (act = agg->dtag_first; ; act = act->dta_next) { 17139 dtrace_recdesc_t rec = act->dta_rec; 17140 17141 /* 17142 * See the comment in the above loop for why we pass 17143 * over zero-length records. 
17144 */ 17145 if (rec.dtrd_size == 0) { 17146 ASSERT(agg->dtag_hasarg); 17147 continue; 17148 } 17149 17150 if (nrecs-- == 0) 17151 break; 17152 17153 rec.dtrd_offset -= offs; 17154 bcopy(&rec, (void *)dest, sizeof (rec)); 17155 dest += sizeof (dtrace_recdesc_t); 17156 17157 if (act == &agg->dtag_action) 17158 break; 17159 } 17160 17161 mutex_exit(&dtrace_lock); 17162 17163 if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { 17164 kmem_free(buf, size); 17165 return (EFAULT); 17166 } 17167 17168 kmem_free(buf, size); 17169 return (0); 17170 } 17171 17172 case DTRACEIOC_ENABLE: { 17173 dof_hdr_t *dof; 17174 dtrace_enabling_t *enab = NULL; 17175 dtrace_vstate_t *vstate; 17176 int err = 0; 17177 17178 *rv = 0; 17179 17180 /* 17181 * If a NULL argument has been passed, we take this as our 17182 * cue to reevaluate our enablings. 17183 */ 17184 if (arg == NULL) { 17185 dtrace_enabling_matchall(); 17186 17187 return (0); 17188 } 17189 17190 if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL) 17191 return (rval); 17192 17193 mutex_enter(&cpu_lock); 17194 mutex_enter(&dtrace_lock); 17195 vstate = &state->dts_vstate; 17196 17197 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) { 17198 mutex_exit(&dtrace_lock); 17199 mutex_exit(&cpu_lock); 17200 dtrace_dof_destroy(dof); 17201 return (EBUSY); 17202 } 17203 17204 if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) { 17205 mutex_exit(&dtrace_lock); 17206 mutex_exit(&cpu_lock); 17207 dtrace_dof_destroy(dof); 17208 return (EINVAL); 17209 } 17210 17211 if ((rval = dtrace_dof_options(dof, state)) != 0) { 17212 dtrace_enabling_destroy(enab); 17213 mutex_exit(&dtrace_lock); 17214 mutex_exit(&cpu_lock); 17215 dtrace_dof_destroy(dof); 17216 return (rval); 17217 } 17218 17219 if ((err = dtrace_enabling_match(enab, rv)) == 0) { 17220 err = dtrace_enabling_retain(enab); 17221 } else { 17222 dtrace_enabling_destroy(enab); 17223 } 17224 17225 mutex_exit(&cpu_lock); 17226 mutex_exit(&dtrace_lock); 17227 
dtrace_dof_destroy(dof); 17228 17229 return (err); 17230 } 17231 17232 case DTRACEIOC_REPLICATE: { 17233 dtrace_repldesc_t desc; 17234 dtrace_probedesc_t *match = &desc.dtrpd_match; 17235 dtrace_probedesc_t *create = &desc.dtrpd_create; 17236 int err; 17237 17238 if (copyin((void *)arg, &desc, sizeof (desc)) != 0) 17239 return (EFAULT); 17240 17241 match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; 17242 match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; 17243 match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; 17244 match->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; 17245 17246 create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; 17247 create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; 17248 create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; 17249 create->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; 17250 17251 mutex_enter(&dtrace_lock); 17252 err = dtrace_enabling_replicate(state, match, create); 17253 mutex_exit(&dtrace_lock); 17254 17255 return (err); 17256 } 17257 17258 case DTRACEIOC_PROBEMATCH: 17259 case DTRACEIOC_PROBES: { 17260 dtrace_probe_t *probe = NULL; 17261 dtrace_probedesc_t desc; 17262 dtrace_probekey_t pkey; 17263 dtrace_id_t i; 17264 int m = 0; 17265 uint32_t priv; 17266 uid_t uid; 17267 zoneid_t zoneid; 17268 17269 if (copyin((void *)arg, &desc, sizeof (desc)) != 0) 17270 return (EFAULT); 17271 17272 desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; 17273 desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; 17274 desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; 17275 desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0'; 17276 17277 /* 17278 * Before we attempt to match this probe, we want to give 17279 * all providers the opportunity to provide it. 
17280 */ 17281 if (desc.dtpd_id == DTRACE_IDNONE) { 17282 mutex_enter(&dtrace_provider_lock); 17283 dtrace_probe_provide(&desc, NULL); 17284 mutex_exit(&dtrace_provider_lock); 17285 desc.dtpd_id++; 17286 } 17287 17288 if (cmd == DTRACEIOC_PROBEMATCH) { 17289 dtrace_probekey(&desc, &pkey); 17290 pkey.dtpk_id = DTRACE_IDNONE; 17291 } 17292 17293 dtrace_cred2priv(cr, &priv, &uid, &zoneid); 17294 17295 mutex_enter(&dtrace_lock); 17296 17297 if (cmd == DTRACEIOC_PROBEMATCH) { 17298 for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { 17299 if ((probe = dtrace_probes[i - 1]) != NULL && 17300 (m = dtrace_match_probe(probe, &pkey, 17301 priv, uid, zoneid)) != 0) 17302 break; 17303 } 17304 17305 if (m < 0) { 17306 mutex_exit(&dtrace_lock); 17307 return (EINVAL); 17308 } 17309 17310 } else { 17311 for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { 17312 if ((probe = dtrace_probes[i - 1]) != NULL && 17313 dtrace_match_priv(probe, priv, uid, zoneid)) 17314 break; 17315 } 17316 } 17317 17318 if (probe == NULL) { 17319 mutex_exit(&dtrace_lock); 17320 return (ESRCH); 17321 } 17322 17323 dtrace_probe_description(probe, &desc); 17324 mutex_exit(&dtrace_lock); 17325 17326 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) 17327 return (EFAULT); 17328 17329 return (0); 17330 } 17331 17332 case DTRACEIOC_PROBEARG: { 17333 dtrace_argdesc_t desc; 17334 dtrace_probe_t *probe; 17335 dtrace_provider_t *prov; 17336 17337 if (copyin((void *)arg, &desc, sizeof (desc)) != 0) 17338 return (EFAULT); 17339 17340 if (desc.dtargd_id == DTRACE_IDNONE) 17341 return (EINVAL); 17342 17343 if (desc.dtargd_ndx == DTRACE_ARGNONE) 17344 return (EINVAL); 17345 17346 mutex_enter(&dtrace_provider_lock); 17347 mutex_enter(&mod_lock); 17348 mutex_enter(&dtrace_lock); 17349 17350 if (desc.dtargd_id > dtrace_nprobes) { 17351 mutex_exit(&dtrace_lock); 17352 mutex_exit(&mod_lock); 17353 mutex_exit(&dtrace_provider_lock); 17354 return (EINVAL); 17355 } 17356 17357 if ((probe = dtrace_probes[desc.dtargd_id - 1]) == 
NULL) { 17358 mutex_exit(&dtrace_lock); 17359 mutex_exit(&mod_lock); 17360 mutex_exit(&dtrace_provider_lock); 17361 return (EINVAL); 17362 } 17363 17364 mutex_exit(&dtrace_lock); 17365 17366 prov = probe->dtpr_provider; 17367 17368 if (prov->dtpv_pops.dtps_getargdesc == NULL) { 17369 /* 17370 * There isn't any typed information for this probe. 17371 * Set the argument number to DTRACE_ARGNONE. 17372 */ 17373 desc.dtargd_ndx = DTRACE_ARGNONE; 17374 } else { 17375 desc.dtargd_native[0] = '\0'; 17376 desc.dtargd_xlate[0] = '\0'; 17377 desc.dtargd_mapping = desc.dtargd_ndx; 17378 17379 prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg, 17380 probe->dtpr_id, probe->dtpr_arg, &desc); 17381 } 17382 17383 mutex_exit(&mod_lock); 17384 mutex_exit(&dtrace_provider_lock); 17385 17386 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) 17387 return (EFAULT); 17388 17389 return (0); 17390 } 17391 17392 case DTRACEIOC_GO: { 17393 processorid_t cpuid; 17394 rval = dtrace_state_go(state, &cpuid); 17395 17396 if (rval != 0) 17397 return (rval); 17398 17399 if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) 17400 return (EFAULT); 17401 17402 return (0); 17403 } 17404 17405 case DTRACEIOC_STOP: { 17406 processorid_t cpuid; 17407 17408 mutex_enter(&dtrace_lock); 17409 rval = dtrace_state_stop(state, &cpuid); 17410 mutex_exit(&dtrace_lock); 17411 17412 if (rval != 0) 17413 return (rval); 17414 17415 if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) 17416 return (EFAULT); 17417 17418 return (0); 17419 } 17420 17421 case DTRACEIOC_DOFGET: { 17422 dof_hdr_t hdr, *dof; 17423 uint64_t len; 17424 17425 if (copyin((void *)arg, &hdr, sizeof (hdr)) != 0) 17426 return (EFAULT); 17427 17428 mutex_enter(&dtrace_lock); 17429 dof = dtrace_dof_create(state); 17430 mutex_exit(&dtrace_lock); 17431 17432 len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz); 17433 rval = copyout(dof, (void *)arg, len); 17434 dtrace_dof_destroy(dof); 17435 17436 return (rval == 0 ? 
0 : EFAULT); 17437 } 17438 17439 case DTRACEIOC_AGGSNAP: 17440 case DTRACEIOC_BUFSNAP: { 17441 dtrace_bufdesc_t desc; 17442 caddr_t cached; 17443 dtrace_buffer_t *buf; 17444 17445 if (copyin((void *)arg, &desc, sizeof (desc)) != 0) 17446 return (EFAULT); 17447 17448 if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU) 17449 return (EINVAL); 17450 17451 mutex_enter(&dtrace_lock); 17452 17453 if (cmd == DTRACEIOC_BUFSNAP) { 17454 buf = &state->dts_buffer[desc.dtbd_cpu]; 17455 } else { 17456 buf = &state->dts_aggbuffer[desc.dtbd_cpu]; 17457 } 17458 17459 if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) { 17460 size_t sz = buf->dtb_offset; 17461 17462 if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) { 17463 mutex_exit(&dtrace_lock); 17464 return (EBUSY); 17465 } 17466 17467 /* 17468 * If this buffer has already been consumed, we're 17469 * going to indicate that there's nothing left here 17470 * to consume. 17471 */ 17472 if (buf->dtb_flags & DTRACEBUF_CONSUMED) { 17473 mutex_exit(&dtrace_lock); 17474 17475 desc.dtbd_size = 0; 17476 desc.dtbd_drops = 0; 17477 desc.dtbd_errors = 0; 17478 desc.dtbd_oldest = 0; 17479 sz = sizeof (desc); 17480 17481 if (copyout(&desc, (void *)arg, sz) != 0) 17482 return (EFAULT); 17483 17484 return (0); 17485 } 17486 17487 /* 17488 * If this is a ring buffer that has wrapped, we want 17489 * to copy the whole thing out. 
17490 */ 17491 if (buf->dtb_flags & DTRACEBUF_WRAPPED) { 17492 dtrace_buffer_polish(buf); 17493 sz = buf->dtb_size; 17494 } 17495 17496 if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) { 17497 mutex_exit(&dtrace_lock); 17498 return (EFAULT); 17499 } 17500 17501 desc.dtbd_size = sz; 17502 desc.dtbd_drops = buf->dtb_drops; 17503 desc.dtbd_errors = buf->dtb_errors; 17504 desc.dtbd_oldest = buf->dtb_xamot_offset; 17505 desc.dtbd_timestamp = dtrace_gethrtime(); 17506 17507 mutex_exit(&dtrace_lock); 17508 17509 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) 17510 return (EFAULT); 17511 17512 buf->dtb_flags |= DTRACEBUF_CONSUMED; 17513 17514 return (0); 17515 } 17516 17517 if (buf->dtb_tomax == NULL) { 17518 ASSERT(buf->dtb_xamot == NULL); 17519 mutex_exit(&dtrace_lock); 17520 return (ENOENT); 17521 } 17522 17523 cached = buf->dtb_tomax; 17524 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); 17525 17526 dtrace_xcall(desc.dtbd_cpu, 17527 (dtrace_xcall_t)dtrace_buffer_switch, buf); 17528 17529 state->dts_errors += buf->dtb_xamot_errors; 17530 17531 /* 17532 * If the buffers did not actually switch, then the cross call 17533 * did not take place -- presumably because the given CPU is 17534 * not in the ready set. If this is the case, we'll return 17535 * ENOENT. 17536 */ 17537 if (buf->dtb_tomax == cached) { 17538 ASSERT(buf->dtb_xamot != cached); 17539 mutex_exit(&dtrace_lock); 17540 return (ENOENT); 17541 } 17542 17543 ASSERT(cached == buf->dtb_xamot); 17544 17545 /* 17546 * We have our snapshot; now copy it out. 
17547 */ 17548 if (copyout(buf->dtb_xamot, desc.dtbd_data, 17549 buf->dtb_xamot_offset) != 0) { 17550 mutex_exit(&dtrace_lock); 17551 return (EFAULT); 17552 } 17553 17554 desc.dtbd_size = buf->dtb_xamot_offset; 17555 desc.dtbd_drops = buf->dtb_xamot_drops; 17556 desc.dtbd_errors = buf->dtb_xamot_errors; 17557 desc.dtbd_oldest = 0; 17558 desc.dtbd_timestamp = buf->dtb_switched; 17559 17560 mutex_exit(&dtrace_lock); 17561 17562 /* 17563 * Finally, copy out the buffer description. 17564 */ 17565 if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) 17566 return (EFAULT); 17567 17568 return (0); 17569 } 17570 17571 case DTRACEIOC_CONF: { 17572 dtrace_conf_t conf; 17573 17574 bzero(&conf, sizeof (conf)); 17575 conf.dtc_difversion = DIF_VERSION; 17576 conf.dtc_difintregs = DIF_DIR_NREGS; 17577 conf.dtc_diftupregs = DIF_DTR_NREGS; 17578 conf.dtc_ctfmodel = CTF_MODEL_NATIVE; 17579 17580 if (copyout(&conf, (void *)arg, sizeof (conf)) != 0) 17581 return (EFAULT); 17582 17583 return (0); 17584 } 17585 17586 case DTRACEIOC_STATUS: { 17587 dtrace_status_t stat; 17588 dtrace_dstate_t *dstate; 17589 int i, j; 17590 uint64_t nerrs; 17591 17592 /* 17593 * See the comment in dtrace_state_deadman() for the reason 17594 * for setting dts_laststatus to INT64_MAX before setting 17595 * it to the correct value. 
17596 */ 17597 state->dts_laststatus = INT64_MAX; 17598 dtrace_membar_producer(); 17599 state->dts_laststatus = dtrace_gethrtime(); 17600 17601 bzero(&stat, sizeof (stat)); 17602 17603 mutex_enter(&dtrace_lock); 17604 17605 if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) { 17606 mutex_exit(&dtrace_lock); 17607 return (ENOENT); 17608 } 17609 17610 if (state->dts_activity == DTRACE_ACTIVITY_DRAINING) 17611 stat.dtst_exiting = 1; 17612 17613 nerrs = state->dts_errors; 17614 dstate = &state->dts_vstate.dtvs_dynvars; 17615 17616 for (i = 0; i < NCPU; i++) { 17617 dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i]; 17618 17619 stat.dtst_dyndrops += dcpu->dtdsc_drops; 17620 stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops; 17621 stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops; 17622 17623 if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL) 17624 stat.dtst_filled++; 17625 17626 nerrs += state->dts_buffer[i].dtb_errors; 17627 17628 for (j = 0; j < state->dts_nspeculations; j++) { 17629 dtrace_speculation_t *spec; 17630 dtrace_buffer_t *buf; 17631 17632 spec = &state->dts_speculations[j]; 17633 buf = &spec->dtsp_buffer[i]; 17634 stat.dtst_specdrops += buf->dtb_xamot_drops; 17635 } 17636 } 17637 17638 stat.dtst_specdrops_busy = state->dts_speculations_busy; 17639 stat.dtst_specdrops_unavail = state->dts_speculations_unavail; 17640 stat.dtst_stkstroverflows = state->dts_stkstroverflows; 17641 stat.dtst_dblerrors = state->dts_dblerrors; 17642 stat.dtst_killed = 17643 (state->dts_activity == DTRACE_ACTIVITY_KILLED); 17644 stat.dtst_errors = nerrs; 17645 17646 mutex_exit(&dtrace_lock); 17647 17648 if (copyout(&stat, (void *)arg, sizeof (stat)) != 0) 17649 return (EFAULT); 17650 17651 return (0); 17652 } 17653 17654 case DTRACEIOC_FORMAT: { 17655 dtrace_fmtdesc_t fmt; 17656 char *str; 17657 int len; 17658 17659 if (copyin((void *)arg, &fmt, sizeof (fmt)) != 0) 17660 return (EFAULT); 17661 17662 mutex_enter(&dtrace_lock); 17663 17664 if (fmt.dtfd_format == 0 || 
17665 fmt.dtfd_format > state->dts_nformats) { 17666 mutex_exit(&dtrace_lock); 17667 return (EINVAL); 17668 } 17669 17670 /* 17671 * Format strings are allocated contiguously and they are 17672 * never freed; if a format index is less than the number 17673 * of formats, we can assert that the format map is non-NULL 17674 * and that the format for the specified index is non-NULL. 17675 */ 17676 ASSERT(state->dts_formats != NULL); 17677 str = state->dts_formats[fmt.dtfd_format - 1]; 17678 ASSERT(str != NULL); 17679 17680 len = strlen(str) + 1; 17681 17682 if (len > fmt.dtfd_length) { 17683 fmt.dtfd_length = len; 17684 17685 if (copyout(&fmt, (void *)arg, sizeof (fmt)) != 0) { 17686 mutex_exit(&dtrace_lock); 17687 return (EINVAL); 17688 } 17689 } else { 17690 if (copyout(str, fmt.dtfd_string, len) != 0) { 17691 mutex_exit(&dtrace_lock); 17692 return (EINVAL); 17693 } 17694 } 17695 17696 mutex_exit(&dtrace_lock); 17697 return (0); 17698 } 17699 17700 default: 17701 break; 17702 } 17703 17704 return (ENOTTY); 17705} 17706 17707/*ARGSUSED*/ 17708static int 17709dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 17710{ 17711 dtrace_state_t *state; 17712 17713 switch (cmd) { 17714 case DDI_DETACH: 17715 break; 17716 17717 case DDI_SUSPEND: 17718 return (DDI_SUCCESS); 17719 17720 default: 17721 return (DDI_FAILURE); 17722 } 17723 17724 mutex_enter(&cpu_lock); 17725 mutex_enter(&dtrace_provider_lock); 17726 mutex_enter(&dtrace_lock); 17727 17728 ASSERT(dtrace_opens == 0); 17729 17730 if (dtrace_helpers > 0) { 17731 mutex_exit(&dtrace_provider_lock); 17732 mutex_exit(&dtrace_lock); 17733 mutex_exit(&cpu_lock); 17734 return (DDI_FAILURE); 17735 } 17736 17737 if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) { 17738 mutex_exit(&dtrace_provider_lock); 17739 mutex_exit(&dtrace_lock); 17740 mutex_exit(&cpu_lock); 17741 return (DDI_FAILURE); 17742 } 17743 17744 dtrace_provider = NULL; 17745 17746 if ((state = dtrace_anon_grab()) != NULL) { 17747 /* 17748 * If 
there were ECBs on this state, the provider should 17749 * have not been allowed to detach; assert that there is 17750 * none. 17751 */ 17752 ASSERT(state->dts_necbs == 0); 17753 dtrace_state_destroy(state); 17754 17755 /* 17756 * If we're being detached with anonymous state, we need to 17757 * indicate to the kernel debugger that DTrace is now inactive. 17758 */ 17759 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); 17760 } 17761 17762 bzero(&dtrace_anon, sizeof (dtrace_anon_t)); 17763 unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL); 17764 dtrace_cpu_init = NULL; 17765 dtrace_helpers_cleanup = NULL; 17766 dtrace_helpers_fork = NULL; 17767 dtrace_cpustart_init = NULL; 17768 dtrace_cpustart_fini = NULL; 17769 dtrace_debugger_init = NULL; 17770 dtrace_debugger_fini = NULL; 17771 dtrace_modload = NULL; 17772 dtrace_modunload = NULL; 17773 17774 ASSERT(dtrace_getf == 0); 17775 ASSERT(dtrace_closef == NULL); 17776 17777 mutex_exit(&cpu_lock); 17778 17779 if (dtrace_helptrace_enabled) { 17780 kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize); 17781 dtrace_helptrace_buffer = NULL; 17782 } 17783 17784 kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *)); 17785 dtrace_probes = NULL; 17786 dtrace_nprobes = 0; 17787 17788 dtrace_hash_destroy(dtrace_bymod); 17789 dtrace_hash_destroy(dtrace_byfunc); 17790 dtrace_hash_destroy(dtrace_byname); 17791 dtrace_bymod = NULL; 17792 dtrace_byfunc = NULL; 17793 dtrace_byname = NULL; 17794 17795 kmem_cache_destroy(dtrace_state_cache); 17796 vmem_destroy(dtrace_minor); 17797 vmem_destroy(dtrace_arena); 17798 17799 if (dtrace_toxrange != NULL) { 17800 kmem_free(dtrace_toxrange, 17801 dtrace_toxranges_max * sizeof (dtrace_toxrange_t)); 17802 dtrace_toxrange = NULL; 17803 dtrace_toxranges = 0; 17804 dtrace_toxranges_max = 0; 17805 } 17806 17807 ddi_remove_minor_node(dtrace_devi, NULL); 17808 dtrace_devi = NULL; 17809 17810 ddi_soft_state_fini(&dtrace_softstate); 17811 17812 
ASSERT(dtrace_vtime_references == 0); 17813 ASSERT(dtrace_opens == 0); 17814 ASSERT(dtrace_retained == NULL); 17815 17816 mutex_exit(&dtrace_lock); 17817 mutex_exit(&dtrace_provider_lock); 17818 17819 /* 17820 * We don't destroy the task queue until after we have dropped our 17821 * locks (taskq_destroy() may block on running tasks). To prevent 17822 * attempting to do work after we have effectively detached but before 17823 * the task queue has been destroyed, all tasks dispatched via the 17824 * task queue must check that DTrace is still attached before 17825 * performing any operation. 17826 */ 17827 taskq_destroy(dtrace_taskq); 17828 dtrace_taskq = NULL; 17829 17830 return (DDI_SUCCESS); 17831} 17832#endif 17833 17834#if defined(sun) 17835/*ARGSUSED*/ 17836static int 17837dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 17838{ 17839 int error; 17840 17841 switch (infocmd) { 17842 case DDI_INFO_DEVT2DEVINFO: 17843 *result = (void *)dtrace_devi; 17844 error = DDI_SUCCESS; 17845 break; 17846 case DDI_INFO_DEVT2INSTANCE: 17847 *result = (void *)0; 17848 error = DDI_SUCCESS; 17849 break; 17850 default: 17851 error = DDI_FAILURE; 17852 } 17853 return (error); 17854} 17855#endif 17856 17857#if defined(sun) 17858static struct cb_ops dtrace_cb_ops = { 17859 dtrace_open, /* open */ 17860 dtrace_close, /* close */ 17861 nulldev, /* strategy */ 17862 nulldev, /* print */ 17863 nodev, /* dump */ 17864 nodev, /* read */ 17865 nodev, /* write */ 17866 dtrace_ioctl, /* ioctl */ 17867 nodev, /* devmap */ 17868 nodev, /* mmap */ 17869 nodev, /* segmap */ 17870 nochpoll, /* poll */ 17871 ddi_prop_op, /* cb_prop_op */ 17872 0, /* streamtab */ 17873 D_NEW | D_MP /* Driver compatibility flag */ 17874}; 17875 17876static struct dev_ops dtrace_ops = { 17877 DEVO_REV, /* devo_rev */ 17878 0, /* refcnt */ 17879 dtrace_info, /* get_dev_info */ 17880 nulldev, /* identify */ 17881 nulldev, /* probe */ 17882 dtrace_attach, /* attach */ 17883 dtrace_detach, /* 
detach */ 17884 nodev, /* reset */ 17885 &dtrace_cb_ops, /* driver operations */ 17886 NULL, /* bus operations */ 17887 nodev /* dev power */ 17888}; 17889 17890static struct modldrv modldrv = { 17891 &mod_driverops, /* module type (this is a pseudo driver) */ 17892 "Dynamic Tracing", /* name of module */ 17893 &dtrace_ops, /* driver ops */ 17894}; 17895 17896static struct modlinkage modlinkage = { 17897 MODREV_1, 17898 (void *)&modldrv, 17899 NULL 17900}; 17901 17902int 17903_init(void) 17904{ 17905 return (mod_install(&modlinkage)); 17906} 17907 17908int 17909_info(struct modinfo *modinfop) 17910{ 17911 return (mod_info(&modlinkage, modinfop)); 17912} 17913 17914int 17915_fini(void) 17916{ 17917 return (mod_remove(&modlinkage)); 17918} 17919#else 17920 17921static d_ioctl_t dtrace_ioctl; 17922static d_ioctl_t dtrace_ioctl_helper; 17923static void dtrace_load(void *); 17924static int dtrace_unload(void); 17925static struct cdev *dtrace_dev; 17926static struct cdev *helper_dev; 17927 17928void dtrace_invop_init(void); 17929void dtrace_invop_uninit(void); 17930 17931static struct cdevsw dtrace_cdevsw = { 17932 .d_version = D_VERSION, 17933 .d_ioctl = dtrace_ioctl, 17934 .d_open = dtrace_open, 17935 .d_name = "dtrace", 17936}; 17937 17938static struct cdevsw helper_cdevsw = { 17939 .d_version = D_VERSION, 17940 .d_ioctl = dtrace_ioctl_helper, 17941 .d_name = "helper", 17942}; 17943 17944#include <dtrace_anon.c> 17945#include <dtrace_ioctl.c> 17946#include <dtrace_load.c> 17947#include <dtrace_modevent.c> 17948#include <dtrace_sysctl.c> 17949#include <dtrace_unload.c> 17950#include <dtrace_vtime.c> 17951#include <dtrace_hacks.c> 17952#include <dtrace_isa.c> 17953 17954SYSINIT(dtrace_load, SI_SUB_DTRACE, SI_ORDER_FIRST, dtrace_load, NULL); 17955SYSUNINIT(dtrace_unload, SI_SUB_DTRACE, SI_ORDER_FIRST, dtrace_unload, NULL); 17956SYSINIT(dtrace_anon_init, SI_SUB_DTRACE_ANON, SI_ORDER_FIRST, dtrace_anon_init, NULL); 17957 17958DEV_MODULE(dtrace, dtrace_modevent, NULL); 
17959MODULE_VERSION(dtrace, 1); 17960MODULE_DEPEND(dtrace, opensolaris, 1, 1, 1); 17961#endif 17962