kern_ktrace.c revision 198411
1274955Ssvnmir/*- 2274955Ssvnmir * Copyright (c) 1989, 1993 3274955Ssvnmir * The Regents of the University of California. 4274955Ssvnmir * Copyright (c) 2005 Robert N. M. Watson 5274955Ssvnmir * All rights reserved. 6274955Ssvnmir * 7274955Ssvnmir * Redistribution and use in source and binary forms, with or without 8274955Ssvnmir * modification, are permitted provided that the following conditions 9274955Ssvnmir * are met: 10274955Ssvnmir * 1. Redistributions of source code must retain the above copyright 11274955Ssvnmir * notice, this list of conditions and the following disclaimer. 12274955Ssvnmir * 2. Redistributions in binary form must reproduce the above copyright 13274955Ssvnmir * notice, this list of conditions and the following disclaimer in the 14274955Ssvnmir * documentation and/or other materials provided with the distribution. 15274955Ssvnmir * 4. Neither the name of the University nor the names of its contributors 16274955Ssvnmir * may be used to endorse or promote products derived from this software 17274955Ssvnmir * without specific prior written permission. 18274955Ssvnmir * 19274955Ssvnmir * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20274955Ssvnmir * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21274955Ssvnmir * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22274955Ssvnmir * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23274955Ssvnmir * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24274955Ssvnmir * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25274955Ssvnmir * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26274955Ssvnmir * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27274955Ssvnmir * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28274955Ssvnmir * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29274955Ssvnmir * SUCH DAMAGE. 30277320Sdim * 31277320Sdim * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93 32277320Sdim */ 33274955Ssvnmir 34274955Ssvnmir#include <sys/cdefs.h> 35274955Ssvnmir__FBSDID("$FreeBSD: head/sys/kern/kern_ktrace.c 198411 2009-10-23 15:14:54Z jhb $"); 36274955Ssvnmir 37274955Ssvnmir#include "opt_ktrace.h" 38274955Ssvnmir 39277320Sdim#include <sys/param.h> 40277320Sdim#include <sys/systm.h> 41277320Sdim#include <sys/fcntl.h> 42274955Ssvnmir#include <sys/kernel.h> 43274955Ssvnmir#include <sys/kthread.h> 44274955Ssvnmir#include <sys/lock.h> 45274955Ssvnmir#include <sys/mutex.h> 46274955Ssvnmir#include <sys/malloc.h> 47274955Ssvnmir#include <sys/mount.h> 48274955Ssvnmir#include <sys/namei.h> 49274955Ssvnmir#include <sys/priv.h> 50277320Sdim#include <sys/proc.h> 51277320Sdim#include <sys/unistd.h> 52277320Sdim#include <sys/vnode.h> 53274955Ssvnmir#include <sys/socket.h> 54274955Ssvnmir#include <sys/stat.h> 55274955Ssvnmir#include <sys/ktrace.h> 56274955Ssvnmir#include <sys/sx.h> 57277320Sdim#include <sys/sysctl.h> 58277320Sdim#include <sys/syslog.h> 59277320Sdim#include <sys/sysproto.h> 60274955Ssvnmir 61274955Ssvnmir#include <security/mac/mac_framework.h> 62274955Ssvnmir 63274955Ssvnmir/* 64274955Ssvnmir * The ktrace facility allows the tracing of certain key events in user space 65274955Ssvnmir * processes, such as system calls, signal delivery, context switches, and 66274955Ssvnmir * user generated events using utrace(2). It works by streaming event 67274955Ssvnmir * records and data to a vnode associated with the process using the 68274955Ssvnmir * ktrace(2) system call. In general, records can be written directly from 69274955Ssvnmir * the context that generates the event. One important exception to this is 70274955Ssvnmir * during a context switch, where sleeping is not permitted. To handle this 71274955Ssvnmir * case, trace events are generated using in-kernel ktr_request records, and 72274955Ssvnmir * then delivered to disk at a convenient moment -- either immediately, the 73274955Ssvnmir * next traceable event, at system call return, or at process exit. 74274955Ssvnmir * 75274955Ssvnmir * When dealing with multiple threads or processes writing to the same event 76274955Ssvnmir * log, ordering guarantees are weak: specifically, if an event has multiple 77274955Ssvnmir * records (i.e., system call enter and return), they may be interlaced with 78274955Ssvnmir * records from another event. Process and thread ID information is provided 79274955Ssvnmir * in the record, and user applications can de-interlace events if required. 80274955Ssvnmir */ 81296417Sdim 82274955Ssvnmirstatic MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE"); 83274955Ssvnmir 84274955Ssvnmir#ifdef KTRACE 85288943Sdim 86274955Ssvnmir#ifndef KTRACE_REQUEST_POOL 87274955Ssvnmir#define KTRACE_REQUEST_POOL 100 88274955Ssvnmir#endif 89274955Ssvnmir 90274955Ssvnmirstruct ktr_request { 91288943Sdim struct ktr_header ktr_header; 92274955Ssvnmir void *ktr_buffer; 93274955Ssvnmir union { 94274955Ssvnmir struct ktr_syscall ktr_syscall; 95296417Sdim struct ktr_sysret ktr_sysret; 96274955Ssvnmir struct ktr_genio ktr_genio; 97274955Ssvnmir struct ktr_psig ktr_psig; 98274955Ssvnmir struct ktr_csw ktr_csw; 99274955Ssvnmir } ktr_data; 100274955Ssvnmir STAILQ_ENTRY(ktr_request) ktr_list; 101274955Ssvnmir}; 102274955Ssvnmir 103274955Ssvnmirstatic int data_lengths[] = { 104274955Ssvnmir 0, /* none */ 105274955Ssvnmir offsetof(struct ktr_syscall, ktr_args), /* KTR_SYSCALL */ 106274955Ssvnmir sizeof(struct ktr_sysret), /* KTR_SYSRET */ 107274955Ssvnmir 0, /* KTR_NAMEI */ 108274955Ssvnmir sizeof(struct ktr_genio), /* KTR_GENIO */ 109274955Ssvnmir sizeof(struct ktr_psig), /* KTR_PSIG */ 110274955Ssvnmir sizeof(struct ktr_csw), /* KTR_CSW */ 111296417Sdim 0, /* KTR_USER */ 112274955Ssvnmir 0, /* KTR_STRUCT */ 113274955Ssvnmir 0, /* KTR_SYSCTL */ 114274955Ssvnmir}; 115274955Ssvnmir 116274955Ssvnmirstatic STAILQ_HEAD(, ktr_request) ktr_free; 117274955Ssvnmir 118274955Ssvnmirstatic SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options"); 119274955Ssvnmir 120274955Ssvnmirstatic u_int ktr_requestpool = KTRACE_REQUEST_POOL; 121296417SdimTUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool); 122288943Sdim 123296417Sdimstatic u_int ktr_geniosize = PAGE_SIZE; 124296417SdimTUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize); 125288943SdimSYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize, 126274955Ssvnmir 0, "Maximum size of genio event payload"); 127274955Ssvnmir 128274955Ssvnmirstatic int print_message = 1; 129274955Ssvnmirstruct mtx ktrace_mtx; 130274955Ssvnmirstatic struct sx ktrace_sx; 131274955Ssvnmir 132274955Ssvnmirstatic void ktrace_init(void *dummy); 133274955Ssvnmirstatic int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS); 134274955Ssvnmirstatic u_int ktrace_resize_pool(u_int newsize); 135274955Ssvnmirstatic struct ktr_request *ktr_getrequest(int type); 136274955Ssvnmirstatic void ktr_submitrequest(struct thread *td, struct ktr_request *req); 137274955Ssvnmirstatic void ktr_freerequest(struct ktr_request *req); 138280031Sdimstatic void ktr_writerequest(struct thread *td, struct ktr_request *req); 139280031Sdimstatic int ktrcanset(struct thread *,struct proc *); 140280031Sdimstatic int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *); 141274955Ssvnmirstatic int ktrops(struct thread *,struct proc *,int,int,struct vnode *); 142274955Ssvnmir 143274955Ssvnmir/* 144274955Ssvnmir * ktrace itself generates events, such as context switches, which we do not 145274955Ssvnmir * wish to trace. Maintain a flag, TDP_INKTRACE, on each thread to determine 146274955Ssvnmir * whether or not it is in a region where tracing of events should be 147274955Ssvnmir * suppressed. 148274955Ssvnmir */ 149274955Ssvnmirstatic void 150288943Sdimktrace_enter(struct thread *td) 151288943Sdim{ 152274955Ssvnmir 153274955Ssvnmir KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set")); 154274955Ssvnmir td->td_pflags |= TDP_INKTRACE; 155274955Ssvnmir} 156274955Ssvnmir 157274955Ssvnmirstatic void 158274955Ssvnmirktrace_exit(struct thread *td) 159274955Ssvnmir{ 160274955Ssvnmir 161274955Ssvnmir KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set")); 162296417Sdim td->td_pflags &= ~TDP_INKTRACE; 163274955Ssvnmir} 164274955Ssvnmir 165274955Ssvnmirstatic void 166274955Ssvnmirktrace_assert(struct thread *td) 167274955Ssvnmir{ 168274955Ssvnmir 169274955Ssvnmir KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set")); 170274955Ssvnmir} 171274955Ssvnmir 172274955Ssvnmirstatic void 173274955Ssvnmirktrace_init(void *dummy) 174288943Sdim{ 175296417Sdim struct ktr_request *req; 176296417Sdim int i; 177274955Ssvnmir 178274955Ssvnmir mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET); 179274955Ssvnmir sx_init(&ktrace_sx, "ktrace_sx"); 180274955Ssvnmir STAILQ_INIT(&ktr_free); 181274955Ssvnmir for (i = 0; i < ktr_requestpool; i++) { 182274955Ssvnmir req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK); 183274955Ssvnmir STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 184274955Ssvnmir } 185274955Ssvnmir} 186274955SsvnmirSYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL); 187274955Ssvnmir 188274955Ssvnmirstatic int 189274955Ssvnmirsysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS) 190274955Ssvnmir{ 191274955Ssvnmir struct thread *td; 192274955Ssvnmir u_int newsize, oldsize, wantsize; 193274955Ssvnmir int error; 194274955Ssvnmir 195274955Ssvnmir /* Handle easy read-only case first to avoid warnings from GCC. */ 196274955Ssvnmir if (!req->newptr) { 197274955Ssvnmir mtx_lock(&ktrace_mtx); 198274955Ssvnmir oldsize = ktr_requestpool; 199274955Ssvnmir mtx_unlock(&ktrace_mtx); 200274955Ssvnmir return (SYSCTL_OUT(req, &oldsize, sizeof(u_int))); 201274955Ssvnmir } 202274955Ssvnmir 203274955Ssvnmir error = SYSCTL_IN(req, &wantsize, sizeof(u_int)); 204274955Ssvnmir if (error) 205274955Ssvnmir return (error); 206274955Ssvnmir td = curthread; 207274955Ssvnmir ktrace_enter(td); 208274955Ssvnmir mtx_lock(&ktrace_mtx); 209274955Ssvnmir oldsize = ktr_requestpool; 210274955Ssvnmir newsize = ktrace_resize_pool(wantsize); 211274955Ssvnmir mtx_unlock(&ktrace_mtx); 212274955Ssvnmir ktrace_exit(td); 213274955Ssvnmir error = SYSCTL_OUT(req, &oldsize, sizeof(u_int)); 214274955Ssvnmir if (error) 215274955Ssvnmir return (error); 216274955Ssvnmir if (wantsize > oldsize && newsize < wantsize) 217274955Ssvnmir return (ENOSPC); 218274955Ssvnmir return (0); 219274955Ssvnmir} 220274955SsvnmirSYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW, 221274955Ssvnmir &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", ""); 222274955Ssvnmir 223274955Ssvnmirstatic u_int 224274955Ssvnmirktrace_resize_pool(u_int newsize) 225274955Ssvnmir{ 226274955Ssvnmir struct ktr_request *req; 227274955Ssvnmir int bound; 228274955Ssvnmir 229274955Ssvnmir mtx_assert(&ktrace_mtx, MA_OWNED); 230274955Ssvnmir print_message = 1; 231274955Ssvnmir bound = newsize - ktr_requestpool; 232274955Ssvnmir if (bound == 0) 233274955Ssvnmir return (ktr_requestpool); 234274955Ssvnmir if (bound < 0) 235274955Ssvnmir /* Shrink pool down to newsize if possible. */ 236274955Ssvnmir while (bound++ < 0) { 237274955Ssvnmir req = STAILQ_FIRST(&ktr_free); 238274955Ssvnmir if (req == NULL) 239274955Ssvnmir return (ktr_requestpool); 240274955Ssvnmir STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 241274955Ssvnmir ktr_requestpool--; 242274955Ssvnmir mtx_unlock(&ktrace_mtx); 243280031Sdim free(req, M_KTRACE); 244280031Sdim mtx_lock(&ktrace_mtx); 245280031Sdim } 246288943Sdim else 247296417Sdim /* Grow pool up to newsize. */ 248274955Ssvnmir while (bound-- > 0) { 249274955Ssvnmir mtx_unlock(&ktrace_mtx); 250274955Ssvnmir req = malloc(sizeof(struct ktr_request), M_KTRACE, 251274955Ssvnmir M_WAITOK); 252274955Ssvnmir mtx_lock(&ktrace_mtx); 253274955Ssvnmir STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 254274955Ssvnmir ktr_requestpool++; 255274955Ssvnmir } 256274955Ssvnmir return (ktr_requestpool); 257280031Sdim} 258280031Sdim 259274955Ssvnmir/* ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[]. */ 260280031SdimCTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) == 261274955Ssvnmir (sizeof((struct thread *)NULL)->td_name)); 262296417Sdim 263274955Ssvnmirstatic struct ktr_request * 264274955Ssvnmirktr_getrequest(int type) 265280031Sdim{ 266274955Ssvnmir struct ktr_request *req; 267274955Ssvnmir struct thread *td = curthread; 268280031Sdim struct proc *p = td->td_proc; 269280031Sdim int pm; 270280031Sdim 271288943Sdim ktrace_enter(td); /* XXX: In caller instead? */ 272288943Sdim mtx_lock(&ktrace_mtx); 273280031Sdim if (!KTRCHECK(td, type)) { 274280031Sdim mtx_unlock(&ktrace_mtx); 275280031Sdim ktrace_exit(td); 276280031Sdim return (NULL); 277274955Ssvnmir } 278274955Ssvnmir req = STAILQ_FIRST(&ktr_free); 279280031Sdim if (req != NULL) { 280274955Ssvnmir STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 281274955Ssvnmir req->ktr_header.ktr_type = type; 282274955Ssvnmir if (p->p_traceflag & KTRFAC_DROP) { 283274955Ssvnmir req->ktr_header.ktr_type |= KTR_DROP; 284274955Ssvnmir p->p_traceflag &= ~KTRFAC_DROP; 285274955Ssvnmir } 286274955Ssvnmir mtx_unlock(&ktrace_mtx); 287274955Ssvnmir microtime(&req->ktr_header.ktr_time); 288274955Ssvnmir req->ktr_header.ktr_pid = p->p_pid; 289274955Ssvnmir req->ktr_header.ktr_tid = td->td_tid; 290274955Ssvnmir bcopy(td->td_name, req->ktr_header.ktr_comm, 291274955Ssvnmir sizeof(req->ktr_header.ktr_comm)); 292274955Ssvnmir req->ktr_buffer = NULL; 293274955Ssvnmir req->ktr_header.ktr_len = 0; 294274955Ssvnmir } else { 295274955Ssvnmir p->p_traceflag |= KTRFAC_DROP; 296274955Ssvnmir pm = print_message; 297274955Ssvnmir print_message = 0; 298274955Ssvnmir mtx_unlock(&ktrace_mtx); 299296417Sdim if (pm) 300274955Ssvnmir printf("Out of ktrace request objects.\n"); 301274955Ssvnmir ktrace_exit(td); 302274955Ssvnmir } 303274955Ssvnmir return (req); 304274955Ssvnmir} 305274955Ssvnmir 306274955Ssvnmir/* 307274955Ssvnmir * Some trace generation environments don't permit direct access to VFS, 308274955Ssvnmir * such as during a context switch where sleeping is not allowed. Under these 309274955Ssvnmir * circumstances, queue a request to the thread to be written asynchronously 310274955Ssvnmir * later. 311274955Ssvnmir */ 312274955Ssvnmirstatic void 313274955Ssvnmirktr_enqueuerequest(struct thread *td, struct ktr_request *req) 314274955Ssvnmir{ 315274955Ssvnmir 316274955Ssvnmir mtx_lock(&ktrace_mtx); 317274955Ssvnmir STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list); 318274955Ssvnmir mtx_unlock(&ktrace_mtx); 319274955Ssvnmir ktrace_exit(td); 320274955Ssvnmir} 321274955Ssvnmir 322274955Ssvnmir/* 323274955Ssvnmir * Drain any pending ktrace records from the per-thread queue to disk. This 324274955Ssvnmir * is used both internally before committing other records, and also on 325274955Ssvnmir * system call return. We drain all the ones we can find at the time when 326274955Ssvnmir * drain is requested, but don't keep draining after that as those events 327274955Ssvnmir * may be approximately "after" the current event. 328274955Ssvnmir */ 329274955Ssvnmirstatic void 330274955Ssvnmirktr_drain(struct thread *td) 331274955Ssvnmir{ 332274955Ssvnmir struct ktr_request *queued_req; 333274955Ssvnmir STAILQ_HEAD(, ktr_request) local_queue; 334274955Ssvnmir 335274955Ssvnmir ktrace_assert(td); 336274955Ssvnmir sx_assert(&ktrace_sx, SX_XLOCKED); 337274955Ssvnmir 338274955Ssvnmir STAILQ_INIT(&local_queue); /* XXXRW: needed? */ 339274955Ssvnmir 340274955Ssvnmir if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) { 341274955Ssvnmir mtx_lock(&ktrace_mtx); 342274955Ssvnmir STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr); 343274955Ssvnmir mtx_unlock(&ktrace_mtx); 344274955Ssvnmir 345274955Ssvnmir while ((queued_req = STAILQ_FIRST(&local_queue))) { 346274955Ssvnmir STAILQ_REMOVE_HEAD(&local_queue, ktr_list); 347274955Ssvnmir ktr_writerequest(td, queued_req); 348274955Ssvnmir ktr_freerequest(queued_req); 349274955Ssvnmir } 350274955Ssvnmir } 351274955Ssvnmir} 352274955Ssvnmir 353274955Ssvnmir/* 354274955Ssvnmir * Submit a trace record for immediate commit to disk -- to be used only 355274955Ssvnmir * where entering VFS is OK. First drain any pending records that may have 356274955Ssvnmir * been cached in the thread. 357274955Ssvnmir */ 358274955Ssvnmirstatic void 359274955Ssvnmirktr_submitrequest(struct thread *td, struct ktr_request *req) 360274955Ssvnmir{ 361274955Ssvnmir 362274955Ssvnmir ktrace_assert(td); 363274955Ssvnmir 364274955Ssvnmir sx_xlock(&ktrace_sx); 365274955Ssvnmir ktr_drain(td); 366274955Ssvnmir ktr_writerequest(td, req); 367274955Ssvnmir ktr_freerequest(req); 368274955Ssvnmir sx_xunlock(&ktrace_sx); 369296417Sdim 370274955Ssvnmir ktrace_exit(td); 371274955Ssvnmir} 372274955Ssvnmir 373280031Sdimstatic void 374280031Sdimktr_freerequest(struct ktr_request *req) 375274955Ssvnmir{ 376274955Ssvnmir 377274955Ssvnmir if (req->ktr_buffer != NULL) 378274955Ssvnmir free(req->ktr_buffer, M_KTRACE); 379274955Ssvnmir mtx_lock(&ktrace_mtx); 380274955Ssvnmir STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 381274955Ssvnmir mtx_unlock(&ktrace_mtx); 382274955Ssvnmir} 383274955Ssvnmir 384274955Ssvnmirvoid 385274955Ssvnmirktrsyscall(code, narg, args) 386274955Ssvnmir int code, narg; 387274955Ssvnmir register_t args[]; 388274955Ssvnmir{ 389274955Ssvnmir struct ktr_request *req; 390274955Ssvnmir struct ktr_syscall *ktp; 391274955Ssvnmir size_t buflen; 392280031Sdim char *buf = NULL; 393280031Sdim 394280031Sdim buflen = sizeof(register_t) * narg; 395280031Sdim if (buflen > 0) { 396280031Sdim buf = malloc(buflen, M_KTRACE, M_WAITOK); 397280031Sdim bcopy(args, buf, buflen); 398280031Sdim } 399288943Sdim req = ktr_getrequest(KTR_SYSCALL); 400288943Sdim if (req == NULL) { 401288943Sdim if (buf != NULL) 402296417Sdim free(buf, M_KTRACE); 403274955Ssvnmir return; 404274955Ssvnmir } 405274955Ssvnmir ktp = &req->ktr_data.ktr_syscall; 406274955Ssvnmir ktp->ktr_code = code; 407274955Ssvnmir ktp->ktr_narg = narg; 408274955Ssvnmir if (buflen > 0) { 409274955Ssvnmir req->ktr_header.ktr_len = buflen; 410280031Sdim req->ktr_buffer = buf; 411280031Sdim } 412280031Sdim ktr_submitrequest(curthread, req); 413280031Sdim} 414280031Sdim 415274955Ssvnmirvoid 416274955Ssvnmirktrsysret(code, error, retval) 417274955Ssvnmir int code, error; 418280031Sdim register_t retval; 419280031Sdim{ 420280031Sdim struct ktr_request *req; 421280031Sdim struct ktr_sysret *ktp; 422280031Sdim 423288943Sdim req = ktr_getrequest(KTR_SYSRET); 424288943Sdim if (req == NULL) 425280031Sdim return; 426280031Sdim ktp = &req->ktr_data.ktr_sysret; 427280031Sdim ktp->ktr_code = code; 428280031Sdim ktp->ktr_error = error; 429280031Sdim ktp->ktr_retval = retval; /* what about val2 ? */ 430280031Sdim ktr_submitrequest(curthread, req); 431274955Ssvnmir} 432274955Ssvnmir 433280031Sdim/* 434274955Ssvnmir * When a process exits, drain per-process asynchronous trace records. 435274955Ssvnmir */ 436274955Ssvnmirvoid 437274955Ssvnmirktrprocexit(struct thread *td) 438274955Ssvnmir{ 439274955Ssvnmir 440274955Ssvnmir ktrace_enter(td); 441274955Ssvnmir sx_xlock(&ktrace_sx); 442296417Sdim ktr_drain(td); 443274955Ssvnmir sx_xunlock(&ktrace_sx); 444274955Ssvnmir ktrace_exit(td); 445274955Ssvnmir} 446274955Ssvnmir 447296417Sdim/* 448274955Ssvnmir * When a thread returns, drain any asynchronous records generated by the 449274955Ssvnmir * system call. 450296417Sdim */ 451296417Sdimvoid 452274955Ssvnmirktruserret(struct thread *td) 453274955Ssvnmir{ 454274955Ssvnmir 455274955Ssvnmir ktrace_enter(td); 456274955Ssvnmir sx_xlock(&ktrace_sx); 457274955Ssvnmir ktr_drain(td); 458274955Ssvnmir sx_xunlock(&ktrace_sx); 459274955Ssvnmir ktrace_exit(td); 460274955Ssvnmir} 461274955Ssvnmir 462274955Ssvnmirvoid 463274955Ssvnmirktrnamei(path) 464274955Ssvnmir char *path; 465274955Ssvnmir{ 466274955Ssvnmir struct ktr_request *req; 467274955Ssvnmir int namelen; 468274955Ssvnmir char *buf = NULL; 469274955Ssvnmir 470274955Ssvnmir namelen = strlen(path); 471274955Ssvnmir if (namelen > 0) { 472274955Ssvnmir buf = malloc(namelen, M_KTRACE, M_WAITOK); 473274955Ssvnmir bcopy(path, buf, namelen); 474274955Ssvnmir } 475274955Ssvnmir req = ktr_getrequest(KTR_NAMEI); 476296417Sdim if (req == NULL) { 477274955Ssvnmir if (buf != NULL) 478274955Ssvnmir free(buf, M_KTRACE); 479274955Ssvnmir return; 480274955Ssvnmir } 481296417Sdim if (namelen > 0) { 482274955Ssvnmir req->ktr_header.ktr_len = namelen; 483274955Ssvnmir req->ktr_buffer = buf; 484274955Ssvnmir } 485274955Ssvnmir ktr_submitrequest(curthread, req); 486274955Ssvnmir} 487274955Ssvnmir 488274955Ssvnmirvoid 489274955Ssvnmirktrsysctl(name, namelen) 490274955Ssvnmir int *name; 491274955Ssvnmir u_int namelen; 492274955Ssvnmir{ 493274955Ssvnmir struct ktr_request *req; 494274955Ssvnmir u_int mib[CTL_MAXNAME + 2]; 495274955Ssvnmir char *mibname; 496274955Ssvnmir size_t mibnamelen; 497274955Ssvnmir int error; 498274955Ssvnmir 499274955Ssvnmir /* Lookup name of mib. */ 500274955Ssvnmir KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long")); 501274955Ssvnmir mib[0] = 0; 502274955Ssvnmir mib[1] = 1; 503274955Ssvnmir bcopy(name, mib + 2, namelen * sizeof(*name)); 504274955Ssvnmir mibnamelen = 128; 505274955Ssvnmir mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK); 506274955Ssvnmir error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen, 507274955Ssvnmir NULL, 0, &mibnamelen, 0); 508274955Ssvnmir if (error) { 509274955Ssvnmir free(mibname, M_KTRACE); 510274955Ssvnmir return; 511274955Ssvnmir } 512274955Ssvnmir req = ktr_getrequest(KTR_SYSCTL); 513274955Ssvnmir if (req == NULL) { 514274955Ssvnmir free(mibname, M_KTRACE); 515274955Ssvnmir return; 516274955Ssvnmir } 517274955Ssvnmir req->ktr_header.ktr_len = mibnamelen; 518274955Ssvnmir req->ktr_buffer = mibname; 519274955Ssvnmir ktr_submitrequest(curthread, req); 520274955Ssvnmir} 521274955Ssvnmir 522274955Ssvnmirvoid 523274955Ssvnmirktrgenio(fd, rw, uio, error) 524274955Ssvnmir int fd; 525274955Ssvnmir enum uio_rw rw; 526274955Ssvnmir struct uio *uio; 527274955Ssvnmir int error; 528274955Ssvnmir{ 529274955Ssvnmir struct ktr_request *req; 530274955Ssvnmir struct ktr_genio *ktg; 531274955Ssvnmir int datalen; 532274955Ssvnmir char *buf; 533274955Ssvnmir 534280031Sdim if (error) { 535274955Ssvnmir free(uio, M_IOV); 536274955Ssvnmir return; 537274955Ssvnmir } 538274955Ssvnmir uio->uio_offset = 0; 539274955Ssvnmir uio->uio_rw = UIO_WRITE; 540274955Ssvnmir datalen = imin(uio->uio_resid, ktr_geniosize); 541274955Ssvnmir buf = malloc(datalen, M_KTRACE, M_WAITOK); 542274955Ssvnmir error = uiomove(buf, datalen, uio); 543274955Ssvnmir free(uio, M_IOV); 544274955Ssvnmir if (error) { 545274955Ssvnmir free(buf, M_KTRACE); 546274955Ssvnmir return; 547274955Ssvnmir } 548274955Ssvnmir req = ktr_getrequest(KTR_GENIO); 549274955Ssvnmir if (req == NULL) { 550274955Ssvnmir free(buf, M_KTRACE); 551274955Ssvnmir return; 552274955Ssvnmir } 553274955Ssvnmir ktg = &req->ktr_data.ktr_genio; 554274955Ssvnmir ktg->ktr_fd = fd; 555274955Ssvnmir ktg->ktr_rw = rw; 556274955Ssvnmir req->ktr_header.ktr_len = datalen; 557274955Ssvnmir req->ktr_buffer = buf; 558274955Ssvnmir ktr_submitrequest(curthread, req); 559274955Ssvnmir} 560274955Ssvnmir 561274955Ssvnmirvoid 562274955Ssvnmirktrpsig(sig, action, mask, code) 563296417Sdim int sig; 564274955Ssvnmir sig_t action; 565274955Ssvnmir sigset_t *mask; 566274955Ssvnmir int code; 567274955Ssvnmir{ 568288943Sdim struct ktr_request *req; 569296417Sdim struct ktr_psig *kp; 570274955Ssvnmir 571274955Ssvnmir req = ktr_getrequest(KTR_PSIG); 572274955Ssvnmir if (req == NULL) 573274955Ssvnmir return; 574274955Ssvnmir kp = &req->ktr_data.ktr_psig; 575274955Ssvnmir kp->signo = (char)sig; 576274955Ssvnmir kp->action = action; 577296417Sdim kp->mask = *mask; 578274955Ssvnmir kp->code = code; 579274955Ssvnmir ktr_enqueuerequest(curthread, req); 580274955Ssvnmir} 581274955Ssvnmir 582274955Ssvnmirvoid 583274955Ssvnmirktrcsw(out, user) 584274955Ssvnmir int out, user; 585274955Ssvnmir{ 586274955Ssvnmir struct ktr_request *req; 587274955Ssvnmir struct ktr_csw *kc; 588 589 req = ktr_getrequest(KTR_CSW); 590 if (req == NULL) 591 return; 592 kc = &req->ktr_data.ktr_csw; 593 kc->out = out; 594 kc->user = user; 595 ktr_enqueuerequest(curthread, req); 596} 597 598void 599ktrstruct(name, namelen, data, datalen) 600 const char *name; 601 size_t namelen; 602 void *data; 603 size_t datalen; 604{ 605 struct ktr_request *req; 606 char *buf = NULL; 607 size_t buflen; 608 609 if (!data) 610 datalen = 0; 611 buflen = namelen + 1 + datalen; 612 buf = malloc(buflen, M_KTRACE, M_WAITOK); 613 bcopy(name, buf, namelen); 614 buf[namelen] = '\0'; 615 bcopy(data, buf + namelen + 1, datalen); 616 if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) { 617 free(buf, M_KTRACE); 618 return; 619 } 620 req->ktr_buffer = buf; 621 req->ktr_header.ktr_len = buflen; 622 ktr_submitrequest(curthread, req); 623} 624#endif /* KTRACE */ 625 626/* Interface and common routines */ 627 628#ifndef _SYS_SYSPROTO_H_ 629struct ktrace_args { 630 char *fname; 631 int ops; 632 int facs; 633 int pid; 634}; 635#endif 636/* ARGSUSED */ 637int 638ktrace(td, uap) 639 struct thread *td; 640 register struct ktrace_args *uap; 641{ 642#ifdef KTRACE 643 register struct vnode *vp = NULL; 644 register struct proc *p; 645 struct pgrp *pg; 646 int facs = uap->facs & ~KTRFAC_ROOT; 647 int ops = KTROP(uap->ops); 648 int descend = uap->ops & KTRFLAG_DESCEND; 649 int nfound, ret = 0; 650 int flags, error = 0, vfslocked; 651 struct nameidata nd; 652 struct ucred *cred; 653 654 /* 655 * Need something to (un)trace. 656 */ 657 if (ops != KTROP_CLEARFILE && facs == 0) 658 return (EINVAL); 659 660 ktrace_enter(td); 661 if (ops != KTROP_CLEAR) { 662 /* 663 * an operation which requires a file argument. 664 */ 665 NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, 666 uap->fname, td); 667 flags = FREAD | FWRITE | O_NOFOLLOW; 668 error = vn_open(&nd, &flags, 0, NULL); 669 if (error) { 670 ktrace_exit(td); 671 return (error); 672 } 673 vfslocked = NDHASGIANT(&nd); 674 NDFREE(&nd, NDF_ONLY_PNBUF); 675 vp = nd.ni_vp; 676 VOP_UNLOCK(vp, 0); 677 if (vp->v_type != VREG) { 678 (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); 679 VFS_UNLOCK_GIANT(vfslocked); 680 ktrace_exit(td); 681 return (EACCES); 682 } 683 VFS_UNLOCK_GIANT(vfslocked); 684 } 685 /* 686 * Clear all uses of the tracefile. 687 */ 688 if (ops == KTROP_CLEARFILE) { 689 int vrele_count; 690 691 vrele_count = 0; 692 sx_slock(&allproc_lock); 693 FOREACH_PROC_IN_SYSTEM(p) { 694 PROC_LOCK(p); 695 if (p->p_tracevp == vp) { 696 if (ktrcanset(td, p)) { 697 mtx_lock(&ktrace_mtx); 698 cred = p->p_tracecred; 699 p->p_tracecred = NULL; 700 p->p_tracevp = NULL; 701 p->p_traceflag = 0; 702 mtx_unlock(&ktrace_mtx); 703 vrele_count++; 704 crfree(cred); 705 } else 706 error = EPERM; 707 } 708 PROC_UNLOCK(p); 709 } 710 sx_sunlock(&allproc_lock); 711 if (vrele_count > 0) { 712 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 713 while (vrele_count-- > 0) 714 vrele(vp); 715 VFS_UNLOCK_GIANT(vfslocked); 716 } 717 goto done; 718 } 719 /* 720 * do it 721 */ 722 sx_slock(&proctree_lock); 723 if (uap->pid < 0) { 724 /* 725 * by process group 726 */ 727 pg = pgfind(-uap->pid); 728 if (pg == NULL) { 729 sx_sunlock(&proctree_lock); 730 error = ESRCH; 731 goto done; 732 } 733 /* 734 * ktrops() may call vrele(). Lock pg_members 735 * by the proctree_lock rather than pg_mtx. 736 */ 737 PGRP_UNLOCK(pg); 738 nfound = 0; 739 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 740 PROC_LOCK(p); 741 if (p_cansee(td, p) != 0) { 742 PROC_UNLOCK(p); 743 continue; 744 } 745 PROC_UNLOCK(p); 746 nfound++; 747 if (descend) 748 ret |= ktrsetchildren(td, p, ops, facs, vp); 749 else 750 ret |= ktrops(td, p, ops, facs, vp); 751 } 752 if (nfound == 0) { 753 sx_sunlock(&proctree_lock); 754 error = ESRCH; 755 goto done; 756 } 757 } else { 758 /* 759 * by pid 760 */ 761 p = pfind(uap->pid); 762 if (p == NULL) { 763 sx_sunlock(&proctree_lock); 764 error = ESRCH; 765 goto done; 766 } 767 error = p_cansee(td, p); 768 /* 769 * The slock of the proctree lock will keep this process 770 * from going away, so unlocking the proc here is ok. 771 */ 772 PROC_UNLOCK(p); 773 if (error) { 774 sx_sunlock(&proctree_lock); 775 goto done; 776 } 777 if (descend) 778 ret |= ktrsetchildren(td, p, ops, facs, vp); 779 else 780 ret |= ktrops(td, p, ops, facs, vp); 781 } 782 sx_sunlock(&proctree_lock); 783 if (!ret) 784 error = EPERM; 785done: 786 if (vp != NULL) { 787 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 788 (void) vn_close(vp, FWRITE, td->td_ucred, td); 789 VFS_UNLOCK_GIANT(vfslocked); 790 } 791 ktrace_exit(td); 792 return (error); 793#else /* !KTRACE */ 794 return (ENOSYS); 795#endif /* KTRACE */ 796} 797 798/* ARGSUSED */ 799int 800utrace(td, uap) 801 struct thread *td; 802 register struct utrace_args *uap; 803{ 804 805#ifdef KTRACE 806 struct ktr_request *req; 807 void *cp; 808 int error; 809 810 if (!KTRPOINT(td, KTR_USER)) 811 return (0); 812 if (uap->len > KTR_USER_MAXLEN) 813 return (EINVAL); 814 cp = malloc(uap->len, M_KTRACE, M_WAITOK); 815 error = copyin(uap->addr, cp, uap->len); 816 if (error) { 817 free(cp, M_KTRACE); 818 return (error); 819 } 820 req = ktr_getrequest(KTR_USER); 821 if (req == NULL) { 822 free(cp, M_KTRACE); 823 return (ENOMEM); 824 } 825 req->ktr_buffer = cp; 826 req->ktr_header.ktr_len = uap->len; 827 ktr_submitrequest(td, req); 828 return (0); 829#else /* !KTRACE */ 830 return (ENOSYS); 831#endif /* KTRACE */ 832} 833 834#ifdef KTRACE 835static int 836ktrops(td, p, ops, facs, vp) 837 struct thread *td; 838 struct proc *p; 839 int ops, facs; 840 struct vnode *vp; 841{ 842 struct vnode *tracevp = NULL; 843 struct ucred *tracecred = NULL; 844 845 PROC_LOCK(p); 846 if (!ktrcanset(td, p)) { 847 PROC_UNLOCK(p); 848 return (0); 849 } 850 mtx_lock(&ktrace_mtx); 851 if (ops == KTROP_SET) { 852 if (p->p_tracevp != vp) { 853 /* 854 * if trace file already in use, relinquish below 855 */ 856 tracevp = p->p_tracevp; 857 VREF(vp); 858 p->p_tracevp = vp; 859 } 860 if (p->p_tracecred != td->td_ucred) { 861 tracecred = p->p_tracecred; 862 p->p_tracecred = crhold(td->td_ucred); 863 } 864 p->p_traceflag |= facs; 865 if (priv_check(td, PRIV_KTRACE) == 0) 866 p->p_traceflag |= KTRFAC_ROOT; 867 } else { 868 /* KTROP_CLEAR */ 869 if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) { 870 /* no more tracing */ 871 p->p_traceflag = 0; 872 tracevp = p->p_tracevp; 873 p->p_tracevp = NULL; 874 tracecred = p->p_tracecred; 875 p->p_tracecred = NULL; 876 } 877 } 878 mtx_unlock(&ktrace_mtx); 879 PROC_UNLOCK(p); 880 if (tracevp != NULL) { 881 int vfslocked; 882 883 vfslocked = VFS_LOCK_GIANT(tracevp->v_mount); 884 vrele(tracevp); 885 VFS_UNLOCK_GIANT(vfslocked); 886 } 887 if (tracecred != NULL) 888 crfree(tracecred); 889 890 return (1); 891} 892 893static int 894ktrsetchildren(td, top, ops, facs, vp) 895 struct thread *td; 896 struct proc *top; 897 int ops, facs; 898 struct vnode *vp; 899{ 900 register struct proc *p; 901 register int ret = 0; 902 903 p = top; 904 sx_assert(&proctree_lock, SX_LOCKED); 905 for (;;) { 906 ret |= ktrops(td, p, ops, facs, vp); 907 /* 908 * If this process has children, descend to them next, 909 * otherwise do any siblings, and if done with this level, 910 * follow back up the tree (but not past top). 911 */ 912 if (!LIST_EMPTY(&p->p_children)) 913 p = LIST_FIRST(&p->p_children); 914 else for (;;) { 915 if (p == top) 916 return (ret); 917 if (LIST_NEXT(p, p_sibling)) { 918 p = LIST_NEXT(p, p_sibling); 919 break; 920 } 921 p = p->p_pptr; 922 } 923 } 924 /*NOTREACHED*/ 925} 926 927static void 928ktr_writerequest(struct thread *td, struct ktr_request *req) 929{ 930 struct ktr_header *kth; 931 struct vnode *vp; 932 struct proc *p; 933 struct ucred *cred; 934 struct uio auio; 935 struct iovec aiov[3]; 936 struct mount *mp; 937 int datalen, buflen, vrele_count; 938 int error, vfslocked; 939 940 /* 941 * We hold the vnode and credential for use in I/O in case ktrace is 942 * disabled on the process as we write out the request. 943 * 944 * XXXRW: This is not ideal: we could end up performing a write after 945 * the vnode has been closed. 946 */ 947 mtx_lock(&ktrace_mtx); 948 vp = td->td_proc->p_tracevp; 949 cred = td->td_proc->p_tracecred; 950 951 /* 952 * If vp is NULL, the vp has been cleared out from under this 953 * request, so just drop it. Make sure the credential and vnode are 954 * in sync: we should have both or neither. 955 */ 956 if (vp == NULL) { 957 KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL")); 958 mtx_unlock(&ktrace_mtx); 959 return; 960 } 961 VREF(vp); 962 KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL")); 963 crhold(cred); 964 mtx_unlock(&ktrace_mtx); 965 966 kth = &req->ktr_header; 967 KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) < 968 sizeof(data_lengths) / sizeof(data_lengths[0]), 969 ("data_lengths array overflow")); 970 datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP]; 971 buflen = kth->ktr_len; 972 auio.uio_iov = &aiov[0]; 973 auio.uio_offset = 0; 974 auio.uio_segflg = UIO_SYSSPACE; 975 auio.uio_rw = UIO_WRITE; 976 aiov[0].iov_base = (caddr_t)kth; 977 aiov[0].iov_len = sizeof(struct ktr_header); 978 auio.uio_resid = sizeof(struct ktr_header); 979 auio.uio_iovcnt = 1; 980 auio.uio_td = td; 981 if (datalen != 0) { 982 aiov[1].iov_base = (caddr_t)&req->ktr_data; 983 aiov[1].iov_len = datalen; 984 auio.uio_resid += datalen; 985 auio.uio_iovcnt++; 986 kth->ktr_len += datalen; 987 } 988 if (buflen != 0) { 989 KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write")); 990 aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer; 991 aiov[auio.uio_iovcnt].iov_len = buflen; 992 auio.uio_resid += buflen; 993 auio.uio_iovcnt++; 994 } 995 996 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 997 vn_start_write(vp, &mp, V_WAIT); 998 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 999#ifdef MAC 1000 error = mac_vnode_check_write(cred, NOCRED, vp); 1001 if (error == 0) 1002#endif 1003 error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred); 1004 VOP_UNLOCK(vp, 0); 1005 vn_finished_write(mp); 1006 crfree(cred); 1007 if (!error) { 1008 vrele(vp); 1009 VFS_UNLOCK_GIANT(vfslocked); 1010 return; 1011 } 1012 VFS_UNLOCK_GIANT(vfslocked); 1013 1014 /* 1015 * If error encountered, give up tracing on this vnode. We defer 1016 * all the vrele()'s on the vnode until after we are finished walking 1017 * the various lists to avoid needlessly holding locks. 1018 * NB: at this point we still hold the vnode reference that must 1019 * not go away as we need the valid vnode to compare with. Thus let 1020 * vrele_count start at 1 and the reference will be freed 1021 * by the loop at the end after our last use of vp. 1022 */ 1023 log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n", 1024 error); 1025 vrele_count = 1; 1026 /* 1027 * First, clear this vnode from being used by any processes in the 1028 * system. 1029 * XXX - If one process gets an EPERM writing to the vnode, should 1030 * we really do this? Other processes might have suitable 1031 * credentials for the operation. 1032 */ 1033 cred = NULL; 1034 sx_slock(&allproc_lock); 1035 FOREACH_PROC_IN_SYSTEM(p) { 1036 PROC_LOCK(p); 1037 if (p->p_tracevp == vp) { 1038 mtx_lock(&ktrace_mtx); 1039 p->p_tracevp = NULL; 1040 p->p_traceflag = 0; 1041 cred = p->p_tracecred; 1042 p->p_tracecred = NULL; 1043 mtx_unlock(&ktrace_mtx); 1044 vrele_count++; 1045 } 1046 PROC_UNLOCK(p); 1047 if (cred != NULL) { 1048 crfree(cred); 1049 cred = NULL; 1050 } 1051 } 1052 sx_sunlock(&allproc_lock); 1053 1054 /* 1055 * We can't clear any pending requests in threads that have cached 1056 * them but not yet committed them, as those are per-thread. The 1057 * thread will have to clear it itself on system call return. 1058 */ 1059 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1060 while (vrele_count-- > 0) 1061 vrele(vp); 1062 VFS_UNLOCK_GIANT(vfslocked); 1063} 1064 1065/* 1066 * Return true if caller has permission to set the ktracing state 1067 * of target. Essentially, the target can't possess any 1068 * more permissions than the caller. KTRFAC_ROOT signifies that 1069 * root previously set the tracing status on the target process, and 1070 * so, only root may further change it. 1071 */ 1072static int 1073ktrcanset(td, targetp) 1074 struct thread *td; 1075 struct proc *targetp; 1076{ 1077 1078 PROC_LOCK_ASSERT(targetp, MA_OWNED); 1079 if (targetp->p_traceflag & KTRFAC_ROOT && 1080 priv_check(td, PRIV_KTRACE)) 1081 return (0); 1082 1083 if (p_candebug(td, targetp) != 0) 1084 return (0); 1085 1086 return (1); 1087} 1088 1089#endif /* KTRACE */ 1090