kern_ktrace.c revision 211102
1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2005 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93 32 */ 33 34#include <sys/cdefs.h> 35__FBSDID("$FreeBSD: head/sys/kern/kern_ktrace.c 211102 2010-08-09 14:48:31Z gavin $"); 36 37#include "opt_ktrace.h" 38 39#include <sys/param.h> 40#include <sys/systm.h> 41#include <sys/fcntl.h> 42#include <sys/kernel.h> 43#include <sys/kthread.h> 44#include <sys/lock.h> 45#include <sys/mutex.h> 46#include <sys/malloc.h> 47#include <sys/mount.h> 48#include <sys/namei.h> 49#include <sys/priv.h> 50#include <sys/proc.h> 51#include <sys/unistd.h> 52#include <sys/vnode.h> 53#include <sys/socket.h> 54#include <sys/stat.h> 55#include <sys/ktrace.h> 56#include <sys/sx.h> 57#include <sys/sysctl.h> 58#include <sys/syslog.h> 59#include <sys/sysproto.h> 60 61#include <security/mac/mac_framework.h> 62 63/* 64 * The ktrace facility allows the tracing of certain key events in user space 65 * processes, such as system calls, signal delivery, context switches, and 66 * user generated events using utrace(2). It works by streaming event 67 * records and data to a vnode associated with the process using the 68 * ktrace(2) system call. In general, records can be written directly from 69 * the context that generates the event. One important exception to this is 70 * during a context switch, where sleeping is not permitted. To handle this 71 * case, trace events are generated using in-kernel ktr_request records, and 72 * then delivered to disk at a convenient moment -- either immediately, the 73 * next traceable event, at system call return, or at process exit. 74 * 75 * When dealing with multiple threads or processes writing to the same event 76 * log, ordering guarantees are weak: specifically, if an event has multiple 77 * records (i.e., system call enter and return), they may be interlaced with 78 * records from another event. Process and thread ID information is provided 79 * in the record, and user applications can de-interlace events if required. 80 */ 81 82static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE"); 83 84#ifdef KTRACE 85 86#ifndef KTRACE_REQUEST_POOL 87#define KTRACE_REQUEST_POOL 100 88#endif 89 90struct ktr_request { 91 struct ktr_header ktr_header; 92 void *ktr_buffer; 93 union { 94 struct ktr_syscall ktr_syscall; 95 struct ktr_sysret ktr_sysret; 96 struct ktr_genio ktr_genio; 97 struct ktr_psig ktr_psig; 98 struct ktr_csw ktr_csw; 99 } ktr_data; 100 STAILQ_ENTRY(ktr_request) ktr_list; 101}; 102 103static int data_lengths[] = { 104 0, /* none */ 105 offsetof(struct ktr_syscall, ktr_args), /* KTR_SYSCALL */ 106 sizeof(struct ktr_sysret), /* KTR_SYSRET */ 107 0, /* KTR_NAMEI */ 108 sizeof(struct ktr_genio), /* KTR_GENIO */ 109 sizeof(struct ktr_psig), /* KTR_PSIG */ 110 sizeof(struct ktr_csw), /* KTR_CSW */ 111 0, /* KTR_USER */ 112 0, /* KTR_STRUCT */ 113 0, /* KTR_SYSCTL */ 114}; 115 116static STAILQ_HEAD(, ktr_request) ktr_free; 117 118static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options"); 119 120static u_int ktr_requestpool = KTRACE_REQUEST_POOL; 121TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool); 122 123static u_int ktr_geniosize = PAGE_SIZE; 124TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize); 125SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize, 126 0, "Maximum size of genio event payload"); 127 128static int print_message = 1; 129struct mtx ktrace_mtx; 130static struct sx ktrace_sx; 131 132static void ktrace_init(void *dummy); 133static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS); 134static u_int ktrace_resize_pool(u_int newsize); 135static struct ktr_request *ktr_getrequest(int type); 136static void ktr_submitrequest(struct thread *td, struct ktr_request *req); 137static void ktr_freerequest(struct ktr_request *req); 138static void ktr_writerequest(struct thread *td, struct ktr_request *req); 139static int ktrcanset(struct thread *,struct proc *); 140static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *); 141static int ktrops(struct thread *,struct proc *,int,int,struct vnode *); 142 143/* 144 * ktrace itself generates events, such as context switches, which we do not 145 * wish to trace. Maintain a flag, TDP_INKTRACE, on each thread to determine 146 * whether or not it is in a region where tracing of events should be 147 * suppressed. 148 */ 149static void 150ktrace_enter(struct thread *td) 151{ 152 153 KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set")); 154 td->td_pflags |= TDP_INKTRACE; 155} 156 157static void 158ktrace_exit(struct thread *td) 159{ 160 161 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set")); 162 td->td_pflags &= ~TDP_INKTRACE; 163} 164 165static void 166ktrace_assert(struct thread *td) 167{ 168 169 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set")); 170} 171 172static void 173ktrace_init(void *dummy) 174{ 175 struct ktr_request *req; 176 int i; 177 178 mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET); 179 sx_init(&ktrace_sx, "ktrace_sx"); 180 STAILQ_INIT(&ktr_free); 181 for (i = 0; i < ktr_requestpool; i++) { 182 req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK); 183 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 184 } 185} 186SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL); 187 188static int 189sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS) 190{ 191 struct thread *td; 192 u_int newsize, oldsize, wantsize; 193 int error; 194 195 /* Handle easy read-only case first to avoid warnings from GCC. */ 196 if (!req->newptr) { 197 mtx_lock(&ktrace_mtx); 198 oldsize = ktr_requestpool; 199 mtx_unlock(&ktrace_mtx); 200 return (SYSCTL_OUT(req, &oldsize, sizeof(u_int))); 201 } 202 203 error = SYSCTL_IN(req, &wantsize, sizeof(u_int)); 204 if (error) 205 return (error); 206 td = curthread; 207 ktrace_enter(td); 208 mtx_lock(&ktrace_mtx); 209 oldsize = ktr_requestpool; 210 newsize = ktrace_resize_pool(wantsize); 211 mtx_unlock(&ktrace_mtx); 212 ktrace_exit(td); 213 error = SYSCTL_OUT(req, &oldsize, sizeof(u_int)); 214 if (error) 215 return (error); 216 if (wantsize > oldsize && newsize < wantsize) 217 return (ENOSPC); 218 return (0); 219} 220SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW, 221 &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", 222 "Pool buffer size for ktrace(1)"); 223 224static u_int 225ktrace_resize_pool(u_int newsize) 226{ 227 struct ktr_request *req; 228 int bound; 229 230 mtx_assert(&ktrace_mtx, MA_OWNED); 231 print_message = 1; 232 bound = newsize - ktr_requestpool; 233 if (bound == 0) 234 return (ktr_requestpool); 235 if (bound < 0) 236 /* Shrink pool down to newsize if possible. */ 237 while (bound++ < 0) { 238 req = STAILQ_FIRST(&ktr_free); 239 if (req == NULL) 240 return (ktr_requestpool); 241 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 242 ktr_requestpool--; 243 mtx_unlock(&ktrace_mtx); 244 free(req, M_KTRACE); 245 mtx_lock(&ktrace_mtx); 246 } 247 else 248 /* Grow pool up to newsize. */ 249 while (bound-- > 0) { 250 mtx_unlock(&ktrace_mtx); 251 req = malloc(sizeof(struct ktr_request), M_KTRACE, 252 M_WAITOK); 253 mtx_lock(&ktrace_mtx); 254 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 255 ktr_requestpool++; 256 } 257 return (ktr_requestpool); 258} 259 260/* ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[]. */ 261CTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) == 262 (sizeof((struct thread *)NULL)->td_name)); 263 264static struct ktr_request * 265ktr_getrequest(int type) 266{ 267 struct ktr_request *req; 268 struct thread *td = curthread; 269 struct proc *p = td->td_proc; 270 int pm; 271 272 ktrace_enter(td); /* XXX: In caller instead? */ 273 mtx_lock(&ktrace_mtx); 274 if (!KTRCHECK(td, type)) { 275 mtx_unlock(&ktrace_mtx); 276 ktrace_exit(td); 277 return (NULL); 278 } 279 req = STAILQ_FIRST(&ktr_free); 280 if (req != NULL) { 281 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 282 req->ktr_header.ktr_type = type; 283 if (p->p_traceflag & KTRFAC_DROP) { 284 req->ktr_header.ktr_type |= KTR_DROP; 285 p->p_traceflag &= ~KTRFAC_DROP; 286 } 287 mtx_unlock(&ktrace_mtx); 288 microtime(&req->ktr_header.ktr_time); 289 req->ktr_header.ktr_pid = p->p_pid; 290 req->ktr_header.ktr_tid = td->td_tid; 291 bcopy(td->td_name, req->ktr_header.ktr_comm, 292 sizeof(req->ktr_header.ktr_comm)); 293 req->ktr_buffer = NULL; 294 req->ktr_header.ktr_len = 0; 295 } else { 296 p->p_traceflag |= KTRFAC_DROP; 297 pm = print_message; 298 print_message = 0; 299 mtx_unlock(&ktrace_mtx); 300 if (pm) 301 printf("Out of ktrace request objects.\n"); 302 ktrace_exit(td); 303 } 304 return (req); 305} 306 307/* 308 * Some trace generation environments don't permit direct access to VFS, 309 * such as during a context switch where sleeping is not allowed. Under these 310 * circumstances, queue a request to the thread to be written asynchronously 311 * later. 312 */ 313static void 314ktr_enqueuerequest(struct thread *td, struct ktr_request *req) 315{ 316 317 mtx_lock(&ktrace_mtx); 318 STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list); 319 mtx_unlock(&ktrace_mtx); 320 ktrace_exit(td); 321} 322 323/* 324 * Drain any pending ktrace records from the per-thread queue to disk. This 325 * is used both internally before committing other records, and also on 326 * system call return. We drain all the ones we can find at the time when 327 * drain is requested, but don't keep draining after that as those events 328 * may be approximately "after" the current event. 329 */ 330static void 331ktr_drain(struct thread *td) 332{ 333 struct ktr_request *queued_req; 334 STAILQ_HEAD(, ktr_request) local_queue; 335 336 ktrace_assert(td); 337 sx_assert(&ktrace_sx, SX_XLOCKED); 338 339 STAILQ_INIT(&local_queue); /* XXXRW: needed? */ 340 341 if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) { 342 mtx_lock(&ktrace_mtx); 343 STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr); 344 mtx_unlock(&ktrace_mtx); 345 346 while ((queued_req = STAILQ_FIRST(&local_queue))) { 347 STAILQ_REMOVE_HEAD(&local_queue, ktr_list); 348 ktr_writerequest(td, queued_req); 349 ktr_freerequest(queued_req); 350 } 351 } 352} 353 354/* 355 * Submit a trace record for immediate commit to disk -- to be used only 356 * where entering VFS is OK. First drain any pending records that may have 357 * been cached in the thread. 358 */ 359static void 360ktr_submitrequest(struct thread *td, struct ktr_request *req) 361{ 362 363 ktrace_assert(td); 364 365 sx_xlock(&ktrace_sx); 366 ktr_drain(td); 367 ktr_writerequest(td, req); 368 ktr_freerequest(req); 369 sx_xunlock(&ktrace_sx); 370 371 ktrace_exit(td); 372} 373 374static void 375ktr_freerequest(struct ktr_request *req) 376{ 377 378 if (req->ktr_buffer != NULL) 379 free(req->ktr_buffer, M_KTRACE); 380 mtx_lock(&ktrace_mtx); 381 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 382 mtx_unlock(&ktrace_mtx); 383} 384 385void 386ktrsyscall(code, narg, args) 387 int code, narg; 388 register_t args[]; 389{ 390 struct ktr_request *req; 391 struct ktr_syscall *ktp; 392 size_t buflen; 393 char *buf = NULL; 394 395 buflen = sizeof(register_t) * narg; 396 if (buflen > 0) { 397 buf = malloc(buflen, M_KTRACE, M_WAITOK); 398 bcopy(args, buf, buflen); 399 } 400 req = ktr_getrequest(KTR_SYSCALL); 401 if (req == NULL) { 402 if (buf != NULL) 403 free(buf, M_KTRACE); 404 return; 405 } 406 ktp = &req->ktr_data.ktr_syscall; 407 ktp->ktr_code = code; 408 ktp->ktr_narg = narg; 409 if (buflen > 0) { 410 req->ktr_header.ktr_len = buflen; 411 req->ktr_buffer = buf; 412 } 413 ktr_submitrequest(curthread, req); 414} 415 416void 417ktrsysret(code, error, retval) 418 int code, error; 419 register_t retval; 420{ 421 struct ktr_request *req; 422 struct ktr_sysret *ktp; 423 424 req = ktr_getrequest(KTR_SYSRET); 425 if (req == NULL) 426 return; 427 ktp = &req->ktr_data.ktr_sysret; 428 ktp->ktr_code = code; 429 ktp->ktr_error = error; 430 ktp->ktr_retval = retval; /* what about val2 ? */ 431 ktr_submitrequest(curthread, req); 432} 433 434/* 435 * When a process exits, drain per-process asynchronous trace records. 436 */ 437void 438ktrprocexit(struct thread *td) 439{ 440 441 ktrace_enter(td); 442 sx_xlock(&ktrace_sx); 443 ktr_drain(td); 444 sx_xunlock(&ktrace_sx); 445 ktrace_exit(td); 446} 447 448/* 449 * When a thread returns, drain any asynchronous records generated by the 450 * system call. 451 */ 452void 453ktruserret(struct thread *td) 454{ 455 456 ktrace_enter(td); 457 sx_xlock(&ktrace_sx); 458 ktr_drain(td); 459 sx_xunlock(&ktrace_sx); 460 ktrace_exit(td); 461} 462 463void 464ktrnamei(path) 465 char *path; 466{ 467 struct ktr_request *req; 468 int namelen; 469 char *buf = NULL; 470 471 namelen = strlen(path); 472 if (namelen > 0) { 473 buf = malloc(namelen, M_KTRACE, M_WAITOK); 474 bcopy(path, buf, namelen); 475 } 476 req = ktr_getrequest(KTR_NAMEI); 477 if (req == NULL) { 478 if (buf != NULL) 479 free(buf, M_KTRACE); 480 return; 481 } 482 if (namelen > 0) { 483 req->ktr_header.ktr_len = namelen; 484 req->ktr_buffer = buf; 485 } 486 ktr_submitrequest(curthread, req); 487} 488 489void 490ktrsysctl(name, namelen) 491 int *name; 492 u_int namelen; 493{ 494 struct ktr_request *req; 495 u_int mib[CTL_MAXNAME + 2]; 496 char *mibname; 497 size_t mibnamelen; 498 int error; 499 500 /* Lookup name of mib. */ 501 KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long")); 502 mib[0] = 0; 503 mib[1] = 1; 504 bcopy(name, mib + 2, namelen * sizeof(*name)); 505 mibnamelen = 128; 506 mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK); 507 error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen, 508 NULL, 0, &mibnamelen, 0); 509 if (error) { 510 free(mibname, M_KTRACE); 511 return; 512 } 513 req = ktr_getrequest(KTR_SYSCTL); 514 if (req == NULL) { 515 free(mibname, M_KTRACE); 516 return; 517 } 518 req->ktr_header.ktr_len = mibnamelen; 519 req->ktr_buffer = mibname; 520 ktr_submitrequest(curthread, req); 521} 522 523void 524ktrgenio(fd, rw, uio, error) 525 int fd; 526 enum uio_rw rw; 527 struct uio *uio; 528 int error; 529{ 530 struct ktr_request *req; 531 struct ktr_genio *ktg; 532 int datalen; 533 char *buf; 534 535 if (error) { 536 free(uio, M_IOV); 537 return; 538 } 539 uio->uio_offset = 0; 540 uio->uio_rw = UIO_WRITE; 541 datalen = imin(uio->uio_resid, ktr_geniosize); 542 buf = malloc(datalen, M_KTRACE, M_WAITOK); 543 error = uiomove(buf, datalen, uio); 544 free(uio, M_IOV); 545 if (error) { 546 free(buf, M_KTRACE); 547 return; 548 } 549 req = ktr_getrequest(KTR_GENIO); 550 if (req == NULL) { 551 free(buf, M_KTRACE); 552 return; 553 } 554 ktg = &req->ktr_data.ktr_genio; 555 ktg->ktr_fd = fd; 556 ktg->ktr_rw = rw; 557 req->ktr_header.ktr_len = datalen; 558 req->ktr_buffer = buf; 559 ktr_submitrequest(curthread, req); 560} 561 562void 563ktrpsig(sig, action, mask, code) 564 int sig; 565 sig_t action; 566 sigset_t *mask; 567 int code; 568{ 569 struct ktr_request *req; 570 struct ktr_psig *kp; 571 572 req = ktr_getrequest(KTR_PSIG); 573 if (req == NULL) 574 return; 575 kp = &req->ktr_data.ktr_psig; 576 kp->signo = (char)sig; 577 kp->action = action; 578 kp->mask = *mask; 579 kp->code = code; 580 ktr_enqueuerequest(curthread, req); 581} 582 583void 584ktrcsw(out, user) 585 int out, user; 586{ 587 struct ktr_request *req; 588 struct ktr_csw *kc; 589 590 req = ktr_getrequest(KTR_CSW); 591 if (req == NULL) 592 return; 593 kc = &req->ktr_data.ktr_csw; 594 kc->out = out; 595 kc->user = user; 596 ktr_enqueuerequest(curthread, req); 597} 598 599void 600ktrstruct(name, data, datalen) 601 const char *name; 602 void *data; 603 size_t datalen; 604{ 605 struct ktr_request *req; 606 char *buf = NULL; 607 size_t buflen; 608 609 if (!data) 610 datalen = 0; 611 buflen = strlen(name) + 1 + datalen; 612 buf = malloc(buflen, M_KTRACE, M_WAITOK); 613 strcpy(buf, name); 614 bcopy(data, buf + strlen(name) + 1, datalen); 615 if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) { 616 free(buf, M_KTRACE); 617 return; 618 } 619 req->ktr_buffer = buf; 620 req->ktr_header.ktr_len = buflen; 621 ktr_submitrequest(curthread, req); 622} 623#endif /* KTRACE */ 624 625/* Interface and common routines */ 626 627#ifndef _SYS_SYSPROTO_H_ 628struct ktrace_args { 629 char *fname; 630 int ops; 631 int facs; 632 int pid; 633}; 634#endif 635/* ARGSUSED */ 636int 637ktrace(td, uap) 638 struct thread *td; 639 register struct ktrace_args *uap; 640{ 641#ifdef KTRACE 642 register struct vnode *vp = NULL; 643 register struct proc *p; 644 struct pgrp *pg; 645 int facs = uap->facs & ~KTRFAC_ROOT; 646 int ops = KTROP(uap->ops); 647 int descend = uap->ops & KTRFLAG_DESCEND; 648 int nfound, ret = 0; 649 int flags, error = 0, vfslocked; 650 struct nameidata nd; 651 struct ucred *cred; 652 653 /* 654 * Need something to (un)trace. 655 */ 656 if (ops != KTROP_CLEARFILE && facs == 0) 657 return (EINVAL); 658 659 ktrace_enter(td); 660 if (ops != KTROP_CLEAR) { 661 /* 662 * an operation which requires a file argument. 663 */ 664 NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, 665 uap->fname, td); 666 flags = FREAD | FWRITE | O_NOFOLLOW; 667 error = vn_open(&nd, &flags, 0, NULL); 668 if (error) { 669 ktrace_exit(td); 670 return (error); 671 } 672 vfslocked = NDHASGIANT(&nd); 673 NDFREE(&nd, NDF_ONLY_PNBUF); 674 vp = nd.ni_vp; 675 VOP_UNLOCK(vp, 0); 676 if (vp->v_type != VREG) { 677 (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); 678 VFS_UNLOCK_GIANT(vfslocked); 679 ktrace_exit(td); 680 return (EACCES); 681 } 682 VFS_UNLOCK_GIANT(vfslocked); 683 } 684 /* 685 * Clear all uses of the tracefile. 686 */ 687 if (ops == KTROP_CLEARFILE) { 688 int vrele_count; 689 690 vrele_count = 0; 691 sx_slock(&allproc_lock); 692 FOREACH_PROC_IN_SYSTEM(p) { 693 PROC_LOCK(p); 694 if (p->p_tracevp == vp) { 695 if (ktrcanset(td, p)) { 696 mtx_lock(&ktrace_mtx); 697 cred = p->p_tracecred; 698 p->p_tracecred = NULL; 699 p->p_tracevp = NULL; 700 p->p_traceflag = 0; 701 mtx_unlock(&ktrace_mtx); 702 vrele_count++; 703 crfree(cred); 704 } else 705 error = EPERM; 706 } 707 PROC_UNLOCK(p); 708 } 709 sx_sunlock(&allproc_lock); 710 if (vrele_count > 0) { 711 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 712 while (vrele_count-- > 0) 713 vrele(vp); 714 VFS_UNLOCK_GIANT(vfslocked); 715 } 716 goto done; 717 } 718 /* 719 * do it 720 */ 721 sx_slock(&proctree_lock); 722 if (uap->pid < 0) { 723 /* 724 * by process group 725 */ 726 pg = pgfind(-uap->pid); 727 if (pg == NULL) { 728 sx_sunlock(&proctree_lock); 729 error = ESRCH; 730 goto done; 731 } 732 /* 733 * ktrops() may call vrele(). Lock pg_members 734 * by the proctree_lock rather than pg_mtx. 735 */ 736 PGRP_UNLOCK(pg); 737 nfound = 0; 738 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 739 PROC_LOCK(p); 740 if (p_cansee(td, p) != 0) { 741 PROC_UNLOCK(p); 742 continue; 743 } 744 PROC_UNLOCK(p); 745 nfound++; 746 if (descend) 747 ret |= ktrsetchildren(td, p, ops, facs, vp); 748 else 749 ret |= ktrops(td, p, ops, facs, vp); 750 } 751 if (nfound == 0) { 752 sx_sunlock(&proctree_lock); 753 error = ESRCH; 754 goto done; 755 } 756 } else { 757 /* 758 * by pid 759 */ 760 p = pfind(uap->pid); 761 if (p == NULL) { 762 sx_sunlock(&proctree_lock); 763 error = ESRCH; 764 goto done; 765 } 766 error = p_cansee(td, p); 767 /* 768 * The slock of the proctree lock will keep this process 769 * from going away, so unlocking the proc here is ok. 770 */ 771 PROC_UNLOCK(p); 772 if (error) { 773 sx_sunlock(&proctree_lock); 774 goto done; 775 } 776 if (descend) 777 ret |= ktrsetchildren(td, p, ops, facs, vp); 778 else 779 ret |= ktrops(td, p, ops, facs, vp); 780 } 781 sx_sunlock(&proctree_lock); 782 if (!ret) 783 error = EPERM; 784done: 785 if (vp != NULL) { 786 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 787 (void) vn_close(vp, FWRITE, td->td_ucred, td); 788 VFS_UNLOCK_GIANT(vfslocked); 789 } 790 ktrace_exit(td); 791 return (error); 792#else /* !KTRACE */ 793 return (ENOSYS); 794#endif /* KTRACE */ 795} 796 797/* ARGSUSED */ 798int 799utrace(td, uap) 800 struct thread *td; 801 register struct utrace_args *uap; 802{ 803 804#ifdef KTRACE 805 struct ktr_request *req; 806 void *cp; 807 int error; 808 809 if (!KTRPOINT(td, KTR_USER)) 810 return (0); 811 if (uap->len > KTR_USER_MAXLEN) 812 return (EINVAL); 813 cp = malloc(uap->len, M_KTRACE, M_WAITOK); 814 error = copyin(uap->addr, cp, uap->len); 815 if (error) { 816 free(cp, M_KTRACE); 817 return (error); 818 } 819 req = ktr_getrequest(KTR_USER); 820 if (req == NULL) { 821 free(cp, M_KTRACE); 822 return (ENOMEM); 823 } 824 req->ktr_buffer = cp; 825 req->ktr_header.ktr_len = uap->len; 826 ktr_submitrequest(td, req); 827 return (0); 828#else /* !KTRACE */ 829 return (ENOSYS); 830#endif /* KTRACE */ 831} 832 833#ifdef KTRACE 834static int 835ktrops(td, p, ops, facs, vp) 836 struct thread *td; 837 struct proc *p; 838 int ops, facs; 839 struct vnode *vp; 840{ 841 struct vnode *tracevp = NULL; 842 struct ucred *tracecred = NULL; 843 844 PROC_LOCK(p); 845 if (!ktrcanset(td, p)) { 846 PROC_UNLOCK(p); 847 return (0); 848 } 849 mtx_lock(&ktrace_mtx); 850 if (ops == KTROP_SET) { 851 if (p->p_tracevp != vp) { 852 /* 853 * if trace file already in use, relinquish below 854 */ 855 tracevp = p->p_tracevp; 856 VREF(vp); 857 p->p_tracevp = vp; 858 } 859 if (p->p_tracecred != td->td_ucred) { 860 tracecred = p->p_tracecred; 861 p->p_tracecred = crhold(td->td_ucred); 862 } 863 p->p_traceflag |= facs; 864 if (priv_check(td, PRIV_KTRACE) == 0) 865 p->p_traceflag |= KTRFAC_ROOT; 866 } else { 867 /* KTROP_CLEAR */ 868 if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) { 869 /* no more tracing */ 870 p->p_traceflag = 0; 871 tracevp = p->p_tracevp; 872 p->p_tracevp = NULL; 873 tracecred = p->p_tracecred; 874 p->p_tracecred = NULL; 875 } 876 } 877 mtx_unlock(&ktrace_mtx); 878 PROC_UNLOCK(p); 879 if (tracevp != NULL) { 880 int vfslocked; 881 882 vfslocked = VFS_LOCK_GIANT(tracevp->v_mount); 883 vrele(tracevp); 884 VFS_UNLOCK_GIANT(vfslocked); 885 } 886 if (tracecred != NULL) 887 crfree(tracecred); 888 889 return (1); 890} 891 892static int 893ktrsetchildren(td, top, ops, facs, vp) 894 struct thread *td; 895 struct proc *top; 896 int ops, facs; 897 struct vnode *vp; 898{ 899 register struct proc *p; 900 register int ret = 0; 901 902 p = top; 903 sx_assert(&proctree_lock, SX_LOCKED); 904 for (;;) { 905 ret |= ktrops(td, p, ops, facs, vp); 906 /* 907 * If this process has children, descend to them next, 908 * otherwise do any siblings, and if done with this level, 909 * follow back up the tree (but not past top). 910 */ 911 if (!LIST_EMPTY(&p->p_children)) 912 p = LIST_FIRST(&p->p_children); 913 else for (;;) { 914 if (p == top) 915 return (ret); 916 if (LIST_NEXT(p, p_sibling)) { 917 p = LIST_NEXT(p, p_sibling); 918 break; 919 } 920 p = p->p_pptr; 921 } 922 } 923 /*NOTREACHED*/ 924} 925 926static void 927ktr_writerequest(struct thread *td, struct ktr_request *req) 928{ 929 struct ktr_header *kth; 930 struct vnode *vp; 931 struct proc *p; 932 struct ucred *cred; 933 struct uio auio; 934 struct iovec aiov[3]; 935 struct mount *mp; 936 int datalen, buflen, vrele_count; 937 int error, vfslocked; 938 939 /* 940 * We hold the vnode and credential for use in I/O in case ktrace is 941 * disabled on the process as we write out the request. 942 * 943 * XXXRW: This is not ideal: we could end up performing a write after 944 * the vnode has been closed. 945 */ 946 mtx_lock(&ktrace_mtx); 947 vp = td->td_proc->p_tracevp; 948 cred = td->td_proc->p_tracecred; 949 950 /* 951 * If vp is NULL, the vp has been cleared out from under this 952 * request, so just drop it. Make sure the credential and vnode are 953 * in sync: we should have both or neither. 954 */ 955 if (vp == NULL) { 956 KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL")); 957 mtx_unlock(&ktrace_mtx); 958 return; 959 } 960 VREF(vp); 961 KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL")); 962 crhold(cred); 963 mtx_unlock(&ktrace_mtx); 964 965 kth = &req->ktr_header; 966 KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) < 967 sizeof(data_lengths) / sizeof(data_lengths[0]), 968 ("data_lengths array overflow")); 969 datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP]; 970 buflen = kth->ktr_len; 971 auio.uio_iov = &aiov[0]; 972 auio.uio_offset = 0; 973 auio.uio_segflg = UIO_SYSSPACE; 974 auio.uio_rw = UIO_WRITE; 975 aiov[0].iov_base = (caddr_t)kth; 976 aiov[0].iov_len = sizeof(struct ktr_header); 977 auio.uio_resid = sizeof(struct ktr_header); 978 auio.uio_iovcnt = 1; 979 auio.uio_td = td; 980 if (datalen != 0) { 981 aiov[1].iov_base = (caddr_t)&req->ktr_data; 982 aiov[1].iov_len = datalen; 983 auio.uio_resid += datalen; 984 auio.uio_iovcnt++; 985 kth->ktr_len += datalen; 986 } 987 if (buflen != 0) { 988 KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write")); 989 aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer; 990 aiov[auio.uio_iovcnt].iov_len = buflen; 991 auio.uio_resid += buflen; 992 auio.uio_iovcnt++; 993 } 994 995 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 996 vn_start_write(vp, &mp, V_WAIT); 997 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 998#ifdef MAC 999 error = mac_vnode_check_write(cred, NOCRED, vp); 1000 if (error == 0) 1001#endif 1002 error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred); 1003 VOP_UNLOCK(vp, 0); 1004 vn_finished_write(mp); 1005 crfree(cred); 1006 if (!error) { 1007 vrele(vp); 1008 VFS_UNLOCK_GIANT(vfslocked); 1009 return; 1010 } 1011 VFS_UNLOCK_GIANT(vfslocked); 1012 1013 /* 1014 * If error encountered, give up tracing on this vnode. We defer 1015 * all the vrele()'s on the vnode until after we are finished walking 1016 * the various lists to avoid needlessly holding locks. 1017 * NB: at this point we still hold the vnode reference that must 1018 * not go away as we need the valid vnode to compare with. Thus let 1019 * vrele_count start at 1 and the reference will be freed 1020 * by the loop at the end after our last use of vp. 1021 */ 1022 log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n", 1023 error); 1024 vrele_count = 1; 1025 /* 1026 * First, clear this vnode from being used by any processes in the 1027 * system. 1028 * XXX - If one process gets an EPERM writing to the vnode, should 1029 * we really do this? Other processes might have suitable 1030 * credentials for the operation. 1031 */ 1032 cred = NULL; 1033 sx_slock(&allproc_lock); 1034 FOREACH_PROC_IN_SYSTEM(p) { 1035 PROC_LOCK(p); 1036 if (p->p_tracevp == vp) { 1037 mtx_lock(&ktrace_mtx); 1038 p->p_tracevp = NULL; 1039 p->p_traceflag = 0; 1040 cred = p->p_tracecred; 1041 p->p_tracecred = NULL; 1042 mtx_unlock(&ktrace_mtx); 1043 vrele_count++; 1044 } 1045 PROC_UNLOCK(p); 1046 if (cred != NULL) { 1047 crfree(cred); 1048 cred = NULL; 1049 } 1050 } 1051 sx_sunlock(&allproc_lock); 1052 1053 /* 1054 * We can't clear any pending requests in threads that have cached 1055 * them but not yet committed them, as those are per-thread. The 1056 * thread will have to clear it itself on system call return. 1057 */ 1058 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1059 while (vrele_count-- > 0) 1060 vrele(vp); 1061 VFS_UNLOCK_GIANT(vfslocked); 1062} 1063 1064/* 1065 * Return true if caller has permission to set the ktracing state 1066 * of target. Essentially, the target can't possess any 1067 * more permissions than the caller. KTRFAC_ROOT signifies that 1068 * root previously set the tracing status on the target process, and 1069 * so, only root may further change it. 1070 */ 1071static int 1072ktrcanset(td, targetp) 1073 struct thread *td; 1074 struct proc *targetp; 1075{ 1076 1077 PROC_LOCK_ASSERT(targetp, MA_OWNED); 1078 if (targetp->p_traceflag & KTRFAC_ROOT && 1079 priv_check(td, PRIV_KTRACE)) 1080 return (0); 1081 1082 if (p_candebug(td, targetp) != 0) 1083 return (0); 1084 1085 return (1); 1086} 1087 1088#endif /* KTRACE */ 1089