kern_ktrace.c revision 189707
1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2005 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93 32 */ 33 34#include <sys/cdefs.h> 35__FBSDID("$FreeBSD: head/sys/kern/kern_ktrace.c 189707 2009-03-11 21:48:36Z jhb $"); 36 37#include "opt_ktrace.h" 38#include "opt_mac.h" 39 40#include <sys/param.h> 41#include <sys/systm.h> 42#include <sys/fcntl.h> 43#include <sys/kernel.h> 44#include <sys/kthread.h> 45#include <sys/lock.h> 46#include <sys/mutex.h> 47#include <sys/malloc.h> 48#include <sys/mount.h> 49#include <sys/namei.h> 50#include <sys/priv.h> 51#include <sys/proc.h> 52#include <sys/unistd.h> 53#include <sys/vnode.h> 54#include <sys/socket.h> 55#include <sys/stat.h> 56#include <sys/ktrace.h> 57#include <sys/sx.h> 58#include <sys/sysctl.h> 59#include <sys/syslog.h> 60#include <sys/sysproto.h> 61 62#include <security/mac/mac_framework.h> 63 64/* 65 * The ktrace facility allows the tracing of certain key events in user space 66 * processes, such as system calls, signal delivery, context switches, and 67 * user generated events using utrace(2). It works by streaming event 68 * records and data to a vnode associated with the process using the 69 * ktrace(2) system call. In general, records can be written directly from 70 * the context that generates the event. One important exception to this is 71 * during a context switch, where sleeping is not permitted. To handle this 72 * case, trace events are generated using in-kernel ktr_request records, and 73 * then delivered to disk at a convenient moment -- either immediately, the 74 * next traceable event, at system call return, or at process exit. 75 * 76 * When dealing with multiple threads or processes writing to the same event 77 * log, ordering guarantees are weak: specifically, if an event has multiple 78 * records (i.e., system call enter and return), they may be interlaced with 79 * records from another event. Process and thread ID information is provided 80 * in the record, and user applications can de-interlace events if required. 81 */ 82 83static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE"); 84 85#ifdef KTRACE 86 87#ifndef KTRACE_REQUEST_POOL 88#define KTRACE_REQUEST_POOL 100 89#endif 90 91struct ktr_request { 92 struct ktr_header ktr_header; 93 void *ktr_buffer; 94 union { 95 struct ktr_syscall ktr_syscall; 96 struct ktr_sysret ktr_sysret; 97 struct ktr_genio ktr_genio; 98 struct ktr_psig ktr_psig; 99 struct ktr_csw ktr_csw; 100 } ktr_data; 101 STAILQ_ENTRY(ktr_request) ktr_list; 102}; 103 104static int data_lengths[] = { 105 0, /* none */ 106 offsetof(struct ktr_syscall, ktr_args), /* KTR_SYSCALL */ 107 sizeof(struct ktr_sysret), /* KTR_SYSRET */ 108 0, /* KTR_NAMEI */ 109 sizeof(struct ktr_genio), /* KTR_GENIO */ 110 sizeof(struct ktr_psig), /* KTR_PSIG */ 111 sizeof(struct ktr_csw), /* KTR_CSW */ 112 0, /* KTR_USER */ 113 0, /* KTR_STRUCT */ 114 0, /* KTR_SYSCTL */ 115}; 116 117static STAILQ_HEAD(, ktr_request) ktr_free; 118 119static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options"); 120 121static u_int ktr_requestpool = KTRACE_REQUEST_POOL; 122TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool); 123 124static u_int ktr_geniosize = PAGE_SIZE; 125TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize); 126SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize, 127 0, "Maximum size of genio event payload"); 128 129static int print_message = 1; 130struct mtx ktrace_mtx; 131static struct sx ktrace_sx; 132 133static void ktrace_init(void *dummy); 134static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS); 135static u_int ktrace_resize_pool(u_int newsize); 136static struct ktr_request *ktr_getrequest(int type); 137static void ktr_submitrequest(struct thread *td, struct ktr_request *req); 138static void ktr_freerequest(struct ktr_request *req); 139static void ktr_writerequest(struct thread *td, struct ktr_request *req); 140static int ktrcanset(struct thread *,struct proc *); 141static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *); 142static int ktrops(struct thread *,struct proc *,int,int,struct vnode *); 143 144/* 145 * ktrace itself generates events, such as context switches, which we do not 146 * wish to trace. Maintain a flag, TDP_INKTRACE, on each thread to determine 147 * whether or not it is in a region where tracing of events should be 148 * suppressed. 149 */ 150static void 151ktrace_enter(struct thread *td) 152{ 153 154 KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set")); 155 td->td_pflags |= TDP_INKTRACE; 156} 157 158static void 159ktrace_exit(struct thread *td) 160{ 161 162 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set")); 163 td->td_pflags &= ~TDP_INKTRACE; 164} 165 166static void 167ktrace_assert(struct thread *td) 168{ 169 170 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set")); 171} 172 173static void 174ktrace_init(void *dummy) 175{ 176 struct ktr_request *req; 177 int i; 178 179 mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET); 180 sx_init(&ktrace_sx, "ktrace_sx"); 181 STAILQ_INIT(&ktr_free); 182 for (i = 0; i < ktr_requestpool; i++) { 183 req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK); 184 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 185 } 186} 187SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL); 188 189static int 190sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS) 191{ 192 struct thread *td; 193 u_int newsize, oldsize, wantsize; 194 int error; 195 196 /* Handle easy read-only case first to avoid warnings from GCC. */ 197 if (!req->newptr) { 198 mtx_lock(&ktrace_mtx); 199 oldsize = ktr_requestpool; 200 mtx_unlock(&ktrace_mtx); 201 return (SYSCTL_OUT(req, &oldsize, sizeof(u_int))); 202 } 203 204 error = SYSCTL_IN(req, &wantsize, sizeof(u_int)); 205 if (error) 206 return (error); 207 td = curthread; 208 ktrace_enter(td); 209 mtx_lock(&ktrace_mtx); 210 oldsize = ktr_requestpool; 211 newsize = ktrace_resize_pool(wantsize); 212 mtx_unlock(&ktrace_mtx); 213 ktrace_exit(td); 214 error = SYSCTL_OUT(req, &oldsize, sizeof(u_int)); 215 if (error) 216 return (error); 217 if (wantsize > oldsize && newsize < wantsize) 218 return (ENOSPC); 219 return (0); 220} 221SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW, 222 &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", ""); 223 224static u_int 225ktrace_resize_pool(u_int newsize) 226{ 227 struct ktr_request *req; 228 int bound; 229 230 mtx_assert(&ktrace_mtx, MA_OWNED); 231 print_message = 1; 232 bound = newsize - ktr_requestpool; 233 if (bound == 0) 234 return (ktr_requestpool); 235 if (bound < 0) 236 /* Shrink pool down to newsize if possible. */ 237 while (bound++ < 0) { 238 req = STAILQ_FIRST(&ktr_free); 239 if (req == NULL) 240 return (ktr_requestpool); 241 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 242 ktr_requestpool--; 243 mtx_unlock(&ktrace_mtx); 244 free(req, M_KTRACE); 245 mtx_lock(&ktrace_mtx); 246 } 247 else 248 /* Grow pool up to newsize. */ 249 while (bound-- > 0) { 250 mtx_unlock(&ktrace_mtx); 251 req = malloc(sizeof(struct ktr_request), M_KTRACE, 252 M_WAITOK); 253 mtx_lock(&ktrace_mtx); 254 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 255 ktr_requestpool++; 256 } 257 return (ktr_requestpool); 258} 259 260static struct ktr_request * 261ktr_getrequest(int type) 262{ 263 struct ktr_request *req; 264 struct thread *td = curthread; 265 struct proc *p = td->td_proc; 266 int pm; 267 268 ktrace_enter(td); /* XXX: In caller instead? */ 269 mtx_lock(&ktrace_mtx); 270 if (!KTRCHECK(td, type)) { 271 mtx_unlock(&ktrace_mtx); 272 ktrace_exit(td); 273 return (NULL); 274 } 275 req = STAILQ_FIRST(&ktr_free); 276 if (req != NULL) { 277 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 278 req->ktr_header.ktr_type = type; 279 if (p->p_traceflag & KTRFAC_DROP) { 280 req->ktr_header.ktr_type |= KTR_DROP; 281 p->p_traceflag &= ~KTRFAC_DROP; 282 } 283 mtx_unlock(&ktrace_mtx); 284 microtime(&req->ktr_header.ktr_time); 285 req->ktr_header.ktr_pid = p->p_pid; 286 req->ktr_header.ktr_tid = td->td_tid; 287 bcopy(td->td_name, req->ktr_header.ktr_comm, MAXCOMLEN + 1); 288 req->ktr_buffer = NULL; 289 req->ktr_header.ktr_len = 0; 290 } else { 291 p->p_traceflag |= KTRFAC_DROP; 292 pm = print_message; 293 print_message = 0; 294 mtx_unlock(&ktrace_mtx); 295 if (pm) 296 printf("Out of ktrace request objects.\n"); 297 ktrace_exit(td); 298 } 299 return (req); 300} 301 302/* 303 * Some trace generation environments don't permit direct access to VFS, 304 * such as during a context switch where sleeping is not allowed. Under these 305 * circumstances, queue a request to the thread to be written asynchronously 306 * later. 307 */ 308static void 309ktr_enqueuerequest(struct thread *td, struct ktr_request *req) 310{ 311 312 mtx_lock(&ktrace_mtx); 313 STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list); 314 mtx_unlock(&ktrace_mtx); 315 ktrace_exit(td); 316} 317 318/* 319 * Drain any pending ktrace records from the per-thread queue to disk. This 320 * is used both internally before committing other records, and also on 321 * system call return. We drain all the ones we can find at the time when 322 * drain is requested, but don't keep draining after that as those events 323 * may be approximately "after" the current event. 324 */ 325static void 326ktr_drain(struct thread *td) 327{ 328 struct ktr_request *queued_req; 329 STAILQ_HEAD(, ktr_request) local_queue; 330 331 ktrace_assert(td); 332 sx_assert(&ktrace_sx, SX_XLOCKED); 333 334 STAILQ_INIT(&local_queue); /* XXXRW: needed? */ 335 336 if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) { 337 mtx_lock(&ktrace_mtx); 338 STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr); 339 mtx_unlock(&ktrace_mtx); 340 341 while ((queued_req = STAILQ_FIRST(&local_queue))) { 342 STAILQ_REMOVE_HEAD(&local_queue, ktr_list); 343 ktr_writerequest(td, queued_req); 344 ktr_freerequest(queued_req); 345 } 346 } 347} 348 349/* 350 * Submit a trace record for immediate commit to disk -- to be used only 351 * where entering VFS is OK. First drain any pending records that may have 352 * been cached in the thread. 353 */ 354static void 355ktr_submitrequest(struct thread *td, struct ktr_request *req) 356{ 357 358 ktrace_assert(td); 359 360 sx_xlock(&ktrace_sx); 361 ktr_drain(td); 362 ktr_writerequest(td, req); 363 ktr_freerequest(req); 364 sx_xunlock(&ktrace_sx); 365 366 ktrace_exit(td); 367} 368 369static void 370ktr_freerequest(struct ktr_request *req) 371{ 372 373 if (req->ktr_buffer != NULL) 374 free(req->ktr_buffer, M_KTRACE); 375 mtx_lock(&ktrace_mtx); 376 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 377 mtx_unlock(&ktrace_mtx); 378} 379 380void 381ktrsyscall(code, narg, args) 382 int code, narg; 383 register_t args[]; 384{ 385 struct ktr_request *req; 386 struct ktr_syscall *ktp; 387 size_t buflen; 388 char *buf = NULL; 389 390 buflen = sizeof(register_t) * narg; 391 if (buflen > 0) { 392 buf = malloc(buflen, M_KTRACE, M_WAITOK); 393 bcopy(args, buf, buflen); 394 } 395 req = ktr_getrequest(KTR_SYSCALL); 396 if (req == NULL) { 397 if (buf != NULL) 398 free(buf, M_KTRACE); 399 return; 400 } 401 ktp = &req->ktr_data.ktr_syscall; 402 ktp->ktr_code = code; 403 ktp->ktr_narg = narg; 404 if (buflen > 0) { 405 req->ktr_header.ktr_len = buflen; 406 req->ktr_buffer = buf; 407 } 408 ktr_submitrequest(curthread, req); 409} 410 411void 412ktrsysret(code, error, retval) 413 int code, error; 414 register_t retval; 415{ 416 struct ktr_request *req; 417 struct ktr_sysret *ktp; 418 419 req = ktr_getrequest(KTR_SYSRET); 420 if (req == NULL) 421 return; 422 ktp = &req->ktr_data.ktr_sysret; 423 ktp->ktr_code = code; 424 ktp->ktr_error = error; 425 ktp->ktr_retval = retval; /* what about val2 ? */ 426 ktr_submitrequest(curthread, req); 427} 428 429/* 430 * When a process exits, drain per-process asynchronous trace records. 431 */ 432void 433ktrprocexit(struct thread *td) 434{ 435 436 ktrace_enter(td); 437 sx_xlock(&ktrace_sx); 438 ktr_drain(td); 439 sx_xunlock(&ktrace_sx); 440 ktrace_exit(td); 441} 442 443/* 444 * When a thread returns, drain any asynchronous records generated by the 445 * system call. 446 */ 447void 448ktruserret(struct thread *td) 449{ 450 451 ktrace_enter(td); 452 sx_xlock(&ktrace_sx); 453 ktr_drain(td); 454 sx_xunlock(&ktrace_sx); 455 ktrace_exit(td); 456} 457 458void 459ktrnamei(path) 460 char *path; 461{ 462 struct ktr_request *req; 463 int namelen; 464 char *buf = NULL; 465 466 namelen = strlen(path); 467 if (namelen > 0) { 468 buf = malloc(namelen, M_KTRACE, M_WAITOK); 469 bcopy(path, buf, namelen); 470 } 471 req = ktr_getrequest(KTR_NAMEI); 472 if (req == NULL) { 473 if (buf != NULL) 474 free(buf, M_KTRACE); 475 return; 476 } 477 if (namelen > 0) { 478 req->ktr_header.ktr_len = namelen; 479 req->ktr_buffer = buf; 480 } 481 ktr_submitrequest(curthread, req); 482} 483 484void 485ktrsysctl(name, namelen) 486 int *name; 487 u_int namelen; 488{ 489 struct ktr_request *req; 490 u_int mib[CTL_MAXNAME + 2]; 491 char *mibname; 492 size_t mibnamelen; 493 int error; 494 495 /* Lookup name of mib. */ 496 KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long")); 497 mib[0] = 0; 498 mib[1] = 1; 499 bcopy(name, mib + 2, namelen * sizeof(*name)); 500 mibnamelen = 128; 501 mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK); 502 error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen, 503 NULL, 0, &mibnamelen, 0); 504 if (error) { 505 free(mibname, M_KTRACE); 506 return; 507 } 508 req = ktr_getrequest(KTR_SYSCTL); 509 if (req == NULL) { 510 free(mibname, M_KTRACE); 511 return; 512 } 513 req->ktr_header.ktr_len = mibnamelen; 514 req->ktr_buffer = mibname; 515 ktr_submitrequest(curthread, req); 516} 517 518void 519ktrgenio(fd, rw, uio, error) 520 int fd; 521 enum uio_rw rw; 522 struct uio *uio; 523 int error; 524{ 525 struct ktr_request *req; 526 struct ktr_genio *ktg; 527 int datalen; 528 char *buf; 529 530 if (error) { 531 free(uio, M_IOV); 532 return; 533 } 534 uio->uio_offset = 0; 535 uio->uio_rw = UIO_WRITE; 536 datalen = imin(uio->uio_resid, ktr_geniosize); 537 buf = malloc(datalen, M_KTRACE, M_WAITOK); 538 error = uiomove(buf, datalen, uio); 539 free(uio, M_IOV); 540 if (error) { 541 free(buf, M_KTRACE); 542 return; 543 } 544 req = ktr_getrequest(KTR_GENIO); 545 if (req == NULL) { 546 free(buf, M_KTRACE); 547 return; 548 } 549 ktg = &req->ktr_data.ktr_genio; 550 ktg->ktr_fd = fd; 551 ktg->ktr_rw = rw; 552 req->ktr_header.ktr_len = datalen; 553 req->ktr_buffer = buf; 554 ktr_submitrequest(curthread, req); 555} 556 557void 558ktrpsig(sig, action, mask, code) 559 int sig; 560 sig_t action; 561 sigset_t *mask; 562 int code; 563{ 564 struct ktr_request *req; 565 struct ktr_psig *kp; 566 567 req = ktr_getrequest(KTR_PSIG); 568 if (req == NULL) 569 return; 570 kp = &req->ktr_data.ktr_psig; 571 kp->signo = (char)sig; 572 kp->action = action; 573 kp->mask = *mask; 574 kp->code = code; 575 ktr_enqueuerequest(curthread, req); 576} 577 578void 579ktrcsw(out, user) 580 int out, user; 581{ 582 struct ktr_request *req; 583 struct ktr_csw *kc; 584 585 req = ktr_getrequest(KTR_CSW); 586 if (req == NULL) 587 return; 588 kc = &req->ktr_data.ktr_csw; 589 kc->out = out; 590 kc->user = user; 591 ktr_enqueuerequest(curthread, req); 592} 593 594void 595ktrstruct(name, namelen, data, datalen) 596 const char *name; 597 size_t namelen; 598 void *data; 599 size_t datalen; 600{ 601 struct ktr_request *req; 602 char *buf = NULL; 603 size_t buflen; 604 605 if (!data) 606 datalen = 0; 607 buflen = namelen + 1 + datalen; 608 buf = malloc(buflen, M_KTRACE, M_WAITOK); 609 bcopy(name, buf, namelen); 610 buf[namelen] = '\0'; 611 bcopy(data, buf + namelen + 1, datalen); 612 if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) { 613 free(buf, M_KTRACE); 614 return; 615 } 616 req->ktr_buffer = buf; 617 req->ktr_header.ktr_len = buflen; 618 ktr_submitrequest(curthread, req); 619} 620#endif /* KTRACE */ 621 622/* Interface and common routines */ 623 624#ifndef _SYS_SYSPROTO_H_ 625struct ktrace_args { 626 char *fname; 627 int ops; 628 int facs; 629 int pid; 630}; 631#endif 632/* ARGSUSED */ 633int 634ktrace(td, uap) 635 struct thread *td; 636 register struct ktrace_args *uap; 637{ 638#ifdef KTRACE 639 register struct vnode *vp = NULL; 640 register struct proc *p; 641 struct pgrp *pg; 642 int facs = uap->facs & ~KTRFAC_ROOT; 643 int ops = KTROP(uap->ops); 644 int descend = uap->ops & KTRFLAG_DESCEND; 645 int nfound, ret = 0; 646 int flags, error = 0, vfslocked; 647 struct nameidata nd; 648 struct ucred *cred; 649 650 /* 651 * Need something to (un)trace. 652 */ 653 if (ops != KTROP_CLEARFILE && facs == 0) 654 return (EINVAL); 655 656 ktrace_enter(td); 657 if (ops != KTROP_CLEAR) { 658 /* 659 * an operation which requires a file argument. 660 */ 661 NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, 662 uap->fname, td); 663 flags = FREAD | FWRITE | O_NOFOLLOW; 664 error = vn_open(&nd, &flags, 0, NULL); 665 if (error) { 666 ktrace_exit(td); 667 return (error); 668 } 669 vfslocked = NDHASGIANT(&nd); 670 NDFREE(&nd, NDF_ONLY_PNBUF); 671 vp = nd.ni_vp; 672 VOP_UNLOCK(vp, 0); 673 if (vp->v_type != VREG) { 674 (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); 675 VFS_UNLOCK_GIANT(vfslocked); 676 ktrace_exit(td); 677 return (EACCES); 678 } 679 VFS_UNLOCK_GIANT(vfslocked); 680 } 681 /* 682 * Clear all uses of the tracefile. 683 */ 684 if (ops == KTROP_CLEARFILE) { 685 int vrele_count; 686 687 vrele_count = 0; 688 sx_slock(&allproc_lock); 689 FOREACH_PROC_IN_SYSTEM(p) { 690 PROC_LOCK(p); 691 if (p->p_tracevp == vp) { 692 if (ktrcanset(td, p)) { 693 mtx_lock(&ktrace_mtx); 694 cred = p->p_tracecred; 695 p->p_tracecred = NULL; 696 p->p_tracevp = NULL; 697 p->p_traceflag = 0; 698 mtx_unlock(&ktrace_mtx); 699 vrele_count++; 700 crfree(cred); 701 } else 702 error = EPERM; 703 } 704 PROC_UNLOCK(p); 705 } 706 sx_sunlock(&allproc_lock); 707 if (vrele_count > 0) { 708 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 709 while (vrele_count-- > 0) 710 vrele(vp); 711 VFS_UNLOCK_GIANT(vfslocked); 712 } 713 goto done; 714 } 715 /* 716 * do it 717 */ 718 sx_slock(&proctree_lock); 719 if (uap->pid < 0) { 720 /* 721 * by process group 722 */ 723 pg = pgfind(-uap->pid); 724 if (pg == NULL) { 725 sx_sunlock(&proctree_lock); 726 error = ESRCH; 727 goto done; 728 } 729 /* 730 * ktrops() may call vrele(). Lock pg_members 731 * by the proctree_lock rather than pg_mtx. 732 */ 733 PGRP_UNLOCK(pg); 734 nfound = 0; 735 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 736 PROC_LOCK(p); 737 if (p_cansee(td, p) != 0) { 738 PROC_UNLOCK(p); 739 continue; 740 } 741 PROC_UNLOCK(p); 742 nfound++; 743 if (descend) 744 ret |= ktrsetchildren(td, p, ops, facs, vp); 745 else 746 ret |= ktrops(td, p, ops, facs, vp); 747 } 748 if (nfound == 0) { 749 sx_sunlock(&proctree_lock); 750 error = ESRCH; 751 goto done; 752 } 753 } else { 754 /* 755 * by pid 756 */ 757 p = pfind(uap->pid); 758 if (p == NULL) { 759 sx_sunlock(&proctree_lock); 760 error = ESRCH; 761 goto done; 762 } 763 error = p_cansee(td, p); 764 /* 765 * The slock of the proctree lock will keep this process 766 * from going away, so unlocking the proc here is ok. 767 */ 768 PROC_UNLOCK(p); 769 if (error) { 770 sx_sunlock(&proctree_lock); 771 goto done; 772 } 773 if (descend) 774 ret |= ktrsetchildren(td, p, ops, facs, vp); 775 else 776 ret |= ktrops(td, p, ops, facs, vp); 777 } 778 sx_sunlock(&proctree_lock); 779 if (!ret) 780 error = EPERM; 781done: 782 if (vp != NULL) { 783 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 784 (void) vn_close(vp, FWRITE, td->td_ucred, td); 785 VFS_UNLOCK_GIANT(vfslocked); 786 } 787 ktrace_exit(td); 788 return (error); 789#else /* !KTRACE */ 790 return (ENOSYS); 791#endif /* KTRACE */ 792} 793 794/* ARGSUSED */ 795int 796utrace(td, uap) 797 struct thread *td; 798 register struct utrace_args *uap; 799{ 800 801#ifdef KTRACE 802 struct ktr_request *req; 803 void *cp; 804 int error; 805 806 if (!KTRPOINT(td, KTR_USER)) 807 return (0); 808 if (uap->len > KTR_USER_MAXLEN) 809 return (EINVAL); 810 cp = malloc(uap->len, M_KTRACE, M_WAITOK); 811 error = copyin(uap->addr, cp, uap->len); 812 if (error) { 813 free(cp, M_KTRACE); 814 return (error); 815 } 816 req = ktr_getrequest(KTR_USER); 817 if (req == NULL) { 818 free(cp, M_KTRACE); 819 return (ENOMEM); 820 } 821 req->ktr_buffer = cp; 822 req->ktr_header.ktr_len = uap->len; 823 ktr_submitrequest(td, req); 824 return (0); 825#else /* !KTRACE */ 826 return (ENOSYS); 827#endif /* KTRACE */ 828} 829 830#ifdef KTRACE 831static int 832ktrops(td, p, ops, facs, vp) 833 struct thread *td; 834 struct proc *p; 835 int ops, facs; 836 struct vnode *vp; 837{ 838 struct vnode *tracevp = NULL; 839 struct ucred *tracecred = NULL; 840 841 PROC_LOCK(p); 842 if (!ktrcanset(td, p)) { 843 PROC_UNLOCK(p); 844 return (0); 845 } 846 mtx_lock(&ktrace_mtx); 847 if (ops == KTROP_SET) { 848 if (p->p_tracevp != vp) { 849 /* 850 * if trace file already in use, relinquish below 851 */ 852 tracevp = p->p_tracevp; 853 VREF(vp); 854 p->p_tracevp = vp; 855 } 856 if (p->p_tracecred != td->td_ucred) { 857 tracecred = p->p_tracecred; 858 p->p_tracecred = crhold(td->td_ucred); 859 } 860 p->p_traceflag |= facs; 861 if (priv_check(td, PRIV_KTRACE) == 0) 862 p->p_traceflag |= KTRFAC_ROOT; 863 } else { 864 /* KTROP_CLEAR */ 865 if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) { 866 /* no more tracing */ 867 p->p_traceflag = 0; 868 tracevp = p->p_tracevp; 869 p->p_tracevp = NULL; 870 tracecred = p->p_tracecred; 871 p->p_tracecred = NULL; 872 } 873 } 874 mtx_unlock(&ktrace_mtx); 875 PROC_UNLOCK(p); 876 if (tracevp != NULL) { 877 int vfslocked; 878 879 vfslocked = VFS_LOCK_GIANT(tracevp->v_mount); 880 vrele(tracevp); 881 VFS_UNLOCK_GIANT(vfslocked); 882 } 883 if (tracecred != NULL) 884 crfree(tracecred); 885 886 return (1); 887} 888 889static int 890ktrsetchildren(td, top, ops, facs, vp) 891 struct thread *td; 892 struct proc *top; 893 int ops, facs; 894 struct vnode *vp; 895{ 896 register struct proc *p; 897 register int ret = 0; 898 899 p = top; 900 sx_assert(&proctree_lock, SX_LOCKED); 901 for (;;) { 902 ret |= ktrops(td, p, ops, facs, vp); 903 /* 904 * If this process has children, descend to them next, 905 * otherwise do any siblings, and if done with this level, 906 * follow back up the tree (but not past top). 907 */ 908 if (!LIST_EMPTY(&p->p_children)) 909 p = LIST_FIRST(&p->p_children); 910 else for (;;) { 911 if (p == top) 912 return (ret); 913 if (LIST_NEXT(p, p_sibling)) { 914 p = LIST_NEXT(p, p_sibling); 915 break; 916 } 917 p = p->p_pptr; 918 } 919 } 920 /*NOTREACHED*/ 921} 922 923static void 924ktr_writerequest(struct thread *td, struct ktr_request *req) 925{ 926 struct ktr_header *kth; 927 struct vnode *vp; 928 struct proc *p; 929 struct ucred *cred; 930 struct uio auio; 931 struct iovec aiov[3]; 932 struct mount *mp; 933 int datalen, buflen, vrele_count; 934 int error, vfslocked; 935 936 /* 937 * We hold the vnode and credential for use in I/O in case ktrace is 938 * disabled on the process as we write out the request. 939 * 940 * XXXRW: This is not ideal: we could end up performing a write after 941 * the vnode has been closed. 942 */ 943 mtx_lock(&ktrace_mtx); 944 vp = td->td_proc->p_tracevp; 945 cred = td->td_proc->p_tracecred; 946 947 /* 948 * If vp is NULL, the vp has been cleared out from under this 949 * request, so just drop it. Make sure the credential and vnode are 950 * in sync: we should have both or neither. 951 */ 952 if (vp == NULL) { 953 KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL")); 954 mtx_unlock(&ktrace_mtx); 955 return; 956 } 957 VREF(vp); 958 KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL")); 959 crhold(cred); 960 mtx_unlock(&ktrace_mtx); 961 962 kth = &req->ktr_header; 963 KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) < 964 sizeof(data_lengths) / sizeof(data_lengths[0]), 965 ("data_lengths array overflow")); 966 datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP]; 967 buflen = kth->ktr_len; 968 auio.uio_iov = &aiov[0]; 969 auio.uio_offset = 0; 970 auio.uio_segflg = UIO_SYSSPACE; 971 auio.uio_rw = UIO_WRITE; 972 aiov[0].iov_base = (caddr_t)kth; 973 aiov[0].iov_len = sizeof(struct ktr_header); 974 auio.uio_resid = sizeof(struct ktr_header); 975 auio.uio_iovcnt = 1; 976 auio.uio_td = td; 977 if (datalen != 0) { 978 aiov[1].iov_base = (caddr_t)&req->ktr_data; 979 aiov[1].iov_len = datalen; 980 auio.uio_resid += datalen; 981 auio.uio_iovcnt++; 982 kth->ktr_len += datalen; 983 } 984 if (buflen != 0) { 985 KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write")); 986 aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer; 987 aiov[auio.uio_iovcnt].iov_len = buflen; 988 auio.uio_resid += buflen; 989 auio.uio_iovcnt++; 990 } 991 992 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 993 vn_start_write(vp, &mp, V_WAIT); 994 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 995 (void)VOP_LEASE(vp, td, cred, LEASE_WRITE); 996#ifdef MAC 997 error = mac_vnode_check_write(cred, NOCRED, vp); 998 if (error == 0) 999#endif 1000 error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred); 1001 VOP_UNLOCK(vp, 0); 1002 vn_finished_write(mp); 1003 crfree(cred); 1004 if (!error) { 1005 vrele(vp); 1006 VFS_UNLOCK_GIANT(vfslocked); 1007 return; 1008 } 1009 VFS_UNLOCK_GIANT(vfslocked); 1010 1011 /* 1012 * If error encountered, give up tracing on this vnode. We defer 1013 * all the vrele()'s on the vnode until after we are finished walking 1014 * the various lists to avoid needlessly holding locks. 1015 * NB: at this point we still hold the vnode reference that must 1016 * not go away as we need the valid vnode to compare with. Thus let 1017 * vrele_count start at 1 and the reference will be freed 1018 * by the loop at the end after our last use of vp. 1019 */ 1020 log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n", 1021 error); 1022 vrele_count = 1; 1023 /* 1024 * First, clear this vnode from being used by any processes in the 1025 * system. 1026 * XXX - If one process gets an EPERM writing to the vnode, should 1027 * we really do this? Other processes might have suitable 1028 * credentials for the operation. 1029 */ 1030 cred = NULL; 1031 sx_slock(&allproc_lock); 1032 FOREACH_PROC_IN_SYSTEM(p) { 1033 PROC_LOCK(p); 1034 if (p->p_tracevp == vp) { 1035 mtx_lock(&ktrace_mtx); 1036 p->p_tracevp = NULL; 1037 p->p_traceflag = 0; 1038 cred = p->p_tracecred; 1039 p->p_tracecred = NULL; 1040 mtx_unlock(&ktrace_mtx); 1041 vrele_count++; 1042 } 1043 PROC_UNLOCK(p); 1044 if (cred != NULL) { 1045 crfree(cred); 1046 cred = NULL; 1047 } 1048 } 1049 sx_sunlock(&allproc_lock); 1050 1051 /* 1052 * We can't clear any pending requests in threads that have cached 1053 * them but not yet committed them, as those are per-thread. The 1054 * thread will have to clear it itself on system call return. 1055 */ 1056 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1057 while (vrele_count-- > 0) 1058 vrele(vp); 1059 VFS_UNLOCK_GIANT(vfslocked); 1060} 1061 1062/* 1063 * Return true if caller has permission to set the ktracing state 1064 * of target. Essentially, the target can't possess any 1065 * more permissions than the caller. KTRFAC_ROOT signifies that 1066 * root previously set the tracing status on the target process, and 1067 * so, only root may further change it. 1068 */ 1069static int 1070ktrcanset(td, targetp) 1071 struct thread *td; 1072 struct proc *targetp; 1073{ 1074 1075 PROC_LOCK_ASSERT(targetp, MA_OWNED); 1076 if (targetp->p_traceflag & KTRFAC_ROOT && 1077 priv_check(td, PRIV_KTRACE)) 1078 return (0); 1079 1080 if (p_candebug(td, targetp) != 0) 1081 return (0); 1082 1083 return (1); 1084} 1085 1086#endif /* KTRACE */ 1087